diff --git a/.arclint b/.arclint
index 01de1ee897ada..246ff9f39d475 100644
--- a/.arclint
+++ b/.arclint
@@ -2,7 +2,7 @@
   "linters": {
     "clang-format": {
       "type": "script-and-regex",
-      "script-and-regex.script": "utils/arcanist/clang-format.sh",
+      "script-and-regex.script": "bash utils/arcanist/clang-format.sh",
      "script-and-regex.regex": "/^(?P<severity>[[:alpha:]]+)\n(?P<message>[^\n]+)\n(====|(?P<line>\\d),(?P<char>\\d)\n(?P<original>.*)>>>>\n(?P<replacement>.*)<<<<\n)$/s",
       "include": [
         "(\\.(cc|cpp|h)$)"
diff --git a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp
index b2f5142651773..acc258a5bc564 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp
@@ -76,7 +76,7 @@ static std::string getExprAsString(const clang::Expr &E,
   Text.erase(
       llvm::remove_if(
           Text,
-          [](char C) { return std::isspace(static_cast<unsigned char>(C)); }),
+          [](char C) { return llvm::isSpace(static_cast<unsigned char>(C)); }),
       Text.end());
   return Text;
 }
diff --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
index 732ccbc9dd2ae..66f00e35c7e7c 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
@@ -102,11 +102,31 @@ void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
           .bind("comparison");
 
   Finder->addMatcher(CompareOperator, this);
+
+  // Catch array subscripts with signed char -> integer conversion.
+  // Matcher for C arrays.
+  const auto CArraySubscript =
+      arraySubscriptExpr(hasIndex(SignedCharCastExpr)).bind("arraySubscript");
+
+  Finder->addMatcher(CArraySubscript, this);
+
+  // Matcher for std arrays.
+  const auto STDArraySubscript =
+      cxxOperatorCallExpr(
+          hasOverloadedOperatorName("[]"),
+          hasArgument(0, hasType(cxxRecordDecl(hasName("::std::array")))),
+          hasArgument(1, SignedCharCastExpr))
+          .bind("arraySubscript");
+
+  Finder->addMatcher(STDArraySubscript, this);
 }
 
 void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
   const auto *SignedCastExpression =
       Result.Nodes.getNodeAs<ImplicitCastExpr>("signedCastExpression");
+  const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
+  assert(SignedCastExpression);
+  assert(IntegerType);
 
   // Ignore the match if we know that the signed char's value is not negative.
   // The potential misinterpretation happens for negative values only.
@@ -135,14 +155,17 @@ void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
     diag(Comparison->getBeginLoc(),
          "comparison between 'signed char' and 'unsigned char'");
-  } else if (const auto *IntegerType =
-                 Result.Nodes.getNodeAs<QualType>("integerType")) {
+  } else if (Result.Nodes.getNodeAs<Expr>("arraySubscript")) {
+    diag(SignedCastExpression->getBeginLoc(),
+         "'signed char' to %0 conversion in array subscript; "
+         "consider casting to 'unsigned char' first.")
+        << *IntegerType;
+  } else {
     diag(SignedCastExpression->getBeginLoc(),
          "'signed char' to %0 conversion; "
          "consider casting to 'unsigned char' first.")
         << *IntegerType;
-  } else
-    llvm_unreachable("Unexpected match");
+  }
 }
 
 } // namespace bugprone
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt b/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt
index d7965e126b722..9d4edb2abf1a2 100644
--- a/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt
@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_clang_library(clangTidyLLVMLibcModule
+  CalleeNamespaceCheck.cpp
   ImplementationInNamespaceCheck.cpp
   LLVMLibcTidyModule.cpp
   RestrictSystemLibcHeadersCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp
new file mode 100644
index 0000000000000..fbc6762a44e78
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp
@@ -0,0 +1,56 @@
+//===-- CalleeNamespaceCheck.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CalleeNamespaceCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace llvm_libc {
+
+// Gets the outermost namespace of a DeclContext, right under the Translation
+// Unit.
+const DeclContext *getOutermostNamespace(const DeclContext *Decl) {
+  const DeclContext *Parent = Decl->getParent();
+  if (Parent && Parent->isTranslationUnit())
+    return Decl;
+  return getOutermostNamespace(Parent);
+}
+
+void CalleeNamespaceCheck::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(
+      declRefExpr(to(functionDecl().bind("func"))).bind("use-site"), this);
+}
+
+void CalleeNamespaceCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto *UsageSiteExpr = Result.Nodes.getNodeAs<DeclRefExpr>("use-site");
+  const auto *FuncDecl = Result.Nodes.getNodeAs<FunctionDecl>("func");
+
+  // Ignore compiler builtin functions.
+  if (FuncDecl->getBuiltinID() != 0)
+    return;
+
+  // If the outermost namespace of the function is __llvm_libc, we're good.
+  const auto *NS = dyn_cast<NamespaceDecl>(getOutermostNamespace(FuncDecl));
+  if (NS && NS->getName() == "__llvm_libc")
+    return;
+
+  diag(UsageSiteExpr->getBeginLoc(), "%0 must resolve to a function declared "
+                                     "within the '__llvm_libc' namespace")
+      << FuncDecl;
+
+  diag(FuncDecl->getLocation(), "resolves to this declaration",
+       clang::DiagnosticIDs::Note);
+}
+
+} // namespace llvm_libc
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h
new file mode 100644
index 0000000000000..b35c6011f0889
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h
@@ -0,0 +1,38 @@
+//===-- CalleeNamespaceCheck.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_CALLEENAMESPACECHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_CALLEENAMESPACECHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace llvm_libc {
+
+/// Checks all calls resolve to functions within __llvm_libc namespace.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/llvmlibc-callee-namespace.html
+class CalleeNamespaceCheck : public ClangTidyCheck {
+public:
+  CalleeNamespaceCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus;
+  }
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace llvm_libc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_LLVMLIBC_CALLEENAMESPACECHECK_H
diff --git a/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp b/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp
index 8c5a7ac201616..61cfe29536646 100644
--- a/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp
@@ -9,6 +9,7 @@
 #include "../ClangTidy.h"
 #include "../ClangTidyModule.h"
 #include "../ClangTidyModuleRegistry.h"
+#include "CalleeNamespaceCheck.h"
 #include "ImplementationInNamespaceCheck.h"
 #include "RestrictSystemLibcHeadersCheck.h"
 
@@ -19,6 +20,8 @@ namespace llvm_libc {
 class LLVMLibcModule : public ClangTidyModule {
 public:
   void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<CalleeNamespaceCheck>(
+        "llvmlibc-callee-namespace");
     CheckFactories.registerCheck<ImplementationInNamespaceCheck>(
         "llvmlibc-implementation-in-namespace");
     CheckFactories.registerCheck<RestrictSystemLibcHeadersCheck>(
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
index 87be93252227d..e3953c5d84041 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
@@ -12,6 +12,7 @@
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Tooling/FixIt.h"
+#include "llvm/ADT/StringExtras.h"
 #include <cctype>
@@ -356,10 +357,10 @@ bool UseTrailingReturnTypeCheck::keepSpecifiers(
     unsigned int TOffsetInRT = TOffset - ReturnTypeBeginOffset - DeletedChars;
     unsigned int TLengthWithWS = CT.T.getLength();
     while (TOffsetInRT + TLengthWithWS < ReturnType.size() &&
-           std::isspace(ReturnType[TOffsetInRT + TLengthWithWS]))
+           llvm::isSpace(ReturnType[TOffsetInRT + TLengthWithWS]))
       TLengthWithWS++;
     std::string Specifier = ReturnType.substr(TOffsetInRT, TLengthWithWS);
-    if (!std::isspace(Specifier.back()))
+    if (!llvm::isSpace(Specifier.back()))
       Specifier.push_back(' ');
     Auto.insert(Auto.size() - InitialAutoLength, Specifier);
     ReturnType.erase(TOffsetInRT, TLengthWithWS);
@@ -459,7 +460,7 @@ void UseTrailingReturnTypeCheck::check(const MatchFinder::MatchResult &Result) {
                                        ReturnTypeEnd.getLocWithOffset(1)),
       SM, LangOpts);
   bool NeedSpaceAfterAuto =
-      CharAfterReturnType.empty() || !std::isspace(CharAfterReturnType[0]);
+      CharAfterReturnType.empty() || !llvm::isSpace(CharAfterReturnType[0]);
   std::string Auto = NeedSpaceAfterAuto ? "auto " : "auto";
   std::string ReturnType =
diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
index 821c6290fe01b..3f7edd659c63a 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
@@ -30,7 +30,10 @@ TransformerClangTidyCheck::TransformerClangTidyCheck(
                                       const OptionsView &)>
         MakeRule,
     StringRef Name, ClangTidyContext *Context)
-    : ClangTidyCheck(Name, Context), Rule(MakeRule(getLangOpts(), Options)) {
+    : ClangTidyCheck(Name, Context), Rule(MakeRule(getLangOpts(), Options)),
+      IncludeStyle(Options.getLocalOrGlobal("IncludeStyle",
+                                            IncludeSorter::getMapping(),
+                                            IncludeSorter::IS_LLVM)) {
   if (Rule)
     assert(llvm::all_of(Rule->Cases, hasExplanation) &&
            "clang-tidy checks must have an explanation by default;"
@@ -40,7 +43,10 @@ TransformerClangTidyCheck::TransformerClangTidyCheck(RewriteRule R,
                                                      StringRef Name,
                                                      ClangTidyContext *Context)
-    : ClangTidyCheck(Name, Context), Rule(std::move(R)) {
+    : ClangTidyCheck(Name, Context), Rule(std::move(R)),
+      IncludeStyle(Options.getLocalOrGlobal("IncludeStyle",
+                                            IncludeSorter::getMapping(),
+                                            IncludeSorter::IS_LLVM)) {
   assert(llvm::all_of(Rule->Cases, hasExplanation) &&
          "clang-tidy checks must have an explanation by default;"
          " explicitly provide an empty explanation if none is desired");
@@ -53,8 +59,8 @@ void TransformerClangTidyCheck::registerPPCallbacks(
   if (Rule && llvm::any_of(Rule->Cases, [](const RewriteRule::Case &C) {
         return !C.AddedIncludes.empty();
       })) {
-    Inserter = std::make_unique<IncludeInserter>(
-        SM, getLangOpts(), utils::IncludeSorter::IS_LLVM);
+    Inserter =
+        std::make_unique<IncludeInserter>(SM, getLangOpts(), IncludeStyle);
     PP->addPPCallbacks(Inserter->CreatePPCallbacks());
   }
 }
diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
index e411a3f6f0f52..796222373eb53 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
@@ -10,7 +10,8 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_TRANSFORMER_CLANG_TIDY_CHECK_H
 
 #include "../ClangTidy.h"
-#include "../utils/IncludeInserter.h"
+#include "IncludeInserter.h"
+#include "IncludeSorter.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/Transformer/Transformer.h"
"clang/Tooling/Transformer/Transformer.h" @@ -31,6 +32,13 @@ namespace utils { // MyCheck(StringRef Name, ClangTidyContext *Context) // : TransformerClangTidyCheck(MyCheckAsRewriteRule, Name, Context) {} // }; +// +// `TransformerClangTidyCheck` recognizes this clang-tidy option: +// +// * IncludeStyle. A string specifying which file naming convention is used by +// the source code, 'llvm' or 'google'. Default is 'llvm'. The naming +// convention influences how canonical headers are distinguished from other +// includes. class TransformerClangTidyCheck : public ClangTidyCheck { public: // \p MakeRule generates the rewrite rule to be used by the check, based on @@ -61,7 +69,8 @@ class TransformerClangTidyCheck : public ClangTidyCheck { private: Optional Rule; - std::unique_ptr Inserter; + const IncludeSorter::IncludeStyle IncludeStyle; + std::unique_ptr Inserter; }; } // namespace utils diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 124f087589d69..dd3e027135590 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(support) + # Configure the Features.inc file. if (NOT DEFINED CLANGD_BUILD_XPC) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") @@ -25,19 +27,8 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -if(CLANG_BUILT_STANDALONE) - # needed to get HAVE_CXX_ATOMICS64_WITHOUT_LIB defined - include(CheckAtomic) -endif() - -set(CLANGD_ATOMIC_LIB "") -if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB OR NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) - list(APPEND CLANGD_ATOMIC_LIB "atomic") -endif() - add_clang_library(clangDaemon AST.cpp - Cancellation.cpp ClangdLSPServer.cpp ClangdServer.cpp CodeComplete.cpp @@ -45,7 +36,6 @@ add_clang_library(clangDaemon CollectMacros.cpp CompileCommands.cpp Compiler.cpp - Context.cpp Diagnostics.cpp DraftStore.cpp ExpectedTypes.cpp @@ -54,8 +44,6 @@ add_clang_library(clangDaemon FileDistance.cpp Format.cpp FS.cpp - FSProvider.cpp - FormattedString.cpp FuzzyMatch.cpp GlobalCompilationDatabase.cpp Headers.cpp @@ -63,7 +51,6 @@ add_clang_library(clangDaemon Hover.cpp IncludeFixer.cpp JSONTransport.cpp - Logger.cpp PathMapping.cpp Protocol.cpp Quality.cpp @@ -73,11 +60,8 @@ add_clang_library(clangDaemon Selection.cpp SemanticHighlighting.cpp SemanticSelection.cpp - Shutdown.cpp SourceCode.cpp QueryDriverDatabase.cpp - Threading.cpp - Trace.cpp TUScheduler.cpp URI.cpp XRefs.cpp @@ -128,8 +112,8 @@ add_clang_library(clangDaemon clangToolingInclusions clangToolingRefactoring clangToolingSyntax + clangdSupport ${LLVM_PTHREAD_LIB} - ${CLANGD_ATOMIC_LIB} ${ALL_CLANG_TIDY_CHECKS} ) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index f58bfaf996481..a89fa2f8104e3 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -7,18 +7,17 @@ //===----------------------------------------------------------------------===// #include "ClangdLSPServer.h" -#include "Context.h" #include "Diagnostics.h" #include "DraftStore.h" -#include "FormattedString.h" #include "GlobalCompilationDatabase.h" #include "Protocol.h" #include "SemanticHighlighting.h" #include "SourceCode.h" #include "TUScheduler.h" -#include "Trace.h" #include "URI.h" #include "refactor/Tweak.h" +#include "support/Context.h" +#include "support/Trace.h" #include "clang/Basic/Version.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/ArrayRef.h" @@ -43,6 +42,11 @@ namespace clang { namespace clangd 
 namespace {
 
+// Tracks end-to-end latency of high level lsp calls. Measurements are in
+// seconds.
+constexpr trace::Metric LSPLatency("lsp_latency", trace::Metric::Distribution,
+                                   "method_name");
+
 // LSP defines file versions as numbers that increase.
 // ClangdServer treats them as opaque and therefore uses strings instead.
 std::string encodeVersion(int64_t LSPVersion) {
@@ -185,7 +189,7 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler {
       WithContext HandlerContext(handlerContext());
       // Calls can be canceled by the client. Add cancellation context.
       WithContext WithCancel(cancelableRequestContext(ID));
-      trace::Span Tracer(Method);
+      trace::Span Tracer(Method, LSPLatency);
       SPAN_ATTACH(Tracer, "Params", Params);
       ReplyOnce Reply(ID, Method, &Server, Tracer.Args);
       log("<-- {0}({1})", Method, ID);
@@ -297,7 +301,7 @@ class ClangdLSPServer::MessageHandler : public Transport::MessageHandler {
         elog("Failed to decode {0} request.", Method);
         return;
       }
-      trace::Span Tracer(Method);
+      trace::Span Tracer(Method, LSPLatency);
       SPAN_ATTACH(Tracer, "Params", RawParams);
       (Server.*Handler)(P);
     };
@@ -528,6 +532,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
   CCOpts.IncludeFixIts = Params.capabilities.CompletionFixes;
   if (!CCOpts.BundleOverloads.hasValue())
     CCOpts.BundleOverloads = Params.capabilities.HasSignatureHelp;
+  CCOpts.DocumentationFormat =
+      Params.capabilities.CompletionDocumentationFormat;
   DiagOpts.EmbedFixesInDiagnostics = Params.capabilities.DiagnosticFixes;
   DiagOpts.SendDiagnosticCategory = Params.capabilities.DiagnosticCategory;
   DiagOpts.EmitRelatedLocations =
@@ -1288,7 +1294,7 @@ void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params,
     Result.tokens = toSemanticTokens(*HT);
     {
       std::lock_guard<std::mutex> Lock(SemanticTokensMutex);
-      auto& Last = LastSemanticTokens[File];
+      auto &Last = LastSemanticTokens[File];
       Last.tokens = Result.tokens;
       increment(Last.resultId);
 
@@ -1313,7 +1319,7 @@ void ClangdLSPServer::onSemanticTokensEdits(
   SemanticTokensOrEdits Result;
   {
     std::lock_guard<std::mutex> Lock(SemanticTokensMutex);
-    auto& Last = LastSemanticTokens[File];
+    auto &Last = LastSemanticTokens[File];
 
     if (PrevResultID == Last.resultId) {
       Result.edits = diffTokens(Last.tokens, Toks);
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h
index 9c35ca6bda3a6..c3da2f9a9457f 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.h
+++ b/clang-tools-extra/clangd/ClangdLSPServer.h
@@ -10,14 +10,14 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CLANGDLSPSERVER_H
 
 #include "ClangdServer.h"
-#include "Context.h"
 #include "DraftStore.h"
 #include "Features.inc"
 #include "FindSymbols.h"
 #include "GlobalCompilationDatabase.h"
-#include "Path.h"
 #include "Protocol.h"
 #include "Transport.h"
+#include "support/Context.h"
+#include "support/Path.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringSet.h"
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index c5148f81f3dfe..9231296e729d0 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -10,10 +10,8 @@
 #include "CodeComplete.h"
 #include "FindSymbols.h"
 #include "Format.h"
-#include "FormattedString.h"
 #include "HeaderSourceSwitch.h"
 #include "Headers.h"
-#include "Logger.h"
 #include "ParsedAST.h"
 #include "Preamble.h"
 #include "Protocol.h"
@@ -21,13 +19,15 @@
 #include "SemanticSelection.h"
 #include "SourceCode.h"
 #include "TUScheduler.h"
"TUScheduler.h" -#include "Trace.h" #include "XRefs.h" #include "index/CanonicalIncludes.h" #include "index/FileIndex.h" #include "index/Merge.h" #include "refactor/Rename.h" #include "refactor/Tweak.h" +#include "support/Logger.h" +#include "support/Markup.h" +#include "support/Trace.h" #include "clang/Format/Format.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" @@ -43,6 +43,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -368,6 +369,9 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, auto Action = [File = File.str(), NewName = NewName.str(), Pos, Opts, CB = std::move(CB), Snapshot = std::move(Snapshot), this](llvm::Expected InpAST) mutable { + // Tracks number of files edited per invocation. + static constexpr trace::Metric RenameFiles("rename_files", + trace::Metric::Distribution); if (!InpAST) return CB(InpAST.takeError()); auto GetDirtyBuffer = @@ -393,6 +397,7 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, if (Err) return CB(std::move(Err)); } + RenameFiles.record(Edits->size()); return CB(std::move(*Edits)); }; WorkScheduler.runWithAST("Rename", File, std::move(Action)); @@ -422,6 +427,9 @@ tweakSelection(const Range &Sel, const InputsAndAST &AST) { void ClangdServer::enumerateTweaks(PathRef File, Range Sel, Callback> CB) { + // Tracks number of times a tweak has been offered. + static constexpr trace::Metric TweakAvailable( + "tweak_available", trace::Metric::Counter, "tweak_id"); auto Action = [File = File.str(), Sel, CB = std::move(CB), this](Expected InpAST) mutable { if (!InpAST) @@ -439,6 +447,7 @@ void ClangdServer::enumerateTweaks(PathRef File, Range Sel, for (auto &T : prepareTweaks(*Sel, Filter)) { Res.push_back({T->id(), T->title(), T->intent()}); PreparedTweaks.insert(T->id()); + TweakAvailable.record(1, T->id()); } } @@ -451,6 +460,10 @@ void ClangdServer::enumerateTweaks(PathRef File, Range Sel, void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID, Callback CB) { + // Tracks number of times a tweak has been applied. 
+  static constexpr trace::Metric TweakAttempt(
+      "tweak_attempt", trace::Metric::Counter, "tweak_id");
+  TweakAttempt.record(1, TweakID);
   auto Action = [File = File.str(), Sel, TweakID = TweakID.str(),
                  CB = std::move(CB), FS = FSProvider.getFileSystem()](
                     Expected<InputsAndAST> InpAST) mutable {
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index f6e7e0799d83b..76fa64b5a3142 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -10,11 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CLANGDSERVER_H
 
 #include "../clang-tidy/ClangTidyOptions.h"
-#include "Cancellation.h"
 #include "CodeComplete.h"
-#include "FSProvider.h"
-#include "FormattedString.h"
-#include "Function.h"
 #include "GlobalCompilationDatabase.h"
 #include "Hover.h"
 #include "Protocol.h"
@@ -26,6 +22,9 @@
 #include "index/Index.h"
 #include "refactor/Rename.h"
 #include "refactor/Tweak.h"
+#include "support/Cancellation.h"
+#include "support/FSProvider.h"
+#include "support/Function.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/ADT/FunctionExtras.h"
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 3d9bfe509f2b5..4c7f1457518c4 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -26,18 +26,19 @@
 #include "FileDistance.h"
 #include "FuzzyMatch.h"
 #include "Headers.h"
-#include "Logger.h"
+#include "Hover.h"
 #include "Preamble.h"
 #include "Protocol.h"
 #include "Quality.h"
 #include "SourceCode.h"
 #include "TUScheduler.h"
-#include "Threading.h"
-#include "Trace.h"
 #include "URI.h"
 #include "index/Index.h"
 #include "index/Symbol.h"
 #include "index/SymbolOrigin.h"
+#include "support/Logger.h"
+#include "support/Threading.h"
+#include "support/Trace.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/Basic/CharInfo.h"
@@ -371,12 +372,19 @@ struct CodeCompletionBuilder {
       S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
       S.ReturnType = std::string(C.IndexResult->ReturnType);
     }
-    if (ExtractDocumentation && Completion.Documentation.empty()) {
-      if (C.IndexResult)
-        Completion.Documentation = std::string(C.IndexResult->Documentation);
-      else if (C.SemaResult)
-        Completion.Documentation = getDocComment(*ASTCtx, *C.SemaResult,
-                                                 /*CommentsFromHeader=*/false);
+    if (ExtractDocumentation && !Completion.Documentation) {
+      auto SetDoc = [&](llvm::StringRef Doc) {
+        if (!Doc.empty()) {
+          Completion.Documentation.emplace();
+          parseDocumentation(Doc, *Completion.Documentation);
+        }
+      };
+      if (C.IndexResult) {
+        SetDoc(C.IndexResult->Documentation);
+      } else if (C.SemaResult) {
+        SetDoc(getDocComment(*ASTCtx, *C.SemaResult,
+                             /*CommentsFromHeader=*/false));
+      }
     }
   }
 
@@ -1816,6 +1824,21 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
   return false;
 }
 
+// FIXME: find a home for this (that can depend on both markup and Protocol).
+static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
+  MarkupContent Result;
+  Result.kind = Kind;
+  switch (Kind) {
+  case MarkupKind::PlainText:
+    Result.value.append(Doc.asPlainText());
+    break;
+  case MarkupKind::Markdown:
+    Result.value.append(Doc.asMarkdown());
+    break;
+  }
+  return Result;
+}
+
 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
   CompletionItem LSP;
   const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
@@ -1830,9 +1853,16 @@ CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
           ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
           : ReturnType;
   LSP.deprecated = Deprecated;
-  if (InsertInclude)
-    LSP.detail += "\n" + InsertInclude->Header;
-  LSP.documentation = Documentation;
+  // Combine header information and documentation in LSP `documentation` field.
+  // This is not quite right semantically, but tends to display well in editors.
+  if (InsertInclude || Documentation) {
+    markup::Document Doc;
+    if (InsertInclude)
+      Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
+    if (Documentation)
+      Doc.append(*Documentation);
+    LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
+  }
   LSP.sortText = sortText(Score.Total, Name);
   LSP.filterText = Name;
   LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h
index 3adea47c89a11..7070aec79b799 100644
--- a/clang-tools-extra/clangd/CodeComplete.h
+++ b/clang-tools-extra/clangd/CodeComplete.h
@@ -16,13 +16,14 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CODECOMPLETE_H
 
 #include "Headers.h"
-#include "Logger.h"
-#include "Path.h"
 #include "Protocol.h"
 #include "Quality.h"
 #include "index/Index.h"
 #include "index/Symbol.h"
 #include "index/SymbolOrigin.h"
+#include "support/Logger.h"
+#include "support/Markup.h"
+#include "support/Path.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/CodeCompleteOptions.h"
 #include "clang/Tooling/CompilationDatabase.h"
@@ -73,6 +74,9 @@ struct CodeCompleteOptions {
   /// If more results are available, we set CompletionList.isIncomplete.
   size_t Limit = 0;
 
+  /// Whether to present doc comments as plain-text or markdown.
+  MarkupKind DocumentationFormat = MarkupKind::PlainText;
+
   enum IncludeInsertion {
     IWYU,
     NeverInsert,
@@ -161,7 +165,8 @@ struct CodeCompletion {
   std::string SnippetSuffix;
   // Type to be displayed for this completion.
   std::string ReturnType;
-  std::string Documentation;
+  // The parsed documentation comment.
+  llvm::Optional<markup::Document> Documentation;
   CompletionItemKind Kind = CompletionItemKind::Missing;
   // This completion item may represent several symbols that can be inserted in
   // the same way, such as function overloads. In this case BundleSize > 1, and
diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp
index ae343d5fb3a58..84f72f5f58c72 100644
--- a/clang-tools-extra/clangd/CompileCommands.cpp
+++ b/clang-tools-extra/clangd/CompileCommands.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CompileCommands.h"
-#include "Logger.h"
+#include "support/Logger.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
 #include "llvm/Support/FileSystem.h"
diff --git a/clang-tools-extra/clangd/Compiler.cpp b/clang-tools-extra/clangd/Compiler.cpp
index 47cec5ae12e8d..957d7c382125e 100644
--- a/clang-tools-extra/clangd/Compiler.cpp
+++ b/clang-tools-extra/clangd/Compiler.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Compiler.h"
-#include "Logger.h"
+#include "support/Logger.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Serialization/PCHContainerOperations.h"
diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp
index d72c2bd68ce89..3558ca9297982 100644
--- a/clang-tools-extra/clangd/Diagnostics.cpp
+++ b/clang-tools-extra/clangd/Diagnostics.cpp
@@ -9,9 +9,9 @@
 #include "Diagnostics.h"
 #include "../clang-tidy/ClangTidyDiagnosticConsumer.h"
 #include "Compiler.h"
-#include "Logger.h"
 #include "Protocol.h"
 #include "SourceCode.h"
+#include "support/Logger.h"
 #include "clang/Basic/AllDiagnostics.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/DiagnosticIDs.h"
diff --git a/clang-tools-extra/clangd/Diagnostics.h b/clang-tools-extra/clangd/Diagnostics.h
index 7461f8529c2f6..ebf86ba8716a1 100644
--- a/clang-tools-extra/clangd/Diagnostics.h
+++ b/clang-tools-extra/clangd/Diagnostics.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIAGNOSTICS_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIAGNOSTICS_H
 
-#include "Path.h"
 #include "Protocol.h"
+#include "support/Path.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "llvm/ADT/ArrayRef.h"
diff --git a/clang-tools-extra/clangd/DraftStore.cpp b/clang-tools-extra/clangd/DraftStore.cpp
index 03867dcd286e3..bef48ddfa37d6 100644
--- a/clang-tools-extra/clangd/DraftStore.cpp
+++ b/clang-tools-extra/clangd/DraftStore.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "DraftStore.h"
-#include "Logger.h"
 #include "SourceCode.h"
+#include "support/Logger.h"
 #include "llvm/Support/Errc.h"
 
 namespace clang {
diff --git a/clang-tools-extra/clangd/DraftStore.h b/clang-tools-extra/clangd/DraftStore.h
index babc679ed7639..3c2d0c6a4b0fa 100644
--- a/clang-tools-extra/clangd/DraftStore.h
+++ b/clang-tools-extra/clangd/DraftStore.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DRAFTSTORE_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DRAFTSTORE_H
 
-#include "Path.h"
 #include "Protocol.h"
+#include "support/Path.h"
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringMap.h"
 #include <mutex>
diff --git a/clang-tools-extra/clangd/FS.h b/clang-tools-extra/clangd/FS.h
index 7b7f7cb25fac9..5994bdff9271e 100644
--- a/clang-tools-extra/clangd/FS.h
+++ b/clang-tools-extra/clangd/FS.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FS_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FS_H
 
-#include "Path.h"
+#include "support/Path.h"
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringMap.h"
"llvm/ADT/StringMap.h" diff --git a/clang-tools-extra/clangd/Features.inc.in b/clang-tools-extra/clangd/Features.inc.in index 6797232ddac7c..da75aa67a65b8 100644 --- a/clang-tools-extra/clangd/Features.inc.in +++ b/clang-tools-extra/clangd/Features.inc.in @@ -1,2 +1 @@ #define CLANGD_BUILD_XPC @CLANGD_BUILD_XPC@ -#define CLANGD_ENABLE_REMOTE @CLANGD_ENABLE_REMOTE@ diff --git a/clang-tools-extra/clangd/FileDistance.cpp b/clang-tools-extra/clangd/FileDistance.cpp index a9ce3e514bcc1..584c64d077e17 100644 --- a/clang-tools-extra/clangd/FileDistance.cpp +++ b/clang-tools-extra/clangd/FileDistance.cpp @@ -31,7 +31,7 @@ //===-------------------------------------------------------------------------// #include "FileDistance.h" -#include "Logger.h" +#include "support/Logger.h" #include "llvm/ADT/STLExtras.h" #include diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 06c124c0b35b1..58e2ee1e21c77 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -9,11 +9,11 @@ #include "AST.h" #include "FuzzyMatch.h" -#include "Logger.h" #include "ParsedAST.h" #include "Quality.h" #include "SourceCode.h" #include "index/Index.h" +#include "support/Logger.h" #include "clang/AST/DeclTemplate.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index be3306f3fc78d..2813e30b4335a 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -8,7 +8,7 @@ #include "FindTarget.h" #include "AST.h" -#include "Logger.h" +#include "support/Logger.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" diff --git a/clang-tools-extra/clangd/Format.cpp b/clang-tools-extra/clangd/Format.cpp index b8b9d8e627d29..052cf74f8bd00 100644 --- a/clang-tools-extra/clangd/Format.cpp +++ b/clang-tools-extra/clangd/Format.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// #include "Format.h" -#include "Logger.h" -#include "clang/Basic/SourceManager.h" +#include "support/Logger.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Core/Replacement.h" diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 3f8a7d8219f70..5e75864ec8d44 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -8,8 +8,8 @@ #include "GlobalCompilationDatabase.h" #include "FS.h" -#include "Logger.h" -#include "Path.h" +#include "support/Logger.h" +#include "support/Path.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index 2fc754927fd95..e9a5417d9d69b 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -10,8 +10,8 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_GLOBALCOMPILATIONDATABASE_H #include "CompileCommands.h" -#include "Function.h" -#include "Path.h" +#include "support/Function.h" +#include "support/Path.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include 
"clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/Optional.h" diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp index 1490d3ebe5c0d..3ebf0542dcf0f 100644 --- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp +++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp @@ -8,9 +8,9 @@ #include "HeaderSourceSwitch.h" #include "AST.h" -#include "Logger.h" #include "SourceCode.h" #include "index/SymbolCollector.h" +#include "support/Logger.h" #include "clang/AST/Decl.h" namespace clang { diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp index dd4cafd2ddfc2..50c375988f7b2 100644 --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -8,8 +8,8 @@ #include "Headers.h" #include "Compiler.h" -#include "Logger.h" #include "SourceCode.h" +#include "support/Logger.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" @@ -38,11 +38,12 @@ class RecordHeaders : public PPCallbacks { if (isInsideMainFile(HashLoc, SM)) { Out->MainFileIncludes.emplace_back(); auto &Inc = Out->MainFileIncludes.back(); - Inc.R = halfOpenToRange(SM, FilenameRange); Inc.Written = (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(); Inc.Resolved = std::string(File ? File->tryGetRealPathName() : ""); Inc.HashOffset = SM.getFileOffset(HashLoc); + Inc.HashLine = + SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1; Inc.FileKind = FileKind; Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID(); } @@ -228,8 +229,8 @@ IncludeInserter::insert(llvm::StringRef VerbatimHeader) const { llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Inclusion &Inc) { return OS << Inc.Written << " = " - << (!Inc.Resolved.empty() ? Inc.Resolved : "[unresolved]") << " at " - << Inc.R; + << (!Inc.Resolved.empty() ? Inc.Resolved : "[unresolved]") + << " at line" << Inc.HashLine; } } // namespace clangd diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h index 1f3968765d838..d7053b396d392 100644 --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -9,10 +9,10 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H -#include "Path.h" #include "Protocol.h" #include "SourceCode.h" #include "index/Symbol.h" +#include "support/Path.h" #include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" #include "clang/Lex/HeaderSearch.h" @@ -52,11 +52,11 @@ llvm::SmallVector getRankedIncludes(const Symbol &Sym); // An #include directive that we found in the main file. struct Inclusion { - Range R; // Inclusion range. tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import std::string Written; // Inclusion name as written e.g. . Path Resolved; // Resolved path of included file. Empty if not resolved. unsigned HashOffset = 0; // Byte offset from start of file to #. + int HashLine = 0; // Line number containing the directive, 0-indexed. 
   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
 };
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &);
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index ba8fcd4687b03..799d376264896 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -11,12 +11,12 @@
 #include "AST.h"
 #include "CodeCompletionStrings.h"
 #include "FindTarget.h"
-#include "FormattedString.h"
-#include "Logger.h"
 #include "ParsedAST.h"
 #include "Selection.h"
 #include "SourceCode.h"
 #include "index/SymbolCollector.h"
+#include "support/Logger.h"
+#include "support/Markup.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Decl.h"
@@ -648,6 +648,7 @@ bool isHardLineBreakIndicator(llvm::StringRef Rest) {
 }
 
 bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) {
+  // Should we also consider whether Line is short?
   return punctuationIndicatesLineBreak(Line) || isHardLineBreakIndicator(Rest);
 }
 
@@ -772,7 +773,7 @@ markup::Document HoverInfo::present() const {
   // https://github.com/microsoft/vscode/issues/88417 for details.
   markup::Paragraph &Header = Output.addHeading(3);
   if (Kind != index::SymbolKind::Unknown)
-    Header.appendText(std::string(index::getSymbolKindString(Kind)));
+    Header.appendText(index::getSymbolKindString(Kind)).appendSpace();
   assert(!Name.empty() && "hover triggered on a nameless symbol");
   Header.appendCode(Name);
 
@@ -786,9 +787,9 @@ markup::Document HoverInfo::present() const {
     // Parameters:
     // - `bool param1`
     // - `int param2 = 5`
-    Output.addParagraph().appendText("→").appendCode(*ReturnType);
+    Output.addParagraph().appendText("→ ").appendCode(*ReturnType);
     if (Parameters && !Parameters->empty()) {
-      Output.addParagraph().appendText("Parameters:");
+      Output.addParagraph().appendText("Parameters: ");
       markup::BulletList &L = Output.addBulletList();
       for (const auto &Param : *Parameters) {
         std::string Buffer;
@@ -803,16 +804,17 @@ markup::Document HoverInfo::present() const {
 
   if (Value) {
     markup::Paragraph &P = Output.addParagraph();
-    P.appendText("Value =");
+    P.appendText("Value = ");
    P.appendCode(*Value);
   }
 
   if (Offset)
     Output.addParagraph().appendText(
-        llvm::formatv("Offset: {0} byte{1}", *Offset, *Offset == 1 ? "" : "s"));
+        llvm::formatv("Offset: {0} byte{1}", *Offset, *Offset == 1 ? "" : "s")
+            .str());
   if (Size)
     Output.addParagraph().appendText(
-        llvm::formatv("Size: {0} byte{1}", *Size, *Size == 1 ? "" : "s"));
+        llvm::formatv("Size: {0} byte{1}", *Size, *Size == 1 ? "" : "s").str());
 
   if (!Documentation.empty())
     parseDocumentation(Documentation, Output);
@@ -838,6 +840,52 @@ markup::Document HoverInfo::present() const {
   return Output;
 }
 
+// If the backtick at `Offset` starts a probable quoted range, return the range
+// (including the quotes).
+llvm::Optional<llvm::StringRef> getBacktickQuoteRange(llvm::StringRef Line,
+                                                      unsigned Offset) {
+  assert(Line[Offset] == '`');
+
+  // The open-quote is usually preceded by whitespace.
+  llvm::StringRef Prefix = Line.substr(0, Offset);
+  constexpr llvm::StringLiteral BeforeStartChars = " \t(=";
+  if (!Prefix.empty() && !BeforeStartChars.contains(Prefix.back()))
+    return llvm::None;
+
+  // The quoted string must be nonempty and usually has no leading/trailing ws.
+  auto Next = Line.find('`', Offset + 1);
+  if (Next == llvm::StringRef::npos)
+    return llvm::None;
+  llvm::StringRef Contents = Line.slice(Offset + 1, Next);
+  if (Contents.empty() || isWhitespace(Contents.front()) ||
+      isWhitespace(Contents.back()))
+    return llvm::None;
+
+  // The close-quote is usually followed by whitespace or punctuation.
+  llvm::StringRef Suffix = Line.substr(Next + 1);
+  constexpr llvm::StringLiteral AfterEndChars = " \t)=.,;:";
+  if (!Suffix.empty() && !AfterEndChars.contains(Suffix.front()))
+    return llvm::None;
+
+  return Line.slice(Offset, Next + 1);
+}
+
+void parseDocumentationLine(llvm::StringRef Line, markup::Paragraph &Out) {
+  // Probably this is appendText(Line), but scan for something interesting.
+  for (unsigned I = 0; I < Line.size(); ++I) {
+    switch (Line[I]) {
+    case '`':
+      if (auto Range = getBacktickQuoteRange(Line, I)) {
+        Out.appendText(Line.substr(0, I));
+        Out.appendCode(Range->trim("`"), /*Preserve=*/true);
+        return parseDocumentationLine(Line.substr(I + Range->size()), Out);
+      }
+      break;
+    }
+  }
+  Out.appendText(Line).appendSpace();
+}
+
 void parseDocumentation(llvm::StringRef Input, markup::Document &Output) {
   std::vector<llvm::StringRef> ParagraphLines;
   auto FlushParagraph = [&] {
@@ -845,7 +893,7 @@ void parseDocumentation(llvm::StringRef Input, markup::Document &Output) {
       return;
     auto &P = Output.addParagraph();
     for (llvm::StringRef Line : ParagraphLines)
-      P.appendText(Line.str());
+      parseDocumentationLine(Line, P);
     ParagraphLines.clear();
   };
diff --git a/clang-tools-extra/clangd/Hover.h b/clang-tools-extra/clangd/Hover.h
index 4476ed8743058..931e1c2363a45 100644
--- a/clang-tools-extra/clangd/Hover.h
+++ b/clang-tools-extra/clangd/Hover.h
@@ -9,9 +9,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HOVER_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HOVER_H
 
-#include "FormattedString.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
+#include "support/Markup.h"
 #include "clang/Index/IndexSymbol.h"
 
 namespace clang {
@@ -80,6 +80,7 @@ struct HoverInfo {
 };
 
 // Try to infer structure of a documentation comment (e.g. line breaks).
+// FIXME: move to another file so CodeComplete doesn't depend on Hover.
 void parseDocumentation(llvm::StringRef Input, markup::Document &Output);
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const HoverInfo::Param &);
diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp
index 45fb07ad06a1a..945f4eced88c4 100644
--- a/clang-tools-extra/clangd/IncludeFixer.cpp
+++ b/clang-tools-extra/clangd/IncludeFixer.cpp
@@ -9,11 +9,11 @@
 #include "IncludeFixer.h"
 #include "AST.h"
 #include "Diagnostics.h"
-#include "Logger.h"
 #include "SourceCode.h"
-#include "Trace.h"
 #include "index/Index.h"
 #include "index/Symbol.h"
+#include "support/Logger.h"
+#include "support/Trace.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclarationName.h"
diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp
index a9254732562bc..fa86baf6c5816 100644
--- a/clang-tools-extra/clangd/JSONTransport.cpp
+++ b/clang-tools-extra/clangd/JSONTransport.cpp
@@ -5,11 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "Cancellation.h"
-#include "Logger.h"
 #include "Protocol.h" // For LSPError
-#include "Shutdown.h"
 #include "Transport.h"
+#include "support/Cancellation.h"
+#include "support/Logger.h"
+#include "support/Shutdown.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/Error.h"
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 7b2426115df7c..61221aaa1491e 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -14,11 +14,11 @@
 #include "Diagnostics.h"
 #include "Headers.h"
 #include "IncludeFixer.h"
-#include "Logger.h"
 #include "SourceCode.h"
-#include "Trace.h"
 #include "index/CanonicalIncludes.h"
 #include "index/Index.h"
+#include "support/Logger.h"
+#include "support/Trace.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/Basic/LangOptions.h"
diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h
index 88fb6c3b2d655..d90f77f9263b3 100644
--- a/clang-tools-extra/clangd/ParsedAST.h
+++ b/clang-tools-extra/clangd/ParsedAST.h
@@ -24,9 +24,9 @@
 #include "Compiler.h"
 #include "Diagnostics.h"
 #include "Headers.h"
-#include "Path.h"
 #include "Preamble.h"
 #include "index/CanonicalIncludes.h"
+#include "support/Path.h"
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Frontend/PrecompiledPreamble.h"
 #include "clang/Lex/Preprocessor.h"
diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp
index 8392748227b4a..2768bd1ec6a08 100644
--- a/clang-tools-extra/clangd/Preamble.cpp
+++ b/clang-tools-extra/clangd/Preamble.cpp
@@ -9,8 +9,8 @@
 #include "Preamble.h"
 #include "Compiler.h"
 #include "Headers.h"
-#include "Logger.h"
-#include "Trace.h"
+#include "support/Logger.h"
+#include "support/Trace.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
diff --git a/clang-tools-extra/clangd/Preamble.h b/clang-tools-extra/clangd/Preamble.h
index 10c292a71f388..f2e1ba98f3326 100644
--- a/clang-tools-extra/clangd/Preamble.h
+++ b/clang-tools-extra/clangd/Preamble.h
@@ -27,8 +27,8 @@
 #include "Diagnostics.h"
 #include "FS.h"
 #include "Headers.h"
-#include "Path.h"
 #include "index/CanonicalIncludes.h"
+#include "support/Path.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/PrecompiledPreamble.h"
 #include "clang/Tooling/CompilationDatabase.h"
"clang/Frontend/PrecompiledPreamble.h" #include "clang/Tooling/CompilationDatabase.h" diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index b0c7ce2acb337..675ba18e80ce1 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -11,8 +11,8 @@ //===----------------------------------------------------------------------===// #include "Protocol.h" -#include "Logger.h" #include "URI.h" +#include "support/Logger.h" #include "clang/Basic/LLVM.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/Hashing.h" @@ -311,6 +311,12 @@ bool fromJSON(const llvm::json::Value &Params, ClientCapabilities &R) { if (auto *Item = Completion->getObject("completionItem")) { if (auto SnippetSupport = Item->getBoolean("snippetSupport")) R.CompletionSnippets = *SnippetSupport; + if (auto DocumentationFormat = Item->getArray("documentationFormat")) { + for (const auto &Format : *DocumentationFormat) { + if (fromJSON(Format, R.CompletionDocumentationFormat)) + break; + } + } } if (auto *ItemKind = Completion->getObject("completionItemKind")) { if (auto *ValueSet = ItemKind->get("valueSet")) { @@ -334,11 +340,8 @@ bool fromJSON(const llvm::json::Value &Params, ClientCapabilities &R) { if (auto *Hover = TextDocument->getObject("hover")) { if (auto *ContentFormat = Hover->getArray("contentFormat")) { for (const auto &Format : *ContentFormat) { - MarkupKind K = MarkupKind::PlainText; - if (fromJSON(Format, K)) { - R.HoverContentFormat = K; + if (fromJSON(Format, R.HoverContentFormat)) break; - } } } } @@ -891,7 +894,7 @@ llvm::json::Value toJSON(const CompletionItem &CI) { Result["kind"] = static_cast(CI.kind); if (!CI.detail.empty()) Result["detail"] = CI.detail; - if (!CI.documentation.empty()) + if (CI.documentation) Result["documentation"] = CI.documentation; if (!CI.sortText.empty()) Result["sortText"] = CI.sortText; diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 8a0e5b44e057f..0177ee262ae34 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -430,6 +430,10 @@ struct ClientCapabilities { /// textDocument.completion.completionItemKind.valueSet llvm::Optional CompletionItemKinds; + /// The documentation format that should be used for textDocument/completion. + /// textDocument.completion.completionItem.documentationFormat + MarkupKind CompletionDocumentationFormat = MarkupKind::PlainText; + /// Client supports CodeAction return value for textDocument/codeAction. /// textDocument.codeAction.codeActionLiteralSupport. bool CodeActionStructure = false; @@ -1105,7 +1109,7 @@ struct CompletionItem { std::string detail; /// A human-readable string that represents a doc-comment. - std::string documentation; + llvm::Optional documentation; /// A string that should be used when comparing this item with other items. /// When `falsy` the label is used. diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp index d6502c6b84bad..2ab217dac1559 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp @@ -30,9 +30,9 @@ // in the paths that are explicitly whitelisted by the user. 
#include "GlobalCompilationDatabase.h" -#include "Logger.h" -#include "Path.h" -#include "Trace.h" +#include "support/Logger.h" +#include "support/Path.h" +#include "support/Trace.h" #include "clang/Driver/Types.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/DenseMap.h" diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index b4565446c8ec9..7d52714ad3fa4 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "Selection.h" -#include "Logger.h" #include "SourceCode.h" +#include "support/Logger.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index baf13755ecf71..d2470da601408 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -8,10 +8,10 @@ #include "SemanticHighlighting.h" #include "FindTarget.h" -#include "Logger.h" #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index dd4c863cb96ab..0d08bf8e0a1ae 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "SourceCode.h" -#include "Context.h" #include "FuzzyMatch.h" -#include "Logger.h" #include "Protocol.h" #include "refactor/Tweak.h" +#include "support/Context.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" @@ -564,7 +564,7 @@ format::FormatStyle getFormatStyleForFile(llvm::StringRef File, if (!Style) { log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, Style.takeError()); - Style = format::getLLVMStyle(); + return format::getLLVMStyle(); } return *Style; } diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index d1e5aa5f7c2b3..c00cc17ac9bcf 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -13,8 +13,8 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H -#include "Context.h" #include "Protocol.h" +#include "support/Context.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 1ff6bbaebc244..ee6d52188934a 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -47,18 +47,18 @@ // requests will receive latest build preamble, which might possibly be stale. 
#include "TUScheduler.h" -#include "Cancellation.h" #include "Compiler.h" -#include "Context.h" #include "Diagnostics.h" #include "GlobalCompilationDatabase.h" -#include "Logger.h" #include "ParsedAST.h" -#include "Path.h" #include "Preamble.h" -#include "Threading.h" -#include "Trace.h" #include "index/CanonicalIncludes.h" +#include "support/Cancellation.h" +#include "support/Context.h" +#include "support/Logger.h" +#include "support/Path.h" +#include "support/Threading.h" +#include "support/Trace.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/FunctionExtras.h" @@ -142,11 +142,19 @@ class TUScheduler::ASTCache { /// Returns the cached value for \p K, or llvm::None if the value is not in /// the cache anymore. If nullptr was cached for \p K, this function will /// return a null unique_ptr wrapped into an optional. - llvm::Optional> take(Key K) { + /// If \p AccessMetric is set records whether there was a hit or miss. + llvm::Optional> + take(Key K, const trace::Metric *AccessMetric = nullptr) { + // Record metric after unlocking the mutex. std::unique_lock Lock(Mut); auto Existing = findByKey(K); - if (Existing == LRU.end()) + if (Existing == LRU.end()) { + if (AccessMetric) + AccessMetric->record(1, "miss"); return None; + } + if (AccessMetric) + AccessMetric->record(1, "hit"); std::unique_ptr V = std::move(Existing->second); LRU.erase(Existing); // GCC 4.8 fails to compile `return V;`, as it tries to call the copy @@ -649,10 +657,14 @@ void ASTWorker::runWithAST( llvm::StringRef Name, llvm::unique_function)> Action, TUScheduler::ASTActionInvalidation Invalidation) { + // Tracks ast cache accesses for read operations. + static constexpr trace::Metric ASTAccessForRead( + "ast_access_read", trace::Metric::Counter, "result"); auto Task = [=, Action = std::move(Action)]() mutable { if (auto Reason = isCancelled()) return Action(llvm::make_error(Reason)); - llvm::Optional> AST = IdleASTs.take(this); + llvm::Optional> AST = + IdleASTs.take(this, &ASTAccessForRead); if (!AST) { StoreDiags CompilerInvocationDiagConsumer; std::unique_ptr Invocation = @@ -773,6 +785,9 @@ void ASTWorker::updatePreamble(std::unique_ptr CI, void ASTWorker::generateDiagnostics( std::unique_ptr Invocation, ParseInputs Inputs, std::vector CIDiags) { + // Tracks ast cache accesses for publishing diags. + static constexpr trace::Metric ASTAccessForDiag( + "ast_access_diag", trace::Metric::Counter, "result"); assert(Invocation); // No need to rebuild the AST if we won't send the diagnostics. { @@ -801,7 +816,8 @@ void ASTWorker::generateDiagnostics( // We might be able to reuse the last we've built for a read request. // FIXME: It might be better to not reuse this AST. That way queued AST builds // won't be required for diags. 
-  llvm::Optional<std::unique_ptr<ParsedAST>> AST = IdleASTs.take(this);
+  llvm::Optional<std::unique_ptr<ParsedAST>> AST =
+      IdleASTs.take(this, &ASTAccessForDiag);
   if (!AST || !InputsAreLatest) {
     auto RebuildStartTime = DebouncePolicy::clock::now();
     llvm::Optional<ParsedAST> NewAST = ParsedAST::build(
diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h
index 48ed2c76f5461..f24a777d5836f 100644
--- a/clang-tools-extra/clangd/TUScheduler.h
+++ b/clang-tools-extra/clangd/TUScheduler.h
@@ -11,11 +11,11 @@
 
 #include "Compiler.h"
 #include "Diagnostics.h"
-#include "Function.h"
 #include "GlobalCompilationDatabase.h"
-#include "Path.h"
-#include "Threading.h"
 #include "index/CanonicalIncludes.h"
+#include "support/Function.h"
+#include "support/Path.h"
+#include "support/Threading.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 7eb3c4fcc7a36..1d82763b6a3cf 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -10,7 +10,6 @@
 #include "CodeCompletionStrings.h"
 #include "FindSymbols.h"
 #include "FindTarget.h"
-#include "Logger.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
 #include "Quality.h"
@@ -21,6 +20,7 @@
 #include "index/Merge.h"
 #include "index/Relation.h"
 #include "index/SymbolLocation.h"
+#include "support/Logger.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Attr.h"
@@ -187,7 +187,7 @@ llvm::Optional<LocatedSymbol> locateFileReferent(const Position &Pos,
                                                  ParsedAST &AST,
                                                  llvm::StringRef MainFilePath) {
   for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
-    if (!Inc.Resolved.empty() && Inc.R.start.line == Pos.line) {
+    if (!Inc.Resolved.empty() && Inc.HashLine == Pos.line) {
       LocatedSymbol File;
       File.Name = std::string(llvm::sys::path::filename(Inc.Resolved));
       File.PreferredDeclaration = {
@@ -599,10 +599,23 @@ std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
 
   std::vector<DocumentLink> Result;
   for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
-    if (!Inc.Resolved.empty()) {
-      Result.push_back(DocumentLink(
-          {Inc.R, URIForFile::canonicalize(Inc.Resolved, *MainFilePath)}));
-    }
+    if (Inc.Resolved.empty())
+      continue;
+    auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
+    const auto *HashTok = AST.getTokens().spelledTokenAt(HashLoc);
+    assert(HashTok && "got inclusion at wrong offset");
+    const auto *IncludeTok = std::next(HashTok);
+    const auto *FileTok = std::next(IncludeTok);
+    // FileTok->range is not sufficient here, as raw lexing wouldn't yield
+    // correct tokens for angled filenames. Hence we explicitly use
+    // Inc.Written's length.
+ auto FileRange = + syntax::FileRange(SM, FileTok->location(), Inc.Written.length()) + .toCharRange(SM); + + Result.push_back( + DocumentLink({halfOpenToRange(SM, FileRange), + URIForFile::canonicalize(Inc.Resolved, *MainFilePath)})); } return Result; diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h index 4645d32c763cb..521c28f934bef 100644 --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -13,12 +13,11 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H -#include "FormattedString.h" -#include "Path.h" #include "Protocol.h" #include "SourceCode.h" #include "index/Index.h" #include "index/SymbolLocation.h" +#include "support/Path.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Type.h" #include "clang/Format/Format.h" diff --git a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt index 90379822ac661..778b61158304c 100644 --- a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt +++ b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt @@ -24,4 +24,5 @@ clang_target_link_libraries(clangd-fuzzer target_link_libraries(clangd-fuzzer PRIVATE clangDaemon + clangdSupport ) diff --git a/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp b/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp index 9776b0585e2b3..982d5fa292d37 100644 --- a/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp +++ b/clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp @@ -15,8 +15,8 @@ #include "ClangdLSPServer.h" #include "ClangdServer.h" #include "CodeComplete.h" -#include "FSProvider.h" #include "refactor/Rename.h" +#include "support/FSProvider.h" #include #include diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 4c5719d0526c7..cc2c288c090e6 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -8,16 +8,10 @@ #include "index/Background.h" #include "Compiler.h" -#include "Context.h" -#include "FSProvider.h" #include "Headers.h" -#include "Logger.h" #include "ParsedAST.h" -#include "Path.h" #include "SourceCode.h" #include "Symbol.h" -#include "Threading.h" -#include "Trace.h" #include "URI.h" #include "index/BackgroundIndexLoader.h" #include "index/FileIndex.h" @@ -27,6 +21,12 @@ #include "index/Relation.h" #include "index/Serialization.h" #include "index/SymbolCollector.h" +#include "support/Context.h" +#include "support/FSProvider.h" +#include "support/Logger.h" +#include "support/Path.h" +#include "support/Threading.h" +#include "support/Trace.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Driver/Types.h" @@ -174,44 +174,50 @@ void BackgroundIndex::update( llvm::StringRef MainFile, IndexFileIn Index, const llvm::StringMap &ShardVersionsSnapshot, bool HadErrors) { - llvm::StringMap FilesToUpdate; + // Keys are URIs. + llvm::StringMap> FilesToUpdate; + // Note that sources do not contain any information regarding missing headers, + // since we don't even know what absolute path they should fall in. for (const auto &IndexIt : *Index.Sources) { const auto &IGN = IndexIt.getValue(); - // Note that sources do not contain any information regarding missing - // headers, since we don't even know what absolute path they should fall in. 
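
The getDocumentLinks() hunk above derives the link range from the spelled tokens starting at the '#' rather than from a stored range, and measures the filename by Inc.Written because an angled include such as <vector> is not a single raw token. A simplified sketch of that measurement; plain string search stands in for clangd's spelled-token walk, and includeSpellingRange is an invented name for illustration:

#include <cstddef>
#include <string>

// Half-open [Begin, End) character offsets, as LSP document links use.
struct CharRange {
  std::size_t Begin, End;
};

// Locate the written include spelling (e.g. "<vector>" or "\"foo.h\"")
// after the '#' of the directive and take exactly Written.size()
// characters. Measuring by the written spelling matters because the
// first raw token of an angled filename stops at '<'.
CharRange includeSpellingRange(const std::string &Code,
                               std::size_t HashOffset,
                               const std::string &Written) {
  std::size_t Begin = Code.find(Written, HashOffset);
  if (Begin == std::string::npos) // Shouldn't happen for a real directive.
    return {HashOffset, HashOffset};
  return {Begin, Begin + Written.size()};
}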
- auto AbsPath = llvm::cantFail(URI::resolve(IGN.URI, MainFile), - "Failed to resovle URI"); - const auto DigestIt = ShardVersionsSnapshot.find(AbsPath); + auto AbsPath = URI::resolve(IGN.URI, MainFile); + if (!AbsPath) { + elog("Failed to resolve URI: {0}", AbsPath.takeError()); + continue; + } + const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath); // File has different contents, or indexing was successful this time. if (DigestIt == ShardVersionsSnapshot.end() || DigestIt->getValue().Digest != IGN.Digest || (DigestIt->getValue().HadErrors && !HadErrors)) - FilesToUpdate[AbsPath] = IGN.Digest; + FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; } // Shard slabs into files. - FileShardedIndex ShardedIndex(std::move(Index), MainFile); + FileShardedIndex ShardedIndex(std::move(Index)); // Build and store new slabs for each updated file. for (const auto &FileIt : FilesToUpdate) { - PathRef Path = FileIt.first(); - auto IF = ShardedIndex.getShard(Path); + auto Uri = FileIt.first(); + auto IF = ShardedIndex.getShard(Uri); + assert(IF && "no shard for file in Index.Sources?"); + PathRef Path = FileIt.getValue().first; // Only store command line hash for main files of the TU, since our // current model keeps only one version of a header file. if (Path != MainFile) - IF.Cmd.reset(); + IF->Cmd.reset(); // We need to store shards before updating the index, since the latter // consumes slabs. // FIXME: Also skip serializing the shard if it is already up-to-date. - if (auto Error = IndexStorageFactory(Path)->storeShard(Path, IF)) + if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF)) elog("Failed to write background-index shard for file {0}: {1}", Path, std::move(Error)); { std::lock_guard Lock(ShardVersionsMu); - const auto &Hash = FileIt.getValue(); + const auto &Hash = FileIt.getValue().second; auto DigestIt = ShardVersions.try_emplace(Path); ShardVersion &SV = DigestIt.first->second; // Skip if file is already up to date, unless previous index was broken @@ -225,9 +231,9 @@ void BackgroundIndex::update( // this thread sees the older version but finishes later. This should be // rare in practice. 
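
The update() hunk above replaces llvm::cantFail() with recoverable handling: a URI that fails to resolve now logs and skips that shard instead of aborting the whole background update, and FilesToUpdate is keyed by URI with the resolved path carried alongside the digest. A sketch of that shape in plain C++; resolveUri and its file:// stub are assumptions for the example, not clangd's URI API:

#include <cstdint>
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>

using Digest = std::uint64_t;

// Stub resolver: accept only file:// URIs; anything else fails.
std::optional<std::string> resolveUri(const std::string &Uri) {
  constexpr char Scheme[] = "file://";
  if (Uri.rfind(Scheme, 0) != 0)
    return std::nullopt;
  return Uri.substr(sizeof(Scheme) - 1);
}

// Keep map keys as URIs (what the index data actually stores) and carry
// the resolved absolute path next to the digest. Unresolvable entries are
// logged and dropped; the rest of the update proceeds.
std::map<std::string, std::pair<std::string, Digest>>
filesToUpdate(const std::map<std::string, Digest> &Sources) {
  std::map<std::string, std::pair<std::string, Digest>> Result;
  for (const auto &[Uri, Dig] : Sources) {
    std::optional<std::string> AbsPath = resolveUri(Uri);
    if (!AbsPath) {
      std::cerr << "Failed to resolve URI: " << Uri << '\n';
      continue; // Recoverable: skip this shard, keep the others.
    }
    Result[Uri] = {std::move(*AbsPath), Dig};
  }
  return Result;
}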
IndexedSymbols.update( - Path, std::make_unique(std::move(*IF.Symbols)), - std::make_unique(std::move(*IF.Refs)), - std::make_unique(std::move(*IF.Relations)), + Path, std::make_unique(std::move(*IF->Symbols)), + std::make_unique(std::move(*IF->Refs)), + std::make_unique(std::move(*IF->Relations)), Path == MainFile); } } diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 2ae11c72d5d43..ffaea75c5d417 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -9,16 +9,16 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H -#include "Context.h" -#include "FSProvider.h" #include "GlobalCompilationDatabase.h" -#include "Path.h" #include "SourceCode.h" -#include "Threading.h" #include "index/BackgroundRebuild.h" #include "index/FileIndex.h" #include "index/Index.h" #include "index/Serialization.h" +#include "support/Context.h" +#include "support/FSProvider.h" +#include "support/Path.h" +#include "support/Threading.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Threading.h" diff --git a/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp b/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp index 08bf07d8b2809..5645569c9ae10 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp @@ -8,9 +8,9 @@ #include "index/BackgroundIndexLoader.h" #include "GlobalCompilationDatabase.h" -#include "Logger.h" -#include "Path.h" #include "index/Background.h" +#include "support/Logger.h" +#include "support/Path.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" diff --git a/clang-tools-extra/clangd/index/BackgroundIndexLoader.h b/clang-tools-extra/clangd/index/BackgroundIndexLoader.h index 0caf1b4635250..aa7ee39ef0bb5 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexLoader.h +++ b/clang-tools-extra/clangd/index/BackgroundIndexLoader.h @@ -9,8 +9,8 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_INDEX_LOADER_H -#include "Path.h" #include "index/Background.h" +#include "support/Path.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index b07728ee6a2c2..eee050b0db9ad 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "GlobalCompilationDatabase.h" -#include "Logger.h" -#include "Path.h" #include "index/Background.h" +#include "support/Logger.h" +#include "support/Path.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" @@ -36,18 +36,13 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, return std::string(ShardRootSS.str()); } -// Uses disk as a storage for index shards. Creates a directory called -// ".clangd/index/" under the path provided during construction. +// Uses disk as a storage for index shards. 
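
The DiskBackedIndexStorage hunks just below move the ".clangd/index" suffix out of the storage class and into the manager, and replace the $HOME fallback with a per-user cache directory; see the sketch here of the resulting placement policy. Assumptions in the sketch: std::filesystem and an XDG-style lookup; the real code uses llvm::sys::path::cache_directory, which also knows the macOS and Windows conventions.

#include <cstdlib>
#include <filesystem>
#include <optional>

namespace fs = std::filesystem;

// Shards for files with a known project root live under
// $ROOT/.clangd/index; everything else goes to a per-user cache.
fs::path shardRoot(const std::optional<fs::path> &ProjectRoot) {
  if (ProjectRoot)
    return *ProjectRoot / ".clangd" / "index";
  if (const char *Xdg = std::getenv("XDG_CACHE_HOME"))
    return fs::path(Xdg) / "clangd" / "index";
  if (const char *Home = std::getenv("HOME"))
    return fs::path(Home) / ".cache" / "clangd" / "index";
  return fs::temp_directory_path() / "clangd" / "index";
}

Keeping the suffix logic in the manager is what lets DiskBackedIndexStorage itself be reused unchanged for the fallback location.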
class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; public: - // Sets DiskShardRoot to (Directory + ".clangd/index/") which is the base - // directory for all shard files. - DiskBackedIndexStorage(llvm::StringRef Directory) { - llvm::SmallString<128> CDBDirectory(Directory); - llvm::sys::path::append(CDBDirectory, ".clangd", "index"); - DiskShardRoot = std::string(CDBDirectory.str()); + // Creates `DiskShardRoot` and any parents during construction. + DiskBackedIndexStorage(llvm::StringRef Directory) : DiskShardRoot(Directory) { std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); if (EC != OK) { @@ -100,26 +95,31 @@ class NullStorage : public BackgroundIndexStorage { }; // Creates and owns IndexStorages for multiple CDBs. +// When a CDB root is found, shards are stored in $ROOT/.clangd/index. +// When no root is found, the fallback path is ~/.cache/clangd/index. class DiskBackedIndexStorageManager { public: DiskBackedIndexStorageManager( std::function(PathRef)> GetProjectInfo) : IndexStorageMapMu(std::make_unique()), GetProjectInfo(std::move(GetProjectInfo)) { - llvm::SmallString<128> HomeDir; - llvm::sys::path::home_directory(HomeDir); - this->HomeDir = HomeDir.str().str(); + llvm::SmallString<128> FallbackDir; + if (llvm::sys::path::cache_directory(FallbackDir)) + llvm::sys::path::append(FallbackDir, "clangd", "index"); + this->FallbackDir = FallbackDir.str().str(); } // Creates or fetches to storage from cache for the specified project. BackgroundIndexStorage *operator()(PathRef File) { std::lock_guard Lock(*IndexStorageMapMu); - Path CDBDirectory = HomeDir; - if (auto PI = GetProjectInfo(File)) - CDBDirectory = PI->SourceRoot; - auto &IndexStorage = IndexStorageMap[CDBDirectory]; + llvm::SmallString<128> StorageDir(FallbackDir); + if (auto PI = GetProjectInfo(File)) { + StorageDir = PI->SourceRoot; + llvm::sys::path::append(StorageDir, ".clangd", "index"); + } + auto &IndexStorage = IndexStorageMap[StorageDir]; if (!IndexStorage) - IndexStorage = create(CDBDirectory); + IndexStorage = create(StorageDir); return IndexStorage.get(); } @@ -132,7 +132,7 @@ class DiskBackedIndexStorageManager { return std::make_unique(CDBDirectory); } - Path HomeDir; + Path FallbackDir; llvm::StringMap> IndexStorageMap; std::unique_ptr IndexStorageMapMu; diff --git a/clang-tools-extra/clangd/index/BackgroundQueue.cpp b/clang-tools-extra/clangd/index/BackgroundQueue.cpp index 00d483f380050..3262a2f46d38f 100644 --- a/clang-tools-extra/clangd/index/BackgroundQueue.cpp +++ b/clang-tools-extra/clangd/index/BackgroundQueue.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "Logger.h" #include "index/Background.h" +#include "support/Logger.h" namespace clang { namespace clangd { diff --git a/clang-tools-extra/clangd/index/BackgroundRebuild.cpp b/clang-tools-extra/clangd/index/BackgroundRebuild.cpp index 5cc61a6decb6c..2aa5fa9ca812f 100644 --- a/clang-tools-extra/clangd/index/BackgroundRebuild.cpp +++ b/clang-tools-extra/clangd/index/BackgroundRebuild.cpp @@ -9,13 +9,9 @@ #include "index/BackgroundRebuild.h" #include "Compiler.h" #include "Headers.h" -#include "Logger.h" #include "ParsedAST.h" -#include "Path.h" #include "SourceCode.h" #include "Symbol.h" -#include "Threading.h" -#include "Trace.h" #include "URI.h" #include "index/FileIndex.h" #include "index/IndexAction.h" @@ -24,6 +20,10 @@ #include "index/Relation.h" #include "index/Serialization.h" #include 
"index/SymbolCollector.h" +#include "support/Logger.h" +#include "support/Path.h" +#include "support/Threading.h" +#include "support/Trace.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 590bf46ec01c7..79ec1120df986 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -8,9 +8,7 @@ #include "FileIndex.h" #include "CollectMacros.h" -#include "Logger.h" #include "ParsedAST.h" -#include "Path.h" #include "SymbolCollector.h" #include "index/CanonicalIncludes.h" #include "index/Index.h" @@ -23,12 +21,15 @@ #include "index/SymbolID.h" #include "index/SymbolOrigin.h" #include "index/dex/Dex.h" +#include "support/Logger.h" +#include "support/Path.h" #include "clang/AST/ASTContext.h" #include "clang/Index/IndexingAction.h" #include "clang/Index/IndexingOptions.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -96,27 +97,6 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, std::move(Relations)); } -// Resolves URI to file paths with cache. -class URIToFileCache { -public: - URIToFileCache(PathRef HintPath) : HintPath(HintPath) {} - - llvm::StringRef operator[](llvm::StringRef FileURI) { - if (FileURI.empty()) - return ""; - auto I = URIToPathCache.try_emplace(FileURI); - if (I.second) { - I.first->second = llvm::cantFail(URI::resolve(FileURI, HintPath), - "Failed to resolve URI"); - } - return I.first->second; - } - -private: - PathRef HintPath; - llvm::StringMap URIToPathCache; -}; - // We keep only the node "U" and its edges. Any node other than "U" will be // empty in the resultant graph. IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) { @@ -137,24 +117,21 @@ IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) { } } // namespace -FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath) +FileShardedIndex::FileShardedIndex(IndexFileIn Input) : Index(std::move(Input)) { - URIToFileCache UriToFile(HintPath); // Used to build RelationSlabs. llvm::DenseMap SymbolIDToFile; // Attribute each Symbol to both their declaration and definition locations. if (Index.Symbols) { for (const auto &S : *Index.Symbols) { - auto File = UriToFile[S.CanonicalDeclaration.FileURI]; - auto It = Shards.try_emplace(File); + auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI); It.first->getValue().Symbols.insert(&S); SymbolIDToFile[S.ID] = &It.first->getValue(); // Only bother if definition file is different than declaration file. 
if (S.Definition && S.Definition.FileURI != S.CanonicalDeclaration.FileURI) { - auto File = UriToFile[S.Definition.FileURI]; - auto It = Shards.try_emplace(File); + auto It = Shards.try_emplace(S.Definition.FileURI); It.first->getValue().Symbols.insert(&S); } } @@ -163,8 +140,7 @@ FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath) if (Index.Refs) { for (const auto &SymRefs : *Index.Refs) { for (const auto &R : SymRefs.second) { - auto File = UriToFile[R.Location.FileURI]; - const auto It = Shards.try_emplace(File); + const auto It = Shards.try_emplace(R.Location.FileURI); It.first->getValue().Refs.insert(&R); RefToSymID[&R] = SymRefs.first; } } @@ -183,25 +159,26 @@ FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath) if (Index.Sources) { const auto &FullGraph = *Index.Sources; for (const auto &It : FullGraph) { - auto File = UriToFile[It.first()]; - auto ShardIt = Shards.try_emplace(File); + auto ShardIt = Shards.try_emplace(It.first()); ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph); } } } -std::vector FileShardedIndex::getAllFiles() const { +std::vector FileShardedIndex::getAllSources() const { // It should be enough to construct a vector with {Shards.keys().begin(), // Shards.keys().end()} but MSVC fails to compile that. std::vector Result; Result.reserve(Shards.size()); - for (PathRef Key : Shards.keys()) + for (auto Key : Shards.keys()) Result.push_back(Key); return Result; } -IndexFileIn FileShardedIndex::getShard(PathRef File) const { - auto It = Shards.find(File); - assert(It != Shards.end() && "received unknown file"); +llvm::Optional +FileShardedIndex::getShard(llvm::StringRef Uri) const { + auto It = Shards.find(Uri); + if (It == Shards.end()) + return llvm::None; IndexFileIn IF; IF.Sources = It->getValue().IG; @@ -245,27 +222,28 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, /*IsIndexMainAST=*/false, Version); } -void FileSymbols::update(PathRef Path, std::unique_ptr Symbols, +void FileSymbols::update(llvm::StringRef Key, + std::unique_ptr Symbols, std::unique_ptr Refs, std::unique_ptr Relations, bool CountReferences) { std::lock_guard Lock(Mutex); if (!Symbols) - FileToSymbols.erase(Path); + SymbolsSnapshot.erase(Key); else - FileToSymbols[Path] = std::move(Symbols); + SymbolsSnapshot[Key] = std::move(Symbols); if (!Refs) { - FileToRefs.erase(Path); + RefsSnapshot.erase(Key); } else { RefSlabAndCountReferences Item; Item.CountReferences = CountReferences; Item.Slab = std::move(Refs); - FileToRefs[Path] = std::move(Item); + RefsSnapshot[Key] = std::move(Item); } if (!Relations) - FileToRelations.erase(Path); + RelationsSnapshot.erase(Key); else - FileToRelations[Path] = std::move(Relations); + RelationsSnapshot[Key] = std::move(Relations); } std::unique_ptr @@ -276,14 +254,14 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle) { std::vector MainFileRefs; { std::lock_guard Lock(Mutex); - for (const auto &FileAndSymbols : FileToSymbols) + for (const auto &FileAndSymbols : SymbolsSnapshot) SymbolSlabs.push_back(FileAndSymbols.second); - for (const auto &FileAndRefs : FileToRefs) { + for (const auto &FileAndRefs : RefsSnapshot) { RefSlabs.push_back(FileAndRefs.second.Slab); if (FileAndRefs.second.CountReferences) MainFileRefs.push_back(RefSlabs.back().get()); } - for (const auto &FileAndRelations : FileToRelations) + for (const auto &FileAndRelations : RelationsSnapshot) RelationSlabs.push_back(FileAndRelations.second); } std::vector AllSymbols; @@ -399,13 +377,16 @@ void
FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, IndexFileIn IF; std::tie(IF.Symbols, std::ignore, IF.Relations) = indexHeaderSymbols(Version, AST, std::move(PP), Includes); - FileShardedIndex ShardedIndex(std::move(IF), Path); - for (PathRef File : ShardedIndex.getAllFiles()) { - auto IF = ShardedIndex.getShard(File); + FileShardedIndex ShardedIndex(std::move(IF)); + for (auto Uri : ShardedIndex.getAllSources()) { + auto IF = ShardedIndex.getShard(Uri); + // We are using the key received from ShardedIndex, so it should always + // exist. + assert(IF); PreambleSymbols.update( - File, std::make_unique(std::move(*IF.Symbols)), + Uri, std::make_unique(std::move(*IF->Symbols)), std::make_unique(), - std::make_unique(std::move(*IF.Relations)), + std::make_unique(std::move(*IF->Relations)), /*CountReferences=*/false); } PreambleIndex.reset( diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h index 539f232a7523b..65a2c305dca55 100644 --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -19,12 +19,12 @@ #include "Index.h" #include "MemIndex.h" #include "Merge.h" -#include "Path.h" #include "index/CanonicalIncludes.h" #include "index/Ref.h" #include "index/Relation.h" #include "index/Serialization.h" #include "index/Symbol.h" +#include "support/Path.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/DenseSet.h" @@ -55,30 +55,30 @@ enum class DuplicateHandling { Merge, }; -/// A container of Symbols from several source files. It can be updated -/// at source-file granularity, replacing all symbols from one file with a new -/// set. +/// A container of slabs associated with a key. It can be updated at key +/// granularity, replacing all slabs belonging to a key with a new set. Keys are +/// usually file paths/uris. /// -/// This implements a snapshot semantics for symbols in a file. Each update to a -/// file will create a new snapshot for all symbols in the file. Snapshots are -/// managed with shared pointers that are shared between this class and the -/// users. For each file, this class only stores a pointer pointing to the -/// newest snapshot, and an outdated snapshot is deleted by the last owner of -/// the snapshot, either this class or the symbol index. +/// This implements snapshot semantics. Each update will create a new snapshot +/// for all slabs of the Key. Snapshots are managed with shared pointers that +/// are shared between this class and the users. For each key, this class only +/// stores a pointer pointing to the newest snapshot, and an outdated snapshot +/// is deleted by the last owner of the snapshot, either this class or the +/// symbol index. /// /// The snapshot semantics keeps critical sections minimal since we only need /// locking when we swap or obtain references to snapshots. class FileSymbols { public: - /// Updates all symbols and refs in a file. - /// If either is nullptr, corresponding data for \p Path will be removed. - /// If CountReferences is true, \p Refs will be used for counting References + /// Updates all slabs associated with the \p Key. + /// If either is nullptr, corresponding data for \p Key will be removed. + /// If CountReferences is true, \p Refs will be used for counting references /// during merging. 
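
The rewritten FileSymbols documentation above describes snapshot semantics: writers publish a fresh immutable snapshot per key under a short lock, readers copy shared_ptrs out and then work lock-free, and whoever is last to let go of an outdated snapshot frees it. A minimal sketch of that scheme, with string payloads standing in for symbol/ref/relation slabs:

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

class Snapshots {
  std::mutex Mu;
  std::map<std::string, std::shared_ptr<const std::string>> ByKey;

public:
  // nullptr removes the key's data, matching the diff's update() contract.
  void update(const std::string &Key, std::shared_ptr<const std::string> S) {
    std::lock_guard<std::mutex> Lock(Mu);
    if (!S)
      ByKey.erase(Key);
    else
      ByKey[Key] = std::move(S);
  }

  // Readers grab shared ownership of the current snapshots; the critical
  // section ends at return, and all real work happens unlocked.
  std::vector<std::shared_ptr<const std::string>> snapshotAll() {
    std::lock_guard<std::mutex> Lock(Mu);
    std::vector<std::shared_ptr<const std::string>> Out;
    for (auto &KV : ByKey)
      Out.push_back(KV.second);
    return Out;
  }
};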
- void update(PathRef Path, std::unique_ptr Slab, + void update(llvm::StringRef Key, std::unique_ptr Symbols, std::unique_ptr Refs, std::unique_ptr Relations, bool CountReferences); - /// The index keeps the symbols alive. + /// The index keeps the slabs alive. /// Will count Symbol::References based on number of references in the main /// files, while building the index with DuplicateHandling::Merge option. std::unique_ptr @@ -92,12 +92,9 @@ class FileSymbols { }; mutable std::mutex Mutex; - /// Stores the latest symbol snapshots for all active files. - llvm::StringMap> FileToSymbols; - /// Stores the latest ref snapshots for all active files. - llvm::StringMap FileToRefs; - /// Stores the latest relation snapshots for all active files. - llvm::StringMap> FileToRelations; + llvm::StringMap> SymbolsSnapshot; + llvm::StringMap RefsSnapshot; + llvm::StringMap> RelationsSnapshot; }; /// This manages symbols from files and an in-memory index on all symbols. @@ -159,16 +156,16 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, struct FileShardedIndex { - /// \p HintPath is used to convert file URIs stored in symbols into absolute - /// paths. - explicit FileShardedIndex(IndexFileIn Input, PathRef HintPath); + /// Shards are keyed by the file URIs stored in the index data. + explicit FileShardedIndex(IndexFileIn Input); - /// Returns absolute paths for all files that has a shard. - std::vector getAllFiles() const; + /// Returns URIs for all files that have a shard. + std::vector getAllSources() const; - /// Generates index shard for the \p File. Note that this function results in + /// Generates index shard for the \p Uri. Note that this function results in /// a copy of all the relevant data. /// Returned index will always have Symbol/Refs/Relation Slabs set, even if /// they are empty. - IndexFileIn getShard(PathRef File) const; + llvm::Optional getShard(llvm::StringRef Uri) const; private: // Contains all the information that belongs to a single file. @@ -185,7 +182,7 @@ struct FileShardedIndex { // Keeps all the information alive. const IndexFileIn Index; - // Mapping from absolute paths to slab information. + // Mapping from URIs to slab information. llvm::StringMap Shards; // Used to build RefSlabs.
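
The FileShardedIndex changes above key shards by the URIs already stored in the index data, so no HintPath-based URI-to-path resolution is needed at sharding time, and an unknown key now yields llvm::None instead of tripping an assert. A toy sketch of the same structure; Record and ShardedByUri are invented names for illustration:

#include <optional>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Toy record standing in for a Symbol: it names, by URI, the file whose
// shard should own it.
struct Record {
  std::string FileUri;
  std::string Payload;
};

class ShardedByUri {
  std::unordered_map<std::string, std::vector<Record>> Shards;

public:
  explicit ShardedByUri(std::vector<Record> Input) {
    for (auto &R : Input)
      Shards[R.FileUri].push_back(std::move(R));
  }

  // Analogue of getAllSources(): the shard keys are the source URIs.
  std::vector<std::string> allSources() const {
    std::vector<std::string> Keys;
    Keys.reserve(Shards.size());
    for (const auto &KV : Shards)
      Keys.push_back(KV.first);
    return Keys;
  }

  // Analogue of getShard(): nullopt for unknown URIs rather than an
  // assert. (The real code copies all relevant slab data, too.)
  std::optional<std::vector<Record>> shard(const std::string &Uri) const {
    auto It = Shards.find(Uri);
    if (It == Shards.end())
      return std::nullopt;
    return It->second;
  }
};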
llvm::DenseMap RefToSymID; diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index 9ec4908bbbfc4..e4eb1b586cc5e 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" -#include "Logger.h" +#include "support/Logger.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp index 8fd2159932b46..9f294d4ab9252 100644 --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -8,9 +8,9 @@ #include "IndexAction.h" #include "Headers.h" -#include "Logger.h" #include "index/Relation.h" #include "index/SymbolOrigin.h" +#include "support/Logger.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceLocation.h" diff --git a/clang-tools-extra/clangd/index/MemIndex.cpp b/clang-tools-extra/clangd/index/MemIndex.cpp index 453b226e79074..46e9c0a8ee45e 100644 --- a/clang-tools-extra/clangd/index/MemIndex.cpp +++ b/clang-tools-extra/clangd/index/MemIndex.cpp @@ -8,9 +8,9 @@ #include "MemIndex.h" #include "FuzzyMatch.h" -#include "Logger.h" #include "Quality.h" -#include "Trace.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "clang/Index/IndexSymbol.h" namespace clang { diff --git a/clang-tools-extra/clangd/index/Merge.cpp b/clang-tools-extra/clangd/index/Merge.cpp index 0cef7dc7630a9..a93aa204e18fb 100644 --- a/clang-tools-extra/clangd/index/Merge.cpp +++ b/clang-tools-extra/clangd/index/Merge.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "Merge.h" -#include "Logger.h" -#include "Trace.h" #include "index/Symbol.h" #include "index/SymbolLocation.h" #include "index/SymbolOrigin.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 6ba4e046189c9..06527a615c202 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -8,12 +8,12 @@ #include "Serialization.h" #include "Headers.h" -#include "Logger.h" #include "RIFF.h" #include "SymbolLocation.h" #include "SymbolOrigin.h" -#include "Trace.h" #include "dex/Dex.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compression.h" diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 471061672107d..b502dfb03aec3 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -12,11 +12,11 @@ #include "CodeComplete.h" #include "CodeCompletionStrings.h" #include "ExpectedTypes.h" -#include "Logger.h" #include "SourceCode.h" #include "SymbolLocation.h" #include "URI.h" #include "index/SymbolID.h" +#include "support/Logger.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" @@ -743,7 +743,11 @@ bool SymbolCollector::isSelfContainedHeader(FileID FID) { const FileEntry *FE = 
SM.getFileEntryForID(FID); if (!FE) return false; - if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + // FIXME: Should files that have been #import'd be considered + // self-contained? That's really a property of the includer, + // not of the file. + if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) && + !PP->getHeaderSearchInfo().hasFileBeenImported(FE)) return false; // This pattern indicates that a header can't be used without // particular preprocessor state, usually set up by another header. diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp index fc515a17d1006..4f6bd927cc196 100644 --- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp +++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp @@ -17,8 +17,8 @@ #include "Serialization.h" #include "SymbolLocation.h" #include "SymbolOrigin.h" -#include "Trace.h" #include "dex/Dex.h" +#include "support/Trace.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index 36ddedd04e1fc..a663e5387ece2 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -9,11 +9,11 @@ #include "Dex.h" #include "FileDistance.h" #include "FuzzyMatch.h" -#include "Logger.h" #include "Quality.h" -#include "Trace.h" #include "index/Index.h" #include "index/dex/Iterator.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/ScopedPrinter.h" #include diff --git a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp index 8738f9cd144c6..a784fe47ccb49 100644 --- a/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp +++ b/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp @@ -11,15 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "Features.inc" -#include "SourceCode.h" #include "index/Serialization.h" #include "index/dex/Dex.h" #include "index/remote/Client.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/LineEditor/LineEditor.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Signals.h" diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index 1bd336e626d9c..81287b58afa52 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -18,6 +18,10 @@ if (CLANGD_ENABLE_REMOTE) protobuf grpc++ clangDaemon + clangdSupport + + DEPENDS + RemoteIndexProtos ) add_subdirectory(marshalling) diff --git a/clang-tools-extra/clangd/index/remote/Client.cpp b/clang-tools-extra/clangd/index/remote/Client.cpp index 0c5fe142214bc..90eeb5c55ee2c 100644 --- a/clang-tools-extra/clangd/index/remote/Client.cpp +++ b/clang-tools-extra/clangd/index/remote/Client.cpp @@ -10,11 +10,10 @@ #include "Client.h" #include "Index.grpc.pb.h" -#include "Logger.h" -#include "Trace.h" #include "index/Serialization.h" #include "marshalling/Marshalling.h" -#include "llvm/Support/YAMLTraits.h" +#include "support/Logger.h" +#include "support/Trace.h" namespace clang { namespace clangd { diff --git a/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt 
b/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt index 4789a728951c0..75e57edd45d94 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt @@ -6,4 +6,8 @@ add_clang_library(clangdRemoteMarshalling protobuf clangDaemon + clangdSupport + + DEPENDS + RemoteIndexProtos ) diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp index d2ec07136fa50..60a258a6db2e9 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "Marshalling.h" -#include "Logger.h" #include "index/Serialization.h" +#include "support/Logger.h" namespace clang { namespace clangd { diff --git a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h index ae58318e3dbb1..1bc25bf5a9de5 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h +++ b/clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h @@ -13,7 +13,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REMOTE_MARSHALLING_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REMOTE_MARSHALLING_H -#include "Index.grpc.pb.h" +#include "Index.pb.h" #include "index/Index.h" #include "llvm/Support/StringSaver.h" diff --git a/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt index 378ea2946839d..79513b8a86dba 100644 --- a/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt @@ -1,18 +1,19 @@ set(LLVM_LINK_COMPONENTS - LineEditor Support ) add_clang_executable(clangd-index-server Server.cpp + + DEPENDS + RemoteIndexProtos ) -target_compile_definitions(clangd-index-server PRIVATE -D GOOGLE_PROTOBUF_NO_RTTI=1) -clang_target_link_libraries(clangd-index-server - PRIVATE - clangDaemon - ) + target_link_libraries(clangd-index-server PRIVATE - RemoteIndexProtos + clangDaemon + RemoteIndexProtos clangdRemoteMarshalling + + grpc++ ) diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index ca35ff7715f04..1cd620366bb47 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -9,12 +9,8 @@ #include "index/Index.h" #include "index/Serialization.h" #include "index/remote/marshalling/Marshalling.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" -#include "llvm/LineEditor/LineEditor.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" #include diff --git a/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt index aebe4e674ab7e..5d223ad3c8b35 100644 --- a/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt @@ -7,4 +7,5 @@ add_clang_library(clangdRemoteIndex LINK_LIBS clangDaemon + clangdSupport ) diff --git a/clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp 
b/clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp index 725f0e472c3a6..f0fb612f6332f 100644 --- a/clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp +++ b/clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "Logger.h" #include "index/remote/Client.h" +#include "support/Logger.h" namespace clang { namespace clangd { diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index 0585897947dfc..c08f6ea805aaa 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -9,12 +9,12 @@ #include "refactor/Rename.h" #include "AST.h" #include "FindTarget.h" -#include "Logger.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" -#include "Trace.h" #include "index/SymbolCollector.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/Basic/SourceLocation.h" diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h index 5c53ba633fa2b..e27951605ebca 100644 --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -9,9 +9,9 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H -#include "Path.h" #include "Protocol.h" #include "SourceCode.h" +#include "support/Path.h" #include "clang/Basic/LangOptions.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Error.h" diff --git a/clang-tools-extra/clangd/refactor/Tweak.cpp b/clang-tools-extra/clangd/refactor/Tweak.cpp index 3e3033ce5c7a5..b1f4dcd69af6b 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.cpp +++ b/clang-tools-extra/clangd/refactor/Tweak.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// #include "Tweak.h" -#include "Logger.h" -#include "Path.h" #include "SourceCode.h" #include "index/Index.h" +#include "support/Logger.h" +#include "support/Path.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" diff --git a/clang-tools-extra/clangd/refactor/Tweak.h b/clang-tools-extra/clangd/refactor/Tweak.h index 84d3bdb3fc0a0..10e3e8d3e5653 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.h +++ b/clang-tools-extra/clangd/refactor/Tweak.h @@ -20,11 +20,11 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_ACTIONS_TWEAK_H #include "ParsedAST.h" -#include "Path.h" #include "Protocol.h" #include "Selection.h" #include "SourceCode.h" #include "index/Index.h" +#include "support/Path.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp index f2ef235c43f79..836ac9ac57abf 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp @@ -8,8 +8,8 @@ #include "AST.h" #include "FindTarget.h" -#include "Logger.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/Decl.h" #include "clang/AST/RecursiveASTVisitor.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt index 
995288bca2cf4..c5bff2f34a9a2 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt +++ b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt @@ -30,6 +30,7 @@ add_clang_library(clangDaemonTweaks OBJECT clangAST clangBasic clangDaemon + clangdSupport clangFormat clangLex clangToolingCore diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp index b1057d8b80e78..e28a2c46c374a 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp @@ -8,11 +8,11 @@ #include "AST.h" #include "FindTarget.h" -#include "Logger.h" #include "Selection.h" #include "SourceCode.h" #include "XRefs.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Decl.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp index c2d344a3a46ef..405ff90a5945c 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -9,12 +9,12 @@ #include "AST.h" #include "FindTarget.h" #include "HeaderSourceSwitch.h" -#include "Logger.h" #include "ParsedAST.h" -#include "Path.h" #include "Selection.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" +#include "support/Path.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp index bffd6c02fb6e9..d2dfc4a537d4a 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp @@ -7,20 +7,20 @@ //===----------------------------------------------------------------------===// #include "refactor/Tweak.h" -#include "Logger.h" +#include "XRefs.h" +#include "support/Logger.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include #include #include #include -#include -#include "XRefs.h" -#include "llvm/ADT/StringExtras.h" namespace clang { namespace clangd { diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp index a22b5ab60cd52..dd62670646c41 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp @@ -47,11 +47,11 @@ //===----------------------------------------------------------------------===// #include "AST.h" -#include "Logger.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclTemplate.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp index cf38227c29d7e..69bfe67314185 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp @@ -5,12 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
// //===----------------------------------------------------------------------===// -#include "Logger.h" #include "ParsedAST.h" #include "Protocol.h" #include "Selection.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp index 62a306c9316d5..2534cf562daa8 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "Logger.h" #include "ParsedAST.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ExprObjC.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp index c5f480cacd760..bec45be6c3254 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp @@ -5,10 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "Logger.h" #include "ParsedAST.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" diff --git a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp index 1d57cd5c71320..2422743019a13 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp @@ -5,10 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "Logger.h" #include "ParsedAST.h" #include "SourceCode.h" #include "refactor/Tweak.h" +#include "support/Logger.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt new file mode 100644 index 0000000000000..04a897e670372 --- /dev/null +++ b/clang-tools-extra/clangd/support/CMakeLists.txt @@ -0,0 +1,32 @@ +# clangd/support contains low-level support libraries that do not depend +# on clang either programmatically or conceptually. + +set(LLVM_LINK_COMPONENTS + Support + ) + +if(CLANG_BUILT_STANDALONE) + # needed to get HAVE_CXX_ATOMICS64_WITHOUT_LIB defined + include(CheckAtomic) +endif() + +set(CLANGD_ATOMIC_LIB "") +if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB OR NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) + list(APPEND CLANGD_ATOMIC_LIB "atomic") +endif() + +include_directories(..) 
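
The HAVE_CXX_ATOMICS_WITHOUT_LIB / HAVE_CXX_ATOMICS64_WITHOUT_LIB checks above exist because 64-bit atomics are not lock-free on every target: on some 32-bit platforms the compiler lowers them to libatomic calls, so clangdSupport must link "atomic" explicitly there. A tiny illustration of the kind of code that creates the dependency:

#include <atomic>
#include <cstdint>

// On targets without native 64-bit atomics, fetch_add here becomes a
// call into libatomic, which is why the build adds "-latomic" when the
// CMake probes fail.
std::atomic<std::uint64_t> Counter{0};

std::uint64_t bump() { return Counter.fetch_add(1) + 1; }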
+add_clang_library(clangdSupport + Cancellation.cpp + Context.cpp + FSProvider.cpp + Logger.cpp + Markup.cpp + Shutdown.cpp + Threading.cpp + Trace.cpp + + LINK_LIBS + ${LLVM_PTHREAD_LIB} + ${CLANGD_ATOMIC_LIB} + ) diff --git a/clang-tools-extra/clangd/Cancellation.cpp b/clang-tools-extra/clangd/support/Cancellation.cpp similarity index 97% rename from clang-tools-extra/clangd/Cancellation.cpp rename to clang-tools-extra/clangd/support/Cancellation.cpp index 2120e700e7198..d544c17e279c7 100644 --- a/clang-tools-extra/clangd/Cancellation.cpp +++ b/clang-tools-extra/clangd/support/Cancellation.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Cancellation.h" +#include "support/Cancellation.h" #include namespace clang { diff --git a/clang-tools-extra/clangd/Cancellation.h b/clang-tools-extra/clangd/support/Cancellation.h similarity index 96% rename from clang-tools-extra/clangd/Cancellation.h rename to clang-tools-extra/clangd/support/Cancellation.h index 0bee6f355c39a..f33b0b5c67635 100644 --- a/clang-tools-extra/clangd/Cancellation.h +++ b/clang-tools-extra/clangd/support/Cancellation.h @@ -54,10 +54,10 @@ // Measuring the start -> cancel -> acknowledge -> finish timeline would // help find where libraries' cancellation should be improved. -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_CANCELLATION_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CANCELLATION_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_CANCELLATION_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_CANCELLATION_H -#include "Context.h" +#include "support/Context.h" #include "llvm/Support/Error.h" #include #include diff --git a/clang-tools-extra/clangd/Context.cpp b/clang-tools-extra/clangd/support/Context.cpp similarity index 97% rename from clang-tools-extra/clangd/Context.cpp rename to clang-tools-extra/clangd/support/Context.cpp index 8e8bba6e97a86..266dd003f8f6e 100644 --- a/clang-tools-extra/clangd/Context.cpp +++ b/clang-tools-extra/clangd/support/Context.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Context.h" +#include "support/Context.h" #include namespace clang { diff --git a/clang-tools-extra/clangd/Context.h b/clang-tools-extra/clangd/support/Context.h similarity index 93% rename from clang-tools-extra/clangd/Context.h rename to clang-tools-extra/clangd/support/Context.h index 71aeaba95254d..894032bdd8838 100644 --- a/clang-tools-extra/clangd/Context.h +++ b/clang-tools-extra/clangd/support/Context.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_CONTEXT_H_ -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_CONTEXT_H_ +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_CONTEXT_H_ +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_CONTEXT_H_ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" @@ -120,20 +120,20 @@ class Context { template Context derive(const Key &Key, typename std::decay::type Value) const & { - return Context(std::make_shared(Data{ - /*Parent=*/DataPtr, &Key, - std::make_unique::type>>( - std::move(Value))})); + return Context(std::make_shared( + Data{/*Parent=*/DataPtr, &Key, + std::make_unique::type>>( + std::move(Value))})); } template Context derive(const Key &Key, typename std::decay::type Value) && /* takes ownership */ { - return Context(std::make_shared(Data{ - /*Parent=*/std::move(DataPtr), &Key, - std::make_unique::type>>( - std::move(Value))})); + return Context(std::make_shared( 
+ Data{/*Parent=*/std::move(DataPtr), &Key, + std::make_unique::type>>( + std::move(Value))})); } /// Derives a child context, using an anonymous key. @@ -219,4 +219,4 @@ class LLVM_NODISCARD WithContextValue { } // namespace clangd } // namespace clang -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_CONTEXT_H_ +#endif diff --git a/clang-tools-extra/clangd/FSProvider.cpp b/clang-tools-extra/clangd/support/FSProvider.cpp similarity index 98% rename from clang-tools-extra/clangd/FSProvider.cpp rename to clang-tools-extra/clangd/support/FSProvider.cpp index 80d6be005cc41..6474a3c4a3650 100644 --- a/clang-tools-extra/clangd/FSProvider.cpp +++ b/clang-tools-extra/clangd/support/FSProvider.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "FSProvider.h" +#include "support/FSProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" diff --git a/clang-tools-extra/clangd/FSProvider.h b/clang-tools-extra/clangd/support/FSProvider.h similarity index 91% rename from clang-tools-extra/clangd/FSProvider.h rename to clang-tools-extra/clangd/support/FSProvider.h index 1fe3ba864a33d..a53a4149a47b8 100644 --- a/clang-tools-extra/clangd/FSProvider.h +++ b/clang-tools-extra/clangd/support/FSProvider.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FSPROVIDER_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FSPROVIDER_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FSPROVIDER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FSPROVIDER_H #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/clang-tools-extra/clangd/Function.h b/clang-tools-extra/clangd/support/Function.h similarity index 96% rename from clang-tools-extra/clangd/Function.h rename to clang-tools-extra/clangd/support/Function.h index cfcb4a550a765..2cac1b1e7f67e 100644 --- a/clang-tools-extra/clangd/Function.h +++ b/clang-tools-extra/clangd/support/Function.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUNCTION_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FUNCTION_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FUNCTION_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_FUNCTION_H #include "llvm/ADT/FunctionExtras.h" #include "llvm/Support/Error.h" diff --git a/clang-tools-extra/clangd/Logger.cpp b/clang-tools-extra/clangd/support/Logger.cpp similarity index 96% rename from clang-tools-extra/clangd/Logger.cpp rename to clang-tools-extra/clangd/support/Logger.cpp index 7b8c9a3e6ffc0..768d2e52210b2 100644 --- a/clang-tools-extra/clangd/Logger.cpp +++ b/clang-tools-extra/clangd/support/Logger.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "Logger.h" -#include "Trace.h" +#include "support/Logger.h" +#include "support/Trace.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang-tools-extra/clangd/Logger.h b/clang-tools-extra/clangd/support/Logger.h similarity index 97% rename from clang-tools-extra/clangd/Logger.h rename to clang-tools-extra/clangd/support/Logger.h index 7a5d5140d4824..72d1408bdc77c 100644 --- a/clang-tools-extra/clangd/Logger.h +++ b/clang-tools-extra/clangd/support/Logger.h @@ -6,8 +6,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_LOGGER_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_LOGGER_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_LOGGER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_LOGGER_H #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" diff --git a/clang-tools-extra/clangd/FormattedString.cpp b/clang-tools-extra/clangd/support/Markup.cpp similarity index 83% rename from clang-tools-extra/clangd/FormattedString.cpp rename to clang-tools-extra/clangd/support/Markup.cpp index d3dbdbba17bc3..9dffdf675d3ee 100644 --- a/clang-tools-extra/clangd/FormattedString.cpp +++ b/clang-tools-extra/clangd/support/Markup.cpp @@ -1,12 +1,11 @@ -//===--- FormattedString.cpp --------------------------------*- C++-*------===// +//===--- Markup.cpp -----------------------------------------*- C++-*------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "FormattedString.h" -#include "clang/Basic/CharInfo.h" +#include "support/Markup.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -25,7 +24,6 @@ namespace clang { namespace clangd { namespace markup { - namespace { // Is ' || Contents.startswith("/>")) return true; // May close the tag. @@ -70,7 +68,7 @@ bool looksLikeTag(llvm::StringRef Contents) { // a markdown grammar construct. bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After, bool StartsLine) { - assert(Before.take_while(isWhitespace).empty()); + assert(Before.take_while(llvm::isSpace).empty()); auto RulerLength = [&]() -> /*Length*/ unsigned { if (!StartsLine || !Before.empty()) return false; @@ -82,8 +80,8 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After, (After.empty() || After.startswith(" ")); }; auto SpaceSurrounds = [&]() { - return (After.empty() || isWhitespace(After.front())) && - (Before.empty() || isWhitespace(Before.back())); + return (After.empty() || llvm::isSpace(After.front())) && + (Before.empty() || llvm::isSpace(Before.back())); }; auto WordSurrounds = [&]() { return (!After.empty() && llvm::isAlnum(After.front())) && @@ -216,23 +214,10 @@ std::string getMarkerForCodeBlock(llvm::StringRef Input) { } // Trims the input and concatenates whitespace blocks into a single ` `. -std::string canonicalizeSpaces(std::string Input) { - // Goes over the string and preserves only a single ` ` for any whitespace - // chunks, the rest is moved to the end of the string and dropped in the end. - auto WritePtr = Input.begin(); +std::string canonicalizeSpaces(llvm::StringRef Input) { llvm::SmallVector Words; llvm::SplitString(Input, Words); - if (Words.empty()) - return ""; - // Go over each word and add it to the string. - for (llvm::StringRef Word : Words) { - if (WritePtr > Input.begin()) - *WritePtr++ = ' '; // Separate from previous block. - llvm::for_each(Word, [&WritePtr](const char C) { *WritePtr++ = C; }); - } - // Get rid of extra spaces. 
- Input.resize(WritePtr - Input.begin()); - return Input; + return llvm::join(Words, " "); } std::string renderBlocks(llvm::ArrayRef> Children, @@ -284,6 +269,9 @@ class Ruler : public Block { OS << "\n---\n"; } void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; } + std::unique_ptr clone() const override { + return std::make_unique(*this); + } bool isRuler() const override { return true; } }; @@ -300,6 +288,10 @@ class CodeBlock : public Block { OS << '\n' << Contents << "\n\n"; } + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + CodeBlock(std::string Contents, std::string Language) : Contents(std::move(Contents)), Language(std::move(Language)) {} @@ -352,18 +344,21 @@ std::string Block::asPlainText() const { } void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const { - llvm::StringRef Sep = ""; + bool NeedsSpace = false; + bool HasChunks = false; for (auto &C : Chunks) { - OS << Sep; + if (C.SpaceBefore || NeedsSpace) + OS << " "; switch (C.Kind) { case Chunk::PlainText: - OS << renderText(C.Contents, Sep.empty()); + OS << renderText(C.Contents, !HasChunks); break; case Chunk::InlineCode: OS << renderInlineBlock(C.Contents); break; } - Sep = " "; + HasChunks = true; + NeedsSpace = C.SpaceAfter; } // Paragraphs are translated into markdown lines, not markdown paragraphs. // Therefore it only has a single linebreak afterwards. @@ -371,11 +366,31 @@ void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const { OS << " \n"; } +std::unique_ptr Paragraph::clone() const { + return std::make_unique(*this); +} + +/// Choose a marker to delimit `Text` from a prioritized list of options. +/// This is more readable than escaping for plain-text. +llvm::StringRef chooseMarker(llvm::ArrayRef Options, + llvm::StringRef Text) { + // Prefer a delimiter whose characters don't appear in the text. 
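
The Paragraph::renderMarkdown() rewrite above replaces the unconditional " " separator with per-chunk SpaceBefore/SpaceAfter flags that collapse into a single space, and canonicalizeSpaces() becomes a plain split-and-join. A self-contained sketch of both, using std::istringstream where the real code uses llvm::SplitString and llvm::join:

#include <sstream>
#include <string>
#include <vector>

// Chunk flags as in the hunk above: a space is wanted before/after this
// chunk, and adjacent requests must collapse into one space.
struct Chunk {
  std::string Contents;
  bool SpaceBefore = false;
  bool SpaceAfter = false;
};

// Emit " " only when the previous chunk set SpaceAfter or the current one
// sets SpaceBefore. Markdown escaping is omitted; unlike the diff, a
// leading space on the very first chunk is suppressed for tidiness.
std::string render(const std::vector<Chunk> &Chunks) {
  std::string Out;
  bool NeedsSpace = false;
  for (const Chunk &C : Chunks) {
    if (!Out.empty() && (NeedsSpace || C.SpaceBefore))
      Out += ' ';
    Out += C.Contents;
    NeedsSpace = C.SpaceAfter;
  }
  return Out;
}

// Simplified canonicalizeSpaces: trim and collapse whitespace runs to a
// single space; stream extraction does the word splitting.
std::string canonicalizeSpaces(const std::string &Input) {
  std::istringstream SS(Input);
  std::string Word, Out;
  while (SS >> Word) {
    if (!Out.empty())
      Out += ' ';
    Out += Word;
  }
  return Out;
}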
+ for (llvm::StringRef S : Options) + if (Text.find_first_of(S) == llvm::StringRef::npos) + return S; + return Options.front(); +} + void Paragraph::renderPlainText(llvm::raw_ostream &OS) const { - llvm::StringRef Sep = ""; + bool NeedsSpace = false; for (auto &C : Chunks) { - OS << Sep << C.Contents; - Sep = " "; + if (C.SpaceBefore || NeedsSpace) + OS << " "; + llvm::StringRef Marker = ""; + if (C.Preserve && C.Kind == Chunk::InlineCode) + Marker = chooseMarker({"`", "'", "\""}, C.Contents); + OS << Marker << C.Contents << Marker; + NeedsSpace = C.SpaceAfter; } OS << '\n'; } @@ -398,33 +413,62 @@ void BulletList::renderPlainText(llvm::raw_ostream &OS) const { } } -Paragraph &Paragraph::appendText(std::string Text) { - Text = canonicalizeSpaces(std::move(Text)); - if (Text.empty()) +Paragraph &Paragraph::appendSpace() { + if (!Chunks.empty()) + Chunks.back().SpaceAfter = true; + return *this; +} + +Paragraph &Paragraph::appendText(llvm::StringRef Text) { + std::string Norm = canonicalizeSpaces(Text); + if (Norm.empty()) return *this; Chunks.emplace_back(); Chunk &C = Chunks.back(); - C.Contents = std::move(Text); + C.Contents = std::move(Norm); C.Kind = Chunk::PlainText; + C.SpaceBefore = llvm::isSpace(Text.front()); + C.SpaceAfter = llvm::isSpace(Text.back()); return *this; } -Paragraph &Paragraph::appendCode(std::string Code) { - Code = canonicalizeSpaces(std::move(Code)); - if (Code.empty()) +Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) { + bool AdjacentCode = + !Chunks.empty() && Chunks.back().Kind == Chunk::InlineCode; + std::string Norm = canonicalizeSpaces(std::move(Code)); + if (Norm.empty()) return *this; Chunks.emplace_back(); Chunk &C = Chunks.back(); - C.Contents = std::move(Code); + C.Contents = std::move(Norm); C.Kind = Chunk::InlineCode; + C.Preserve = Preserve; + // Disallow adjacent code spans without spaces, markdown can't render them. + C.SpaceBefore = AdjacentCode; return *this; } +std::unique_ptr BulletList::clone() const { + return std::make_unique(*this); +} + class Document &BulletList::addItem() { Items.emplace_back(); return Items.back(); } +Document &Document::operator=(const Document &Other) { + Children.clear(); + for (const auto &C : Other.Children) + Children.push_back(C->clone()); + return *this; +} + +void Document::append(Document Other) { + std::move(Other.Children.begin(), Other.Children.end(), + std::back_inserter(Children)); +} + Paragraph &Document::addParagraph() { Children.push_back(std::make_unique()); return *static_cast(Children.back().get()); diff --git a/clang-tools-extra/clangd/FormattedString.h b/clang-tools-extra/clangd/support/Markup.h similarity index 70% rename from clang-tools-extra/clangd/FormattedString.h rename to clang-tools-extra/clangd/support/Markup.h index fe864ba575681..560071031e66a 100644 --- a/clang-tools-extra/clangd/FormattedString.h +++ b/clang-tools-extra/clangd/support/Markup.h @@ -1,4 +1,4 @@ -//===--- FormattedString.h ----------------------------------*- C++-*------===// +//===--- Markup.h -------------------------------------------*- C++-*------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,11 @@ // //===----------------------------------------------------------------------===// // -// A simple intermediate representation of formatted text that could be -// converted to plaintext or markdown. +// A model of formatted text that can be rendered to plaintext or markdown. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_MARKUP_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_MARKUP_H #include "llvm/Support/raw_ostream.h" #include @@ -30,6 +29,7 @@ class Block { public: virtual void renderMarkdown(llvm::raw_ostream &OS) const = 0; virtual void renderPlainText(llvm::raw_ostream &OS) const = 0; + virtual std::unique_ptr clone() const = 0; std::string asMarkdown() const; std::string asPlainText() const; @@ -44,12 +44,18 @@ class Paragraph : public Block { public: void renderMarkdown(llvm::raw_ostream &OS) const override; void renderPlainText(llvm::raw_ostream &OS) const override; + std::unique_ptr clone() const override; /// Append plain text to the end of the string. - Paragraph &appendText(std::string Text); + Paragraph &appendText(llvm::StringRef Text); /// Append inline code, this translates to the ` block in markdown. - Paragraph &appendCode(std::string Code); + /// \p Preserve indicates the code span must be apparent even in plaintext. + Paragraph &appendCode(llvm::StringRef Code, bool Preserve = false); + + /// Ensure there is space between the surrounding chunks. + /// Has no effect at the beginning or end of a paragraph. + Paragraph &appendSpace(); private: struct Chunk { @@ -57,9 +63,14 @@ class Paragraph : public Block { PlainText, InlineCode, } Kind = PlainText; + // Preserve chunk markers in plaintext. + bool Preserve = false; std::string Contents; - /// Language for code block chunks. Ignored for other chunks. - std::string Language; + // Whether this chunk should be surrounded by whitespace. + // Consecutive SpaceAfter and SpaceBefore will be collapsed into one space. + // Code spans don't usually set this: their spaces belong "inside" the span. + bool SpaceBefore = false; + bool SpaceAfter = false; }; std::vector Chunks; }; @@ -70,6 +81,7 @@ class BulletList : public Block { public: void renderMarkdown(llvm::raw_ostream &OS) const override; void renderPlainText(llvm::raw_ostream &OS) const override; + std::unique_ptr clone() const override; class Document &addItem(); @@ -81,6 +93,14 @@ class BulletList : public Block { /// markdown and plaintext. class Document { public: + Document() = default; + Document(const Document &Other) { *this = Other; } + Document &operator=(const Document &); + Document(Document &&) = default; + Document &operator=(Document &&) = default; + + void append(Document Other); + /// Adds a semantical block that will be separate from others. Paragraph &addParagraph(); /// Inserts a horizontal separator to the document. 
diff --git a/clang-tools-extra/clangd/Path.h b/clang-tools-extra/clangd/support/Path.h similarity index 87% rename from clang-tools-extra/clangd/Path.h rename to clang-tools-extra/clangd/support/Path.h index eaa7224504250..4d4ad7f490473 100644 --- a/clang-tools-extra/clangd/Path.h +++ b/clang-tools-extra/clangd/support/Path.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATH_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATH_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H #include "llvm/ADT/StringRef.h" #include diff --git a/clang-tools-extra/clangd/Shutdown.cpp b/clang-tools-extra/clangd/support/Shutdown.cpp similarity index 91% rename from clang-tools-extra/clangd/Shutdown.cpp rename to clang-tools-extra/clangd/support/Shutdown.cpp index 36d977570a4fe..7ce01ee171e87 100644 --- a/clang-tools-extra/clangd/Shutdown.cpp +++ b/clang-tools-extra/clangd/support/Shutdown.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Shutdown.h" +#include "support/Shutdown.h" #include #include @@ -31,10 +31,7 @@ void requestShutdown() { std::abort(); } -bool shutdownRequested() { - return ShutdownRequested; -} +bool shutdownRequested() { return ShutdownRequested; } } // namespace clangd } // namespace clang - diff --git a/clang-tools-extra/clangd/Shutdown.h b/clang-tools-extra/clangd/support/Shutdown.h similarity index 96% rename from clang-tools-extra/clangd/Shutdown.h rename to clang-tools-extra/clangd/support/Shutdown.h index 94ada3171f319..896e1521fe6a1 100644 --- a/clang-tools-extra/clangd/Shutdown.h +++ b/clang-tools-extra/clangd/support/Shutdown.h @@ -40,8 +40,8 @@ // returns an error, etc. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_SHUTDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_SHUTDOWN_H #include #include diff --git a/clang-tools-extra/clangd/Threading.cpp b/clang-tools-extra/clangd/support/Threading.cpp similarity index 93% rename from clang-tools-extra/clangd/Threading.cpp rename to clang-tools-extra/clangd/support/Threading.cpp index a651b70f02a6f..5f95888ae3e2d 100644 --- a/clang-tools-extra/clangd/Threading.cpp +++ b/clang-tools-extra/clangd/support/Threading.cpp @@ -1,6 +1,5 @@ -#include "Threading.h" -#include "Trace.h" -#include "clang/Basic/Stack.h" +#include "support/Threading.h" +#include "support/Trace.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" @@ -10,7 +9,7 @@ #include #elif defined(__APPLE__) #include -#elif defined (_WIN32) +#elif defined(_WIN32) #include #endif @@ -96,7 +95,8 @@ void AsyncTaskRunner::runAsync(const llvm::Twine &Name, }; // Ensure our worker threads have big enough stacks to run clang. 
- llvm::llvm_execute_on_thread_async(std::move(Task), clang::DesiredStackSize); + llvm::llvm_execute_on_thread_async(std::move(Task), + /*clang::DesiredStackSize*/ 8 << 20); } Deadline timeoutSeconds(llvm::Optional Seconds) { diff --git a/clang-tools-extra/clangd/Threading.h b/clang-tools-extra/clangd/support/Threading.h similarity index 96% rename from clang-tools-extra/clangd/Threading.h rename to clang-tools-extra/clangd/support/Threading.h index 878069f3b9ee8..310dd7bc5a246 100644 --- a/clang-tools-extra/clangd/Threading.h +++ b/clang-tools-extra/clangd/support/Threading.h @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_THREADING_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_THREADING_H +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_THREADING_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_THREADING_H -#include "Context.h" +#include "support/Context.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/Twine.h" #include diff --git a/clang-tools-extra/clangd/Trace.cpp b/clang-tools-extra/clangd/support/Trace.cpp similarity index 84% rename from clang-tools-extra/clangd/Trace.cpp rename to clang-tools-extra/clangd/support/Trace.cpp index 5bcbf221c4351..6bf4816268e5c 100644 --- a/clang-tools-extra/clangd/Trace.cpp +++ b/clang-tools-extra/clangd/support/Trace.cpp @@ -6,15 +6,19 @@ // //===----------------------------------------------------------------------===// -#include "Trace.h" -#include "Context.h" +#include "support/Trace.h" +#include "support/Context.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/FormatProviders.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #include +#include +#include #include namespace clang { @@ -149,10 +153,10 @@ class JSONTracer : public EventTracer { void rawEvent(llvm::StringRef Phase, const llvm::json::Object &Event) /*REQUIRES(Mu)*/ { // PID 0 represents the clangd process. - Out.object([&]{ + Out.object([&] { Out.attribute("pid", 0); Out.attribute("ph", Phase); - for (const auto& KV : Event) + for (const auto &KV : Event) Out.attribute(KV.first, KV.second); }); } @@ -209,27 +213,54 @@ void log(const llvm::Twine &Message) { } // Returned context owns Args. -static Context makeSpanContext(llvm::Twine Name, llvm::json::Object *Args) { +static Context makeSpanContext(llvm::Twine Name, llvm::json::Object *Args, + const Metric &LatencyMetric) { if (!T) return Context::current().clone(); WithContextValue WithArgs{std::unique_ptr(Args)}; + llvm::Optional WithLatency; + using Clock = std::chrono::high_resolution_clock; + WithLatency.emplace(llvm::make_scope_exit( + [StartTime = Clock::now(), Name = Name.str(), &LatencyMetric] { + LatencyMetric.record( + std::chrono::duration_cast(Clock::now() - + StartTime) + .count(), + Name); + })); return T->beginSpan(Name.isSingleStringRef() ? Name.getSingleStringRef() : llvm::StringRef(Name.str()), Args); } +// Fallback metric that measures latencies for spans without an explicit latency +// metric. Labels are span names. +constexpr Metric SpanLatency("span_latency", Metric::Distribution, "span_name"); + // Span keeps a non-owning pointer to the args, which is how users access them. // The args are owned by the context though. They stick around until the // beginSpan() context is destroyed, when the tracing engine will consume them. 
-Span::Span(llvm::Twine Name) +Span::Span(llvm::Twine Name) : Span(Name, SpanLatency) {} +Span::Span(llvm::Twine Name, const Metric &LatencyMetric) : Args(T ? new llvm::json::Object() : nullptr), - RestoreCtx(makeSpanContext(Name, Args)) {} + RestoreCtx(makeSpanContext(Name, Args, LatencyMetric)) {} Span::~Span() { if (T) T->endSpan(); } +void Metric::record(double Value, llvm::StringRef Label) const { + if (!T) + return; + assert((LabelName.empty() == Label.empty()) && + "recording a measurement with inconsistent labeling"); + T->record(*this, Value, Label); +} + +Context EventTracer::beginSpan(llvm::StringRef Name, llvm::json::Object *Args) { + return Context::current().clone(); +} } // namespace trace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Trace.h b/clang-tools-extra/clangd/support/Trace.h similarity index 61% rename from clang-tools-extra/clangd/Trace.h rename to clang-tools-extra/clangd/support/Trace.h index 55dc0ef0580c1..90a11bb1feb48 100644 --- a/clang-tools-extra/clangd/Trace.h +++ b/clang-tools-extra/clangd/support/Trace.h @@ -14,19 +14,62 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRACE_H_ -#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRACE_H_ +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_TRACE_H_ +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_TRACE_H_ -#include "Context.h" +#include "support/Context.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" +#include <chrono> +#include <string> +#include <vector> namespace clang { namespace clangd { namespace trace { -/// A consumer of trace events. The events are produced by Spans and trace::log. +/// Represents measurements of clangd events, e.g. operation latency. Those +/// measurements are recorded per-label, defaulting to an empty one for metrics +/// that don't care about it. This enables aggregation of measurements across +/// labels. For example a metric tracking accesses to a cache can have labels +/// named hit and miss. +struct Metric { + enum MetricType { + /// A number whose value is meaningful, and may vary over time. + /// Each measurement replaces the current value. + Value, + + /// An aggregate number whose rate of change over time is meaningful. + /// Each measurement is an increment for the counter. + Counter, + + /// A distribution of values with a meaningful mean and count. + /// Each measured value is a sample for the distribution. + /// The distribution is assumed not to vary, samples are aggregated over + /// time. + Distribution, + }; + constexpr Metric(llvm::StringLiteral Name, MetricType Type, + llvm::StringLiteral LabelName = llvm::StringLiteral("")) + : Name(Name), Type(Type), LabelName(LabelName) {} + + /// Records a measurement for this metric to the active tracer. + void record(double Value, llvm::StringRef Label = "") const; + + /// Uniquely identifies the metric. Should use snake_case identifiers, can use + /// dots for hierarchy if needed, e.g. method_latency, foo.bar. + const llvm::StringLiteral Name; + const MetricType Type; + /// Indicates what measurement labels represent, e.g. "operation_name" for a + /// metric tracking latencies. If non-empty, all measurements must also have a + /// non-empty label. + const llvm::StringLiteral LabelName; +}; + +/// A consumer of trace events and measurements. The events are produced by +/// Spans and trace::log, the measurements are produced by Metric::record.
/// Implementations of this interface must be thread-safe. class EventTracer { public: @@ -37,16 +80,20 @@ class EventTracer { /// Usually implementations will store an object in the returned context /// whose destructor records the end of the event. /// The args are *Args, only complete when the event ends. - virtual Context beginSpan(llvm::StringRef Name, llvm::json::Object *Args) = 0; + virtual Context beginSpan(llvm::StringRef Name, llvm::json::Object *Args); // Called when a Span is destroyed (it may still be active on other threads). // beginSpan() and endSpan() will always form a proper stack on each thread. // The Context returned by beginSpan is active, but Args is not ready. // Tracers should not override this unless they need to observe strict // per-thread nesting. Instead they should observe context destruction. - virtual void endSpan(){}; + virtual void endSpan() {} /// Called for instant events. - virtual void instant(llvm::StringRef Name, llvm::json::Object &&Args) = 0; + virtual void instant(llvm::StringRef Name, llvm::json::Object &&Args) {} + + /// Called whenever a metric records a measurement. + virtual void record(const Metric &Metric, double Value, + llvm::StringRef Label) {} }; /// Sets up a global EventTracer that consumes events produced by Span and @@ -81,6 +128,9 @@ void log(const llvm::Twine &Name); class Span { public: Span(llvm::Twine Name); + /// Records span's duration in seconds to \p LatencyMetric with \p Name as the + /// label. + Span(llvm::Twine Name, const Metric &LatencyMetric); ~Span(); /// Mutable metadata, if this span is interested. @@ -104,4 +154,4 @@ class Span { } // namespace clangd } // namespace clang -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRACE_H_ +#endif diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index b81f90a23b787..3368013f50798 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -32,5 +32,6 @@ target_link_libraries(clangd PRIVATE clangTidy clangDaemon + clangdSupport ${CLANGD_XPC_LIBS} ) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 92e46a662953e..575722e232c9d 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -9,15 +9,15 @@ #include "ClangdLSPServer.h" #include "CodeComplete.h" #include "Features.inc" -#include "Path.h" #include "PathMapping.h" #include "Protocol.h" -#include "Shutdown.h" -#include "Trace.h" #include "Transport.h" #include "index/Background.h" #include "index/Serialization.h" #include "refactor/Rename.h" +#include "support/Path.h" +#include "support/Shutdown.h" +#include "support/Trace.h" #include "clang/Basic/Version.h" #include "clang/Format/Format.h" #include "llvm/ADT/Optional.h" diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 4119445b85a05..cccb3ce7308a1 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -27,7 +27,6 @@ add_unittest(ClangdUnitTests ClangdTests Annotations.cpp ASTTests.cpp BackgroundIndexTests.cpp - CancellationTests.cpp CanonicalIncludesTests.cpp ClangdTests.cpp ClangdLSPServerTests.cpp @@ -35,7 +34,6 @@ add_unittest(ClangdUnitTests ClangdTests CodeCompletionStringsTests.cpp CollectMacrosTests.cpp CompileCommandsTests.cpp - ContextTests.cpp DexTests.cpp DiagnosticsTests.cpp DraftStoreTests.cpp @@ -44,10 +42,8
@@ add_unittest(ClangdUnitTests ClangdTests FileIndexTests.cpp FindSymbolsTests.cpp FindTargetTests.cpp - FormattedStringTests.cpp FormatTests.cpp FSTests.cpp - FunctionTests.cpp FuzzyMatchTests.cpp GlobalCompilationDatabaseTests.cpp HeadersTests.cpp @@ -76,14 +72,20 @@ add_unittest(ClangdUnitTests ClangdTests TestFS.cpp TestIndex.cpp TestTU.cpp - ThreadingTests.cpp - TraceTests.cpp TypeHierarchyTests.cpp TweakTests.cpp TweakTesting.cpp URITests.cpp XRefsTests.cpp + support/CancellationTests.cpp + support/ContextTests.cpp + support/FunctionTests.cpp + support/MarkupTests.cpp + support/ThreadingTests.cpp + support/TestTracer.cpp + support/TraceTests.cpp + $ ) @@ -107,6 +109,7 @@ clang_target_link_libraries(ClangdTests target_link_libraries(ClangdTests PRIVATE clangDaemon + clangdSupport clangTidy LLVMSupport LLVMTestingSupport diff --git a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp index 5f169fefd3bd0..c60b264baa9c4 100644 --- a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp @@ -10,10 +10,13 @@ #include "ClangdLSPServer.h" #include "CodeComplete.h" #include "LSPClient.h" -#include "Logger.h" #include "Protocol.h" #include "TestFS.h" #include "refactor/Rename.h" +#include "support/Logger.h" +#include "support/TestTracer.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Testing/Support/SupportHelpers.h" #include "gmock/gmock.h" @@ -147,6 +150,15 @@ TEST_F(LSPTest, DiagnosticsHeaderSaved) { DiagMessage("Use of undeclared identifier 'changed'")))); } +TEST_F(LSPTest, RecordsLatencies) { + trace::TestTracer Tracer; + auto &Client = start(); + llvm::StringLiteral MethodName = "method_name"; + EXPECT_THAT(Tracer.takeMetric("lsp_latency", MethodName), testing::SizeIs(0)); + llvm::consumeError(Client.call(MethodName, {}).take().takeError()); + Client.sync(); + EXPECT_THAT(Tracer.takeMetric("lsp_latency", MethodName), testing::SizeIs(1)); +} } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp index 8bb4cad52dbac..81075ff1bbe73 100644 --- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp @@ -14,8 +14,8 @@ #include "Matchers.h" #include "SyncAPI.h" #include "TestFS.h" -#include "Threading.h" #include "URI.h" +#include "support/Threading.h" #include "clang/Config/config.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "llvm/ADT/SmallVector.h" diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 47637024ab915..1f30d4314d78c 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -18,9 +18,9 @@ #include "TestFS.h" #include "TestIndex.h" #include "TestTU.h" -#include "Threading.h" #include "index/Index.h" #include "index/MemIndex.h" +#include "support/Threading.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/Support/Error.h" @@ -59,7 +59,9 @@ MATCHER_P(Labeled, Label, "") { } MATCHER_P(SigHelpLabeled, Label, "") { return arg.label == Label; } MATCHER_P(Kind, K, "") { return arg.Kind == K; } -MATCHER_P(Doc, D, "") { return arg.Documentation == D; } +MATCHER_P(Doc, D, "") { + return 
arg.Documentation && arg.Documentation->asPlainText() == D; +} MATCHER_P(ReturnType, D, "") { return arg.ReturnType == D; } MATCHER_P(HasInclude, IncludeHeader, "") { return !arg.Includes.empty() && arg.Includes[0].Header == IncludeHeader; @@ -83,7 +85,7 @@ Matcher &> Has(std::string Name, CompletionItemKind K) { return Contains(AllOf(Named(std::move(Name)), Kind(K))); } -MATCHER(IsDocumented, "") { return !arg.Documentation.empty(); } +MATCHER(IsDocumented, "") { return arg.Documentation.hasValue(); } MATCHER(Deprecated, "") { return arg.Deprecated; } std::unique_ptr memIndex(std::vector Symbols) { @@ -842,7 +844,7 @@ TEST(CompletionTest, Documentation) { Results.Completions, Contains(AllOf(Named("bar"), Doc("Doxygen comment.\n\\param int a")))); EXPECT_THAT(Results.Completions, - Contains(AllOf(Named("baz"), Doc("Multi-line\nblock comment")))); + Contains(AllOf(Named("baz"), Doc("Multi-line block comment")))); } TEST(CompletionTest, CommentsFromSystemHeaders) { @@ -1506,8 +1508,10 @@ TEST(CompletionTest, OverloadBundling) { EXPECT_EQ(A.Kind, CompletionItemKind::Method); EXPECT_EQ(A.ReturnType, "int"); // All overloads return int. // For now we just return one of the doc strings arbitrarily. - EXPECT_THAT(A.Documentation, AnyOf(HasSubstr("Overload with int"), - HasSubstr("Overload with bool"))); + ASSERT_TRUE(A.Documentation); + EXPECT_THAT( + A.Documentation->asPlainText(), + AnyOf(HasSubstr("Overload with int"), HasSubstr("Overload with bool"))); EXPECT_EQ(A.SnippetSuffix, "($0)"); } @@ -1641,7 +1645,8 @@ TEST(CompletionTest, Render) { C.ReturnType = "int"; C.RequiredQualifier = "Foo::"; C.Scope = "ns::Foo::"; - C.Documentation = "This is x()."; + C.Documentation.emplace(); + C.Documentation->addParagraph().appendText("This is ").appendCode("x()"); C.Includes.emplace_back(); auto &Include = C.Includes.back(); Include.Header = "\"foo.h\""; @@ -1660,8 +1665,8 @@ TEST(CompletionTest, Render) { EXPECT_EQ(R.insertText, "Foo::x"); EXPECT_EQ(R.insertTextFormat, InsertTextFormat::PlainText); EXPECT_EQ(R.filterText, "x"); - EXPECT_EQ(R.detail, "int\n\"foo.h\""); - EXPECT_EQ(R.documentation, "This is x()."); + EXPECT_EQ(R.detail, "int"); + EXPECT_EQ(R.documentation->value, "From \"foo.h\"\nThis is x()"); EXPECT_THAT(R.additionalTextEdits, IsEmpty()); EXPECT_EQ(R.sortText, sortText(1.0, "x")); EXPECT_FALSE(R.deprecated); @@ -1683,11 +1688,16 @@ TEST(CompletionTest, Render) { C.BundleSize = 2; R = C.render(Opts); - EXPECT_EQ(R.detail, "[2 overloads]\n\"foo.h\""); + EXPECT_EQ(R.detail, "[2 overloads]"); + EXPECT_EQ(R.documentation->value, "From \"foo.h\"\nThis is x()"); C.Deprecated = true; R = C.render(Opts); EXPECT_TRUE(R.deprecated); + + Opts.DocumentationFormat = MarkupKind::Markdown; + R = C.render(Opts); + EXPECT_EQ(R.documentation->value, "From `\"foo.h\"` \nThis is `x()`"); } TEST(CompletionTest, IgnoreRecoveryResults) { diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 7f4cff1475355..90d3621286db1 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -9,13 +9,13 @@ #include "Annotations.h" #include "Diagnostics.h" #include "ParsedAST.h" -#include "Path.h" #include "Protocol.h" #include "SourceCode.h" #include "TestFS.h" #include "TestIndex.h" #include "TestTU.h" #include "index/MemIndex.h" +#include "support/Path.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticSema.h" #include "llvm/Support/ScopedPrinter.h" 
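The Render expectations above reflect the shift this patch makes: a completion's Documentation is now an optional markup::Document rendered per client capability, rather than a plain string. A minimal sketch of that round trip, using only the Paragraph/Document API from support/Markup.h; the wrapper function is an assumption for illustration, not part of the patch:

#include "support/Markup.h"
#include <cassert>

// Sketch: build the documentation checked in CompletionTest.Render above and
// render it both ways (hypothetical helper; mirrors only the Markup.h API).
void renderDocumentationSketch() {
  clang::clangd::markup::Document Doc;
  Doc.addParagraph().appendText("This is ").appendCode("x()");
  assert(Doc.asPlainText() == "This is x()");  // plaintext clients: bare text
  assert(Doc.asMarkdown() == "This is `x()`"); // markdown clients: code span
}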
diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp index 9631c920fb27c..371388ef8e221 100644 --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp @@ -528,11 +528,11 @@ TEST(FileShardedIndexTest, Sharding) { B.insert(Sym1); // Should be stored in both b.h and b.cc B.insert(Sym2); - IF.Symbols = std::move(B).build(); + IF.Symbols.emplace(std::move(B).build()); } { // Should be stored in b.cc - IF.Refs = std::move(*refSlab(Sym1.ID, BSourceUri.c_str())); + IF.Refs.emplace(std::move(*refSlab(Sym1.ID, BSourceUri.c_str()))); } { RelationSlab::Builder B; @@ -542,7 +542,7 @@ TEST(FileShardedIndexTest, Sharding) { B.insert(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID}); // Dangling relation should be dropped. B.insert(Relation{symbol("3").ID, RelationKind::BaseOf, Sym1.ID}); - IF.Relations = std::move(B).build(); + IF.Relations.emplace(std::move(B).build()); } IF.Sources.emplace(); @@ -568,46 +568,47 @@ TEST(FileShardedIndexTest, Sharding) { IF.Cmd = tooling::CompileCommand(testRoot(), "b.cc", {"clang"}, "out"); - FileShardedIndex ShardedIndex(std::move(IF), testPath("b.cc")); - ASSERT_THAT( - ShardedIndex.getAllFiles(), - UnorderedElementsAre(testPath("a.h"), testPath("b.h"), testPath("b.cc"))); + FileShardedIndex ShardedIndex(std::move(IF)); + ASSERT_THAT(ShardedIndex.getAllSources(), + UnorderedElementsAre(AHeaderUri, BHeaderUri, BSourceUri)); { - auto Shard = ShardedIndex.getShard(testPath("a.h")); - EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("1"))); - EXPECT_THAT(Shard.Refs.getValue(), IsEmpty()); + auto Shard = ShardedIndex.getShard(AHeaderUri); + ASSERT_TRUE(Shard); + EXPECT_THAT(*Shard->Symbols, UnorderedElementsAre(QName("1"))); + EXPECT_THAT(*Shard->Refs, IsEmpty()); EXPECT_THAT( - Shard.Relations.getValue(), + *Shard->Relations, UnorderedElementsAre(Relation{Sym1.ID, RelationKind::BaseOf, Sym2.ID})); - ASSERT_THAT(Shard.Sources.getValue().keys(), - UnorderedElementsAre(AHeaderUri)); - EXPECT_THAT(Shard.Sources->lookup(AHeaderUri).DirectIncludes, IsEmpty()); - EXPECT_TRUE(Shard.Cmd.hasValue()); + ASSERT_THAT(Shard->Sources->keys(), UnorderedElementsAre(AHeaderUri)); + EXPECT_THAT(Shard->Sources->lookup(AHeaderUri).DirectIncludes, IsEmpty()); + EXPECT_TRUE(Shard->Cmd.hasValue()); } { - auto Shard = ShardedIndex.getShard(testPath("b.h")); - EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2"))); - EXPECT_THAT(Shard.Refs.getValue(), IsEmpty()); + auto Shard = ShardedIndex.getShard(BHeaderUri); + ASSERT_TRUE(Shard); + EXPECT_THAT(*Shard->Symbols, UnorderedElementsAre(QName("2"))); + EXPECT_THAT(*Shard->Refs, IsEmpty()); EXPECT_THAT( - Shard.Relations.getValue(), + *Shard->Relations, UnorderedElementsAre(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID})); - ASSERT_THAT(Shard.Sources.getValue().keys(), + ASSERT_THAT(Shard->Sources->keys(), UnorderedElementsAre(BHeaderUri, AHeaderUri)); - EXPECT_THAT(Shard.Sources->lookup(BHeaderUri).DirectIncludes, + EXPECT_THAT(Shard->Sources->lookup(BHeaderUri).DirectIncludes, UnorderedElementsAre(AHeaderUri)); - EXPECT_TRUE(Shard.Cmd.hasValue()); + EXPECT_TRUE(Shard->Cmd.hasValue()); } { - auto Shard = ShardedIndex.getShard(testPath("b.cc")); - EXPECT_THAT(Shard.Symbols.getValue(), UnorderedElementsAre(QName("2"))); - EXPECT_THAT(Shard.Refs.getValue(), UnorderedElementsAre(Pair(Sym1.ID, _))); - EXPECT_THAT(Shard.Relations.getValue(), IsEmpty()); - 
ASSERT_THAT(Shard.Sources.getValue().keys(), + auto Shard = ShardedIndex.getShard(BSourceUri); + ASSERT_TRUE(Shard); + EXPECT_THAT(*Shard->Symbols, UnorderedElementsAre(QName("2"))); + EXPECT_THAT(*Shard->Refs, UnorderedElementsAre(Pair(Sym1.ID, _))); + EXPECT_THAT(*Shard->Relations, IsEmpty()); + ASSERT_THAT(Shard->Sources->keys(), UnorderedElementsAre(BSourceUri, BHeaderUri)); - EXPECT_THAT(Shard.Sources->lookup(BSourceUri).DirectIncludes, + EXPECT_THAT(Shard->Sources->lookup(BSourceUri).DirectIncludes, UnorderedElementsAre(BHeaderUri)); - EXPECT_TRUE(Shard.Cmd.hasValue()); + EXPECT_TRUE(Shard->Cmd.hasValue()); } } } // namespace diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp index aaae58bacd8e6..e68b8d727172e 100644 --- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -9,8 +9,8 @@ #include "GlobalCompilationDatabase.h" #include "Matchers.h" -#include "Path.h" #include "TestFS.h" +#include "support/Path.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" diff --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp index 7f87573d9a115..a6464c64fd284 100644 --- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp @@ -127,7 +127,7 @@ class HeadersTest : public ::testing::Test { MATCHER_P(Written, Name, "") { return arg.Written == Name; } MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; } -MATCHER_P(IncludeLine, N, "") { return arg.R.start.line == N; } +MATCHER_P(IncludeLine, N, "") { return arg.HashLine == N; } MATCHER_P(Directive, D, "") { return arg.Directive == D; } MATCHER_P2(Distance, File, D, "") { diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 4b988488b6112..04842ba8d1412 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -1958,7 +1958,7 @@ def)", } } -TEST(Hover, DocCommentLineBreakConversion) { +TEST(Hover, ParseDocumentation) { struct Case { llvm::StringRef Documentation; llvm::StringRef ExpectedRenderMarkdown; @@ -2017,6 +2017,21 @@ TEST(Hover, DocCommentLineBreakConversion) { "foo\nbar", "foo bar", "foo bar", + }, + { + "Tests primality of `p`.", + "Tests primality of `p`.", + "Tests primality of `p`.", + }, + { + "'`' should not occur in `Code`", + "'\\`' should not occur in `Code`", + "'`' should not occur in `Code`", + }, + { + "`not\nparsed`", + "\\`not parsed\\`", + "`not parsed`", }}; for (const auto &C : Cases) { diff --git a/clang-tools-extra/clangd/unittests/JSONTransportTests.cpp b/clang-tools-extra/clangd/unittests/JSONTransportTests.cpp index 2a167bd8fb3cd..07ab70edbe3f4 100644 --- a/clang-tools-extra/clangd/unittests/JSONTransportTests.cpp +++ b/clang-tools-extra/clangd/unittests/JSONTransportTests.cpp @@ -5,9 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "Cancellation.h" #include "Protocol.h" #include "Transport.h" +#include "support/Cancellation.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include diff --git a/clang-tools-extra/clangd/unittests/LSPClient.cpp 
b/clang-tools-extra/clangd/unittests/LSPClient.cpp index 5e43314d1fe51..d2931cb9c24fb 100644 --- a/clang-tools-extra/clangd/unittests/LSPClient.cpp +++ b/clang-tools-extra/clangd/unittests/LSPClient.cpp @@ -4,8 +4,8 @@ #include "Protocol.h" #include "TestFS.h" -#include "Threading.h" #include "Transport.h" +#include "support/Threading.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include @@ -20,7 +20,9 @@ llvm::Expected clang::clangd::LSPClient::CallResult::take() { ADD_FAILURE() << "No result from call after 10 seconds!"; return llvm::json::Value(nullptr); } - return std::move(*Value); + auto Res = std::move(*Value); + Value.reset(); + return Res; } llvm::json::Value LSPClient::CallResult::takeValue() { diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index eaf74cf062af7..2d2835a12ff7c 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -13,6 +13,7 @@ #include "TestTU.h" #include "index/Ref.h" #include "refactor/Rename.h" +#include "support/TestTracer.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/MemoryBuffer.h" @@ -24,9 +25,11 @@ namespace clang { namespace clangd { namespace { +using testing::ElementsAre; using testing::Eq; -using testing::Pair; using testing::IsEmpty; +using testing::Pair; +using testing::SizeIs; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; @@ -1016,7 +1019,8 @@ TEST(CrossFileRenameTests, WithUpToDateIndex) { }, }; - for (const auto& T : Cases) { + trace::TestTracer Tracer; + for (const auto &T : Cases) { SCOPED_TRACE(T.FooH); Annotations FooH(T.FooH); Annotations FooCC(T.FooCC); @@ -1038,8 +1042,10 @@ TEST(CrossFileRenameTests, WithUpToDateIndex) { llvm::StringRef NewName = "NewName"; for (const auto &RenamePos : FooH.points()) { + EXPECT_THAT(Tracer.takeMetric("rename_files"), SizeIs(0)); auto FileEditsList = llvm::cantFail(runRename( Server, FooHPath, RenamePos, NewName, {/*CrossFile=*/true})); + EXPECT_THAT(Tracer.takeMetric("rename_files"), ElementsAre(2)); EXPECT_THAT( applyEdits(std::move(FileEditsList)), UnorderedElementsAre( diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp index 71721fe81cd90..0521327b623bc 100644 --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// #include "Annotations.h" -#include "Context.h" #include "Protocol.h" #include "SourceCode.h" #include "TestTU.h" +#include "support/Context.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TokenKinds.h" diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp index 6f50a5acd4e3d..9e6ad6c9b6e1e 100644 --- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp +++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp @@ -7,17 +7,18 @@ //===----------------------------------------------------------------------===// #include "Annotations.h" -#include "Cancellation.h" #include "ClangdServer.h" -#include "Context.h" #include "Diagnostics.h" #include "Matchers.h" #include "ParsedAST.h" -#include "Path.h" #include "Preamble.h" #include "TUScheduler.h" #include "TestFS.h" -#include 
"Threading.h" +#include "support/Cancellation.h" +#include "support/Context.h" +#include "support/Path.h" +#include "support/TestTracer.h" +#include "support/Threading.h" #include "clang/Basic/DiagnosticDriver.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -41,6 +42,7 @@ using ::testing::Eq; using ::testing::Field; using ::testing::IsEmpty; using ::testing::Pointee; +using ::testing::SizeIs; using ::testing::UnorderedElementsAre; MATCHER_P2(TUState, PreambleActivity, ASTActivity, "") { @@ -502,6 +504,7 @@ TEST_F(TUSchedulerTests, EvictedAST) { auto Opts = optsForTest(); Opts.AsyncThreadsCount = 1; Opts.RetentionPolicy.MaxRetainedASTs = 2; + trace::TestTracer Tracer; TUScheduler S(CDB, Opts); llvm::StringLiteral SourceContents = R"cpp( @@ -517,12 +520,16 @@ TEST_F(TUSchedulerTests, EvictedAST) { auto Bar = testPath("bar.cpp"); auto Baz = testPath("baz.cpp"); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(0)); // Build one file in advance. We will not access it later, so it will be the // one that the cache will evict. updateWithCallback(S, Foo, SourceContents, WantDiagnostics::Yes, [&BuiltASTCounter]() { ++BuiltASTCounter; }); ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); ASSERT_EQ(BuiltASTCounter.load(), 1); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(1)); // Build two more files. Since we can retain only 2 ASTs, these should be // the ones we see in the cache later. @@ -532,6 +539,8 @@ TEST_F(TUSchedulerTests, EvictedAST) { [&BuiltASTCounter]() { ++BuiltASTCounter; }); ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); ASSERT_EQ(BuiltASTCounter.load(), 3); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(2)); // Check only the last two ASTs are retained. ASSERT_THAT(S.getFilesWithCachedAST(), UnorderedElementsAre(Bar, Baz)); @@ -541,6 +550,8 @@ TEST_F(TUSchedulerTests, EvictedAST) { [&BuiltASTCounter]() { ++BuiltASTCounter; }); ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); ASSERT_EQ(BuiltASTCounter.load(), 4); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(1)); // Check the AST for foo.cpp is retained now and one of the others got // evicted. 
@@ -758,11 +769,16 @@ TEST_F(TUSchedulerTests, ForceRebuild) { EXPECT_EQ(DiagCount, 2U); } TEST_F(TUSchedulerTests, NoChangeDiags) { + trace::TestTracer Tracer; TUScheduler S(CDB, optsForTest(), captureDiags()); auto FooCpp = testPath("foo.cpp"); - auto Contents = "int a; int b;"; + const auto *Contents = "int a; int b;"; + EXPECT_THAT(Tracer.takeMetric("ast_access_read", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_read", "miss"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(0)); updateWithDiags( S, FooCpp, Contents, WantDiagnostics::No, [](std::vector) { ADD_FAILURE() << "Should not be called."; }); @@ -771,6 +787,8 @@ TEST_F(TUSchedulerTests, NoChangeDiags) { cantFail(std::move(IA)); }); ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); + EXPECT_THAT(Tracer.takeMetric("ast_access_read", "hit"), SizeIs(0)); + EXPECT_THAT(Tracer.takeMetric("ast_access_read", "miss"), SizeIs(1)); // Even though the inputs didn't change and AST can be reused, we need to // report the diagnostics, as they were not reported previously. @@ -779,6 +797,8 @@ TEST_F(TUSchedulerTests, NoChangeDiags) { [&](std::vector) { SeenDiags = true; }); ASSERT_TRUE(S.blockUntilIdle(timeoutSeconds(10))); ASSERT_TRUE(SeenDiags); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "hit"), SizeIs(1)); + EXPECT_THAT(Tracer.takeMetric("ast_access_diag", "miss"), SizeIs(0)); // Subsequent request does not get any diagnostics callback because the same // diags have previously been reported and the inputs didn't change. diff --git a/clang-tools-extra/clangd/unittests/TestFS.cpp b/clang-tools-extra/clangd/unittests/TestFS.cpp index aad08f2b61fd4..c436e9a7a9510 100644 --- a/clang-tools-extra/clangd/unittests/TestFS.cpp +++ b/clang-tools-extra/clangd/unittests/TestFS.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "TestFS.h" #include "GlobalCompilationDatabase.h" -#include "Path.h" #include "URI.h" +#include "support/Path.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" diff --git a/clang-tools-extra/clangd/unittests/TestFS.h b/clang-tools-extra/clangd/unittests/TestFS.h index a635d7c53a070..7dde78d9c157c 100644 --- a/clang-tools-extra/clangd/unittests/TestFS.h +++ b/clang-tools-extra/clangd/unittests/TestFS.h @@ -13,7 +13,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTFS_H #include "ClangdServer.h" #include "GlobalCompilationDatabase.h" -#include "Path.h" +#include "support/Path.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/clang-tools-extra/clangd/unittests/TestTU.h b/clang-tools-extra/clangd/unittests/TestTU.h index 229f65a4b95c7..415326e7d2815 100644 --- a/clang-tools-extra/clangd/unittests/TestTU.h +++ b/clang-tools-extra/clangd/unittests/TestTU.h @@ -19,8 +19,8 @@ #include "Compiler.h" #include "ParsedAST.h" -#include "Path.h" #include "index/Index.h" +#include "support/Path.h" #include "llvm/ADT/StringMap.h" #include "gtest/gtest.h" #include diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 09304ecdaf735..77e863895f803 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -1490,14 +1490,15 @@ TEST(GetNonLocalDeclRefs, All) { TEST(DocumentLinks, All) { Annotations 
MainCpp(R"cpp( - #include $foo[["foo.h"]] + #/*comments*/include /*comments*/ $foo[["foo.h"]] //more comments int end_of_preamble = 0; - #include $bar[["bar.h"]] + #include $bar[[<bar.h>]] )cpp"); TestTU TU; TU.Code = std::string(MainCpp.code()); TU.AdditionalFiles = {{"foo.h", ""}, {"bar.h", ""}}; + TU.ExtraArgs = {"-isystem."}; auto AST = TU.build(); EXPECT_THAT( diff --git a/clang-tools-extra/clangd/unittests/CancellationTests.cpp b/clang-tools-extra/clangd/unittests/support/CancellationTests.cpp similarity index 96% rename from clang-tools-extra/clangd/unittests/CancellationTests.cpp rename to clang-tools-extra/clangd/unittests/support/CancellationTests.cpp index 09f980748f91e..905ee5ecc9c0b 100644 --- a/clang-tools-extra/clangd/unittests/CancellationTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/CancellationTests.cpp @@ -1,6 +1,6 @@ -#include "Cancellation.h" -#include "Context.h" -#include "Threading.h" +#include "support/Cancellation.h" +#include "support/Context.h" +#include "support/Threading.h" #include "llvm/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang-tools-extra/clangd/unittests/ContextTests.cpp b/clang-tools-extra/clangd/unittests/support/ContextTests.cpp similarity index 98% rename from clang-tools-extra/clangd/unittests/ContextTests.cpp rename to clang-tools-extra/clangd/unittests/support/ContextTests.cpp index c76d565b2a2c3..2f195fd1c5d33 100644 --- a/clang-tools-extra/clangd/unittests/ContextTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ContextTests.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Context.h" +#include "support/Context.h" #include "gtest/gtest.h" diff --git a/clang-tools-extra/clangd/unittests/FunctionTests.cpp b/clang-tools-extra/clangd/unittests/support/FunctionTests.cpp similarity index 97% rename from clang-tools-extra/clangd/unittests/FunctionTests.cpp rename to clang-tools-extra/clangd/unittests/support/FunctionTests.cpp index 0cd8b7910463f..bfb1ea49434db 100644 --- a/clang-tools-extra/clangd/unittests/FunctionTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/FunctionTests.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Function.h" +#include "support/Function.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp similarity index 88% rename from clang-tools-extra/clangd/unittests/FormattedStringTests.cpp rename to clang-tools-extra/clangd/unittests/support/MarkupTests.cpp index d3fa1aa8c0a74..77160e1863a5b 100644 --- a/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp @@ -1,11 +1,11 @@ -//===-- FormattedStringTests.cpp ------------------------------------------===// +//===-- MarkupTests.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "FormattedString.h" +#include "support/Markup.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -112,8 +112,10 @@ TEST(Render, Escaping) { // But we have to escape the backticks. P = Paragraph(); - P.appendCode("foo`bar`baz"); + P.appendCode("foo`bar`baz", /*Preserve=*/true); EXPECT_EQ(P.asMarkdown(), "`foo``bar``baz`"); + // In plain-text, we fall back to different quotes. + EXPECT_EQ(P.asPlainText(), "'foo`bar`baz'"); // Inline code blocks starting or ending with backticks should add spaces. P = Paragraph(); @@ -149,23 +151,38 @@ TEST(Render, Escaping) { "`````"); } +TEST(Paragraph, Chunks) { + Paragraph P = Paragraph(); + P.appendText("One "); + P.appendCode("fish"); + P.appendText(", two "); + P.appendCode("fish", /*Preserve=*/true); + + EXPECT_EQ(P.asMarkdown(), "One `fish`, two `fish`"); + EXPECT_EQ(P.asPlainText(), "One fish, two `fish`"); +} + TEST(Paragraph, SeparationOfChunks) { // This test keeps appending contents to a single Paragraph and checks // expected accumulated contents after each one. // Purpose is to check for separation between different chunks. Paragraph P; - P.appendText("after"); + P.appendText("after "); EXPECT_EQ(P.asMarkdown(), "after"); EXPECT_EQ(P.asPlainText(), "after"); - P.appendCode("foobar"); + P.appendCode("foobar").appendSpace(); EXPECT_EQ(P.asMarkdown(), "after `foobar`"); EXPECT_EQ(P.asPlainText(), "after foobar"); P.appendText("bat"); EXPECT_EQ(P.asMarkdown(), "after `foobar` bat"); EXPECT_EQ(P.asPlainText(), "after foobar bat"); + + P.appendCode("no").appendCode("space"); + EXPECT_EQ(P.asMarkdown(), "after `foobar` bat`no` `space`"); + EXPECT_EQ(P.asPlainText(), "after foobar batno space"); } TEST(Paragraph, ExtraSpaces) { @@ -173,8 +190,16 @@ TEST(Paragraph, ExtraSpaces) { Paragraph P; P.appendText("foo\n \t baz"); P.appendCode(" bar\n"); - EXPECT_EQ(P.asMarkdown(), "foo baz `bar`"); - EXPECT_EQ(P.asPlainText(), "foo baz bar"); + EXPECT_EQ(P.asMarkdown(), "foo baz`bar`"); + EXPECT_EQ(P.asPlainText(), "foo bazbar"); +} + +TEST(Paragraph, SpacesCollapsed) { + Paragraph P; + P.appendText(" foo bar "); + P.appendText(" baz "); + EXPECT_EQ(P.asMarkdown(), "foo bar baz"); + EXPECT_EQ(P.asPlainText(), "foo bar baz"); } TEST(Paragraph, NewLines) { @@ -240,6 +265,17 @@ TEST(Document, Ruler) { EXPECT_EQ(D.asPlainText(), "foo\n\nfoo"); } +TEST(Document, Append) { + Document D; + D.addParagraph().appendText("foo"); + D.addRuler(); + Document E; + E.addRuler(); + E.addParagraph().appendText("bar"); + D.append(std::move(E)); + EXPECT_EQ(D.asMarkdown(), "foo \n\n---\nbar"); +} + TEST(Document, Heading) { Document D; D.addHeading(1).appendText("foo"); diff --git a/clang-tools-extra/clangd/unittests/support/TestTracer.cpp b/clang-tools-extra/clangd/unittests/support/TestTracer.cpp new file mode 100644 index 0000000000000..2a45f09f3bf13 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/support/TestTracer.cpp @@ -0,0 +1,39 @@ +//===-- TestTracer.cpp - Tracing unit tests ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "TestTracer.h" +#include "support/Trace.h" +#include "llvm/ADT/StringRef.h" +#include <mutex> + +namespace clang { +namespace clangd { +namespace trace { + +void TestTracer::record(const Metric &Metric, double Value, + llvm::StringRef Label) { + std::lock_guard<std::mutex> Lock(Mu); + Measurements[Metric.Name][Label].push_back(Value); +} + +std::vector<double> TestTracer::takeMetric(llvm::StringRef Metric, + llvm::StringRef Label) { + std::lock_guard<std::mutex> Lock(Mu); + auto LabelsIt = Measurements.find(Metric); + if (LabelsIt == Measurements.end()) + return {}; + auto &Labels = LabelsIt->getValue(); + auto ValuesIt = Labels.find(Label); + if (ValuesIt == Labels.end()) + return {}; + auto Res = std::move(ValuesIt->getValue()); + ValuesIt->getValue().clear(); + return Res; +} +} // namespace trace +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/support/TestTracer.h b/clang-tools-extra/clangd/unittests/support/TestTracer.h new file mode 100644 index 0000000000000..24a7283a7ffc7 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/support/TestTracer.h @@ -0,0 +1,49 @@ +//===-- TestTracer.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Allows setting up a fake tracer for tests. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_SUPPORT_TESTTRACER_H +#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_SUPPORT_TESTTRACER_H + +#include "support/Trace.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include <mutex> +#include <string> +#include <vector> + +namespace clang { +namespace clangd { +namespace trace { + +/// A RAII Tracer that can be used by tests. +class TestTracer : public EventTracer { +public: + TestTracer() : S(*this) {} + /// Stores all the measurements to be returned by takeMetric later on. + void record(const Metric &Metric, double Value, + llvm::StringRef Label) override; + + /// Returns recorded measurements for \p Metric and clears them. + std::vector<double> takeMetric(llvm::StringRef Metric, + llvm::StringRef Label = ""); + +private: + std::mutex Mu; + /// Measurements recorded per metric per label.
+ llvm::StringMap<llvm::StringMap<std::vector<double>>> Measurements; + Session S; +}; + +} // namespace trace +} // namespace clangd +} // namespace clang +#endif diff --git a/clang-tools-extra/clangd/unittests/ThreadingTests.cpp b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp similarity index 98% rename from clang-tools-extra/clangd/unittests/ThreadingTests.cpp rename to clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp index 18b9146ef1423..015af092012a1 100644 --- a/clang-tools-extra/clangd/unittests/ThreadingTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Threading.h" +#include "support/Threading.h" #include "gtest/gtest.h" #include <mutex> diff --git a/clang-tools-extra/clangd/unittests/TraceTests.cpp b/clang-tools-extra/clangd/unittests/support/TraceTests.cpp similarity index 88% rename from clang-tools-extra/clangd/unittests/TraceTests.cpp rename to clang-tools-extra/clangd/unittests/support/TraceTests.cpp index 1871e6acf10cf..10670f79be1b4 100644 --- a/clang-tools-extra/clangd/unittests/TraceTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/TraceTests.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "Trace.h" - +#include "TestTracer.h" +#include "support/Context.h" +#include "support/Trace.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Threading.h" #include "llvm/Support/YAMLParser.h" @@ -20,6 +22,8 @@ namespace clang { namespace clangd { namespace { +using testing::SizeIs; + MATCHER_P(StringNode, Val, "") { if (arg->getType() != llvm::yaml::Node::NK_Scalar) { *result_listener << "is a " << arg->getVerbatimTag(); @@ -122,6 +126,18 @@ TEST(TraceTest, SmokeTest) { ASSERT_EQ(++Prop, Root->end()); } +TEST(MetricsTracer, LatencyTest) { + trace::TestTracer Tracer; + constexpr llvm::StringLiteral MetricName = "span_latency"; + constexpr llvm::StringLiteral OpName = "op_name"; + { + // A span should record latencies to span_latency by default.
+ trace::Span SpanWithLat(OpName); + EXPECT_THAT(Tracer.takeMetric(MetricName, OpName), SizeIs(0)); + } + EXPECT_THAT(Tracer.takeMetric(MetricName, OpName), SizeIs(1)); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/xpc/CMakeLists.txt b/clang-tools-extra/clangd/xpc/CMakeLists.txt index f05767c86b21b..df8c361817a84 100644 --- a/clang-tools-extra/clangd/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/CMakeLists.txt @@ -20,10 +20,10 @@ set(LLVM_OPTIONAL_SOURCES Conversion.cpp XPCTransport.cpp) add_clang_library(clangdXpcJsonConversions Conversion.cpp - LINK_LIBS clangDaemon + LINK_LIBS clangDaemon clangdSupport ) add_clang_library(clangdXpcTransport XPCTransport.cpp - LINK_LIBS clangDaemon clangdXpcJsonConversions + LINK_LIBS clangDaemon clangdSupport clangdXpcJsonConversions ) diff --git a/clang-tools-extra/clangd/xpc/Conversion.cpp b/clang-tools-extra/clangd/xpc/Conversion.cpp index 3e8d36b7b4308..9b52a68e64fb6 100644 --- a/clang-tools-extra/clangd/xpc/Conversion.cpp +++ b/clang-tools-extra/clangd/xpc/Conversion.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "xpc/Conversion.h" -#include "Logger.h" +#include "support/Logger.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ScopedPrinter.h" #include diff --git a/clang-tools-extra/clangd/xpc/XPCTransport.cpp b/clang-tools-extra/clangd/xpc/XPCTransport.cpp index 02c1b083701e9..50eacf2115eea 100644 --- a/clang-tools-extra/clangd/xpc/XPCTransport.cpp +++ b/clang-tools-extra/clangd/xpc/XPCTransport.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// #include "Conversion.h" -#include "Logger.h" #include "Protocol.h" // For LSPError #include "Transport.h" +#include "support/Logger.h" #include "llvm/Support/Errno.h" #include diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 011962ea1d053..97ef4271d6158 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -113,6 +113,11 @@ New checks Flags use of the `C` standard library functions ``memset``, ``memcpy`` and ``memcmp`` and similar derivatives on non-trivial types. +- New :doc:`llvmlibc-callee-namespace + ` check. + + Checks all calls resolve to functions within ``__llvm_libc`` namespace. + - New :doc:`llvmlibc-implementation-in-namespace ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst index e3ecf75a3a527..c2bd2df062b11 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst @@ -31,11 +31,10 @@ It depends on the actual platform whether plain ``char`` is handled as ``signed by default and so it is caught by this check or not. To change the default behavior you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options. -Currently, this check is limited to assignments and variable declarations, -where a ``signed char`` is assigned to an integer variable and to -equality/inequality comparisons between ``signed char`` and ``unsigned char``. -There are other use cases where the unexpected value ranges might lead to -similar bogus behavior. 
+Currently, this check warns in the following cases: + +- ``signed char`` is assigned to an integer variable +- ``signed char`` and ``unsigned char`` are compared with an equality/inequality operator +- ``signed char`` is converted to an integer in an array subscript See also: `STR34-C. Cast characters to unsigned char before converting to larger integer sizes
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index a3c695dd3311b..c3d047f30292e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -188,6 +188,7 @@ Clang-Tidy Checks `llvm-prefer-isa-or-dyn-cast-in-conditionals <llvm-prefer-isa-or-dyn-cast-in-conditionals.html>`_, "Yes" `llvm-prefer-register-over-unsigned <llvm-prefer-register-over-unsigned.html>`_, "Yes" `llvm-twine-local <llvm-twine-local.html>`_, "Yes" + `llvmlibc-callee-namespace <llvmlibc-callee-namespace.html>`_, `llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_, `llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes" `misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/checks/llvmlibc-callee-namespace.rst b/clang-tools-extra/docs/clang-tidy/checks/llvmlibc-callee-namespace.rst new file mode 100644 index 0000000000000..63c1398881b4b --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/llvmlibc-callee-namespace.rst @@ -0,0 +1,24 @@ +.. title:: clang-tidy - llvmlibc-callee-namespace + +llvmlibc-callee-namespace +==================================== + +Checks that all calls resolve to functions within the ``__llvm_libc`` namespace. + +.. code-block:: c++ + + namespace __llvm_libc { + + // Allow calls with the fully qualified name. + __llvm_libc::strlen("hello"); + + // Allow calls to compiler provided functions. + (void)__builtin_abs(-1); + + // Bare calls are allowed as long as they resolve to the correct namespace. + strlen("world"); + + // Disallow calling into functions in the global namespace. + ::strlen("!"); + + } // namespace __llvm_libc
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp index ef42b0c85ae6d..a1cc2ae489915 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp @@ -3,6 +3,16 @@ /////////////////////////////////////////////////////////////////// /// Test cases correctly caught by the check. +typedef __SIZE_TYPE__ size_t; + +namespace std { +template +struct array { + T &operator[](size_t n); + T &at(size_t n); +}; +} // namespace std + int SimpleVarDeclaration() { signed char CCharacter = -5; int NCharacter = CCharacter; @@ -90,6 +100,16 @@ int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) { return 0; } +int SignedCharCArraySubscript(signed char SCharacter) { + int Array[3] = {1, 2, 3}; + + return Array[static_cast(SCharacter)]; // CHECK-MESSAGES: [[@LINE]]:42: warning: 'signed char' to 'unsigned int' conversion in array subscript; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] +} + +int SignedCharSTDArraySubscript(std::array Array, signed char SCharacter) { + return Array[static_cast(SCharacter)]; // CHECK-MESSAGES: [[@LINE]]:42: warning: 'signed char' to 'unsigned int' conversion in array subscript; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] +} + /////////////////////////////////////////////////////////////////// /// Test cases correctly ignored by the check.
@@ -207,3 +227,23 @@ int CompareWithUnsignedAsciiConstant(signed char SCharacter) { return 1; return 0; } + +int UnsignedCharCArraySubscript(unsigned char USCharacter) { + int Array[3] = {1, 2, 3}; + + return Array[static_cast(USCharacter)]; +} + +int CastedCArraySubscript(signed char SCharacter) { + int Array[3] = {1, 2, 3}; + + return Array[static_cast(SCharacter)]; +} + +int UnsignedCharSTDArraySubscript(std::array Array, unsigned char USCharacter) { + return Array[static_cast(USCharacter)]; +} + +int CastedSTDArraySubscript(std::array Array, signed char SCharacter) { + return Array[static_cast(SCharacter)]; +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/llvmlibc-callee-namespace.cpp b/clang-tools-extra/test/clang-tidy/checkers/llvmlibc-callee-namespace.cpp new file mode 100644 index 0000000000000..b7fee5dd7e0b3 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/llvmlibc-callee-namespace.cpp @@ -0,0 +1,42 @@ +// RUN: %check_clang_tidy %s llvmlibc-callee-namespace %t + +namespace __llvm_libc { +namespace nested { +void nested_func() {} +} // namespace nested +void libc_api_func() {} +} // namespace __llvm_libc + +// Emulate a function from the public headers like string.h +void libc_api_func() {} + +namespace __llvm_libc { +void Test() { + // Allow calls with the fully qualified name. + __llvm_libc::libc_api_func(); + __llvm_libc::nested::nested_func(); + void (*qualifiedPtr)(void) = __llvm_libc::libc_api_func; + qualifiedPtr(); + + // Should not trigger on compiler provided function calls. + (void)__builtin_abs(-1); + + // Bare calls are allowed as long as they resolve to the correct namespace. + libc_api_func(); + nested::nested_func(); + void (*barePtr)(void) = __llvm_libc::libc_api_func; + barePtr(); + + // Disallow calling into global namespace for implemented entrypoints. + ::libc_api_func(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'libc_api_func' must resolve to a function declared within the '__llvm_libc' namespace + // CHECK-MESSAGES: :11:6: note: resolves to this declaration + + // Disallow indirect references to functions in global namespace. 
+ void (*badPtr)(void) = ::libc_api_func; + badPtr(); + // CHECK-MESSAGES: :[[@LINE-2]]:26: warning: 'libc_api_func' must resolve to a function declared within the '__llvm_libc' namespace + // CHECK-MESSAGES: :11:6: note: resolves to this declaration +} + +} // namespace __llvm_libc diff --git a/clang-tools-extra/test/lit.site.cfg.py.in b/clang-tools-extra/test/lit.site.cfg.py.in index a970e7fafc1f2..5dc112a561264 100644 --- a/clang-tools-extra/test/lit.site.cfg.py.in +++ b/clang-tools-extra/test/lit.site.cfg.py.in @@ -8,7 +8,7 @@ config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.clang_tools_binary_dir = "@CLANG_TOOLS_BINARY_DIR@" config.clang_tools_dir = "@CLANG_TOOLS_DIR@" config.clang_libs_dir = "@SHLIBDIR@" -config.python_executable = "@PYTHON_EXECUTABLE@" +config.python_executable = "@Python3_EXECUTABLE@" config.target_triple = "@TARGET_TRIPLE@" config.clang_staticanalyzer = @CLANG_ENABLE_STATIC_ANALYZER@ diff --git a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp index ebf5f59ce4a69..76de7711cee9f 100644 --- a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp @@ -221,6 +221,84 @@ TEST(TransformerClangTidyCheckTest, AddIncludeAngled) { test::runCheckOnCode>(Input)); } +class IncludeOrderCheck : public TransformerClangTidyCheck { + static RewriteRule rule() { + using namespace ::clang::ast_matchers; + RewriteRule Rule = transformer::makeRule(integerLiteral(), change(cat("5")), + cat("no message")); + addInclude(Rule, "bar.h", IncludeFormat::Quoted); + return Rule; + } + +public: + IncludeOrderCheck(StringRef Name, ClangTidyContext *Context) + : TransformerClangTidyCheck(rule(), Name, Context) {} +}; + +TEST(TransformerClangTidyCheckTest, AddIncludeObeysSortStyleLocalOption) { + std::string Input = R"cc(#include "input.h" +int h(int x) { return 3; })cc"; + + std::string TreatsAsLibraryHeader = R"cc(#include "input.h" + +#include "bar.h" +int h(int x) { return 5; })cc"; + + std::string TreatsAsNormalHeader = R"cc(#include "bar.h" +#include "input.h" +int h(int x) { return 5; })cc"; + + ClangTidyOptions Options; + std::map PathsToContent = {{"input.h", "\n"}}; + Options.CheckOptions["test-check-0.IncludeStyle"] = "llvm"; + EXPECT_EQ(TreatsAsLibraryHeader, test::runCheckOnCode( + Input, nullptr, "inputTest.cpp", None, + Options, PathsToContent)); + EXPECT_EQ(TreatsAsNormalHeader, test::runCheckOnCode( + Input, nullptr, "input_test.cpp", None, + Options, PathsToContent)); + + Options.CheckOptions["test-check-0.IncludeStyle"] = "google"; + EXPECT_EQ(TreatsAsNormalHeader, + test::runCheckOnCode( + Input, nullptr, "inputTest.cc", None, Options, PathsToContent)); + EXPECT_EQ(TreatsAsLibraryHeader, test::runCheckOnCode( + Input, nullptr, "input_test.cc", None, + Options, PathsToContent)); +} + +TEST(TransformerClangTidyCheckTest, AddIncludeObeysSortStyleGlobalOption) { + std::string Input = R"cc(#include "input.h" +int h(int x) { return 3; })cc"; + + std::string TreatsAsLibraryHeader = R"cc(#include "input.h" + +#include "bar.h" +int h(int x) { return 5; })cc"; + + std::string TreatsAsNormalHeader = R"cc(#include "bar.h" +#include "input.h" +int h(int x) { return 5; })cc"; + + ClangTidyOptions Options; + std::map PathsToContent = {{"input.h", "\n"}}; + Options.CheckOptions["IncludeStyle"] = "llvm"; + EXPECT_EQ(TreatsAsLibraryHeader, test::runCheckOnCode( + Input, nullptr, "inputTest.cpp", 
None, + Options, PathsToContent)); + EXPECT_EQ(TreatsAsNormalHeader, test::runCheckOnCode( + Input, nullptr, "input_test.cpp", None, + Options, PathsToContent)); + + Options.CheckOptions["IncludeStyle"] = "google"; + EXPECT_EQ(TreatsAsNormalHeader, + test::runCheckOnCode( + Input, nullptr, "inputTest.cc", None, Options, PathsToContent)); + EXPECT_EQ(TreatsAsLibraryHeader, test::runCheckOnCode( + Input, nullptr, "input_test.cc", None, + Options, PathsToContent)); +} + } // namespace } // namespace utils } // namespace tidy diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 75ab1a86a170f..6607a0f5e60db 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -130,16 +130,38 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) if(LLVM_INCLUDE_TESTS) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR -"Unable to find Python interpreter, required for builds and testing. + if(CMAKE_VERSION VERSION_LESS 3.12) + include(FindPythonInterp) + if(NOT PYTHONINTERP_FOUND) + message(FATAL_ERROR + "Unable to find Python interpreter, required for builds and testing. -Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() + Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") + endif() + + if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) + message(FATAL_ERROR "Python 2.7 or newer is required") + endif() - if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + else() + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") + endif() + + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) + endif() endif() # Check prebuilt llvm/utils. diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt index 626256af9c1b6..3f58c4bde8e71 100644 --- a/clang/bindings/python/tests/CMakeLists.txt +++ b/clang/bindings/python/tests/CMakeLists.txt @@ -3,7 +3,7 @@ add_custom_target(check-clang-python COMMAND ${CMAKE_COMMAND} -E env CLANG_LIBRARY_PATH=$ - ${PYTHON_EXECUTABLE} -m unittest discover + "${Python3_EXECUTABLE}" -m unittest discover DEPENDS libclang WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 6d486224e3c2c..ce26b06bf171c 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -395,6 +395,21 @@ the configuration (without a prefix: ``Auto``). return; } +**AllowShortEnumsOnASingleLine** (``bool``) + Allow short enums on a single line. + + .. code-block:: c++ + + true: + enum { A, B } myEnum; + + false: + enum + { + A, + B + } myEnum; + **AllowShortFunctionsOnASingleLine** (``ShortFunctionStyle``) Dependent on the value, ``int f() { return 0; }`` can be put on a single line. 
@@ -717,26 +732,6 @@ the configuration (without a prefix: ``Auto``). aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa); } - -**InsertTrailingCommas** (``TrailingCommaStyle``) can be set to ``TCS_Wrapped`` - to insert trailing commas in container literals (arrays and objects) that wrap - across multiple lines. It is currently only available for JavaScript and - disabled by default (``TCS_None``). - - ``InsertTrailingCommas`` cannot be used together with ``BinPackArguments`` as - inserting the comma disables bin-packing. - - .. code-block:: c++ - - TSC_Wrapped: - const someArray = [ - aaaaaaaaaaaaaaaaaaaaaaaaaa, - aaaaaaaaaaaaaaaaaaaaaaaaaa, - aaaaaaaaaaaaaaaaaaaaaaaaaa, - // ^ inserted - ] - - **BinPackParameters** (``bool``) If ``false``, a function declaration's or function definition's parameters will either all be on the same line or will have one line each. @@ -1770,6 +1765,38 @@ the configuration (without a prefix: ``Auto``). LoooooooooooooooooooooooooooooooooooooooongReturnType LoooooooooooooooooooooooooooooooongFunctionDeclaration(); +**InsertTrailingCommas** (``TrailingCommaStyle``) + If set to ``TCS_Wrapped``, will insert trailing commas in container + literals (arrays and objects) that wrap across multiple lines. + It is currently only available for JavaScript + and disabled by default (``TCS_None``). + ``InsertTrailingCommas`` cannot be used together with ``BinPackArguments`` + as inserting the comma disables bin-packing. + + .. code-block:: c++ + + TCS_Wrapped: + const someArray = [ + aaaaaaaaaaaaaaaaaaaaaaaaaa, + aaaaaaaaaaaaaaaaaaaaaaaaaa, + aaaaaaaaaaaaaaaaaaaaaaaaaa, + // ^ inserted + ] + + Possible values: + + * ``TCS_None`` (in configuration: ``None``) + Do not insert trailing commas. + + * ``TCS_Wrapped`` (in configuration: ``Wrapped``) + Insert trailing commas in container literals that were wrapped over + multiple lines. Note that this is conceptually incompatible with + bin-packing, because the trailing comma is used as an indicator + that a container should be formatted one-per-line (i.e. not bin-packed). + So inserting a trailing comma counteracts bin-packing. + + + **JavaImportGroups** (``std::vector``) A vector of prefixes ordered by the desired groups for Java imports. @@ -2064,7 +2091,8 @@ the configuration (without a prefix: ``Auto``). false: - (void)_aMethod { - [self.test1 t:self w:self callback:^(typeof(self) self, NSNumber *u, NSNumber *v) { + [self.test1 t:self w:self callback:^(typeof(self) self, NSNumber + *u, NSNumber *v) { u = c; }] } @@ -2072,8 +2100,8 @@ the configuration (without a prefix: ``Auto``). - (void)_aMethod { [self.test1 t:self - w:self - callback:^(typeof(self) self, NSNumber *u, NSNumber *v) { + w:self + callback:^(typeof(self) self, NSNumber *u, NSNumber *v) { u = c; }] } @@ -2538,7 +2566,8 @@ the configuration (without a prefix: ``Auto``). appears within a line (e.g. consecutive assignments and declarations). * ``UT_AlignWithSpaces`` (in configuration: ``AlignWithSpaces``) - Use tabs for line continuation and indentation, and spaces for alignment. + Use tabs for line continuation and indentation, and spaces for + alignment.
* ``UT_Always`` (in configuration: ``Always``) Use tabs whenever we need to fill whitespace that spans at least from
diff --git a/clang/docs/DataFlowSanitizer.rst b/clang/docs/DataFlowSanitizer.rst index cc9b8e6e16997..44956037490a8 100644 --- a/clang/docs/DataFlowSanitizer.rst +++ b/clang/docs/DataFlowSanitizer.rst @@ -24,13 +24,14 @@ How to build libc++ with DFSan ============================== DFSan requires either all of your code to be instrumented or for uninstrumented -functions to be listed as``uninstrumented`` in the `ABI list`_. +functions to be listed as ``uninstrumented`` in the `ABI list`_. If you'd like to have instrumented libc++ functions, then you need to build it with DFSan instrumentation from source. Here is an example of how to build libc++ and the libc++ ABI with data flow sanitizer instrumentation. .. code-block:: console + cd libcxx-build # An example using ninja
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f1b7c79db6d66..11114e82a6e53 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -13,6 +13,7 @@ Clang Language Extensions BlockLanguageSpec Block-ABI-Apple AutomaticReferenceCounting + MatrixTypes Introduction ============ @@ -492,6 +493,24 @@ See also :ref:`langext-__builtin_shufflevector`, :ref:`langext-__builtin_convert 'select', they operate somewhat differently. OpenCL selects based on signedness of the condition operands, but GCC vectors use normal bool conversions (that is, != 0). +Matrix Types +============ + +Clang provides an extension for matrix types, which is currently being +implemented. See :ref:`the draft specification <matrixtypes>` for more details. + +For example, the code below uses the matrix types extension to multiply two 4x4 +float matrices and add the result to a third 4x4 matrix. + +.. code-block:: c++ + + typedef float m4x4_t __attribute__((matrix_type(4, 4))); + + m4x4_t f(m4x4_t a, m4x4_t b, m4x4_t c) { + return a + b * c; + } + + Half-Precision Floating Point =============================
diff --git a/clang/docs/MatrixTypes.rst b/clang/docs/MatrixTypes.rst new file mode 100644 index 0000000000000..54099e5aae930 --- /dev/null +++ b/clang/docs/MatrixTypes.rst @@ -0,0 +1,285 @@ +================== +Matrix Types +================== + +.. contents:: + :local: + +.. _matrixtypes: + +Clang provides a C/C++ language extension that allows users to directly express +fixed-size 2-dimensional matrices as language values and perform arithmetic on +them. + +This feature is currently experimental, and both its design and its +implementation are in flux. + +Draft Specification +=================== + +Matrix Type +----------- + +A matrix type is a scalar type with an underlying *element type*, a constant +number of *rows*, and a constant number of *columns*. Matrix types with the same +element type, rows, and columns are the same type. A value of a matrix type +includes storage for ``rows * columns`` values of the *element type*. The +internal layout, overall size and alignment are implementation-defined. + +The maximum allowed value of the product of the number of rows and columns is +implementation-defined. If that implementation-defined limit is exceeded, the +program is ill-formed.
+ +Currently, the element type of a matrix is only permitted to be one of the +following types: + +* an integer type (as in C2x 6.2.5p19), but excluding enumerated types and ``_Bool`` +* the standard floating types ``float`` or ``double`` +* a half-precision floating point type, if one is supported on the target + +Other types may be supported in the future. + +Matrix Type Attribute +--------------------- + +Matrix types can be declared by adding the ``matrix_type`` attribute to the +declaration of a *typedef* (or a C++ alias declaration). The underlying type +of the *typedef* must be a valid matrix element type. The +attribute takes two arguments, both of which must be integer constant +expressions that evaluate to a value greater than zero. The first specifies the +number of rows, and the second specifies the number of columns. The underlying +type of the *typedef* becomes a matrix type with the given dimensions and an +element type of the former underlying type. + +If a declaration of a *typedef-name* has a ``matrix_type`` attribute, then all +declarations of that *typedef-name* shall have a ``matrix_type`` attribute with the +same element type, number of rows, and number of columns. + +Standard Conversions +-------------------- + +The standard conversions are extended as follows. Note that these conversions +are intentionally not listed as satisfying the constraints for assignment, +which is to say, they are only permitted as explicit casts, not as implicit +conversions. + +A value of matrix type can be converted to another matrix type if the number of +rows and columns are the same and the value's elements can be converted to the +element type of the result type. The result is a matrix where each element is +the converted corresponding element. + +A value of any real type (as in C2x 6.2.5p17) can be converted to a matrix type +if it can be converted to the element type of the matrix. The result is a +matrix where all elements are the converted original value. + +If the number of rows or columns differs between the original and resulting +type, the program is ill-formed. + + +Arithmetic Conversions +---------------------- + +The usual arithmetic conversions are extended as follows. + +Insert at the start: + +* If both operands are of matrix type, no arithmetic conversion is performed. +* If one operand is of matrix type and the other operand is of a real type, + convert the real type operand to the matrix type + according to the standard conversion rules. + +Matrix Type Element Access Operator +----------------------------------- + +An expression of the form ``E1 [E2] [E3]``, where ``E1`` has matrix type ``cv +M``, is a matrix element access expression. Let ``T`` be the element type +of ``M``, and let ``R`` and ``C`` be the number of rows and columns in ``M`` +respectively. The index expressions shall have integral or unscoped +enumeration type and shall not be uses of the comma operator unless +parenthesized. The first index expression shall evaluate to a +non-negative value less than ``R``, and the second index expression shall +evaluate to a non-negative value less than ``C``, or else the expression has +undefined behavior. If ``E1`` is a prvalue, the result is a prvalue with type +``T`` and is the value of the element at the given row and column in the matrix. +Otherwise, the result is a glvalue with type ``cv T`` and with the same value +category as ``E1`` which refers to the element at the given row and column in +the matrix.
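+
+For illustration only (not part of the normative wording above), here is a
+sketch of element access, using a hypothetical ``m2x3_t`` typedef:
+
+.. code-block:: c++
+
+  typedef float m2x3_t __attribute__((matrix_type(2, 3)));
+
+  float f(m2x3_t m) {
+    m[0][1] = 42.0f; // Assigns to the element at row 0, column 1.
+    return m[1][2];  // Reads the element at row 1, column 2.
+  }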
+ +Programs containing a single subscript expression into a matrix are ill-formed. + +**Note**: We considered providing an expression of the form +``postfix-expression [expression]`` to access columns of a matrix. We think +that such an expression would be problematic once both column and row major +matrices are supported: depending on the memory layout, either accessing columns +or rows can be done efficiently, but not both. Instead, we propose to provide +builtins to extract rows and columns from a matrix. This makes the operations +more explicit. + +Matrix Type Binary Operators +---------------------------- + +Each matrix type supports the following binary operators: ``+``, ``-`` and ``*``. The ``*`` +operator provides matrix multiplication, while ``+`` and ``-`` are performed +element-wise. There are also scalar versions of the operators, which take a +matrix type and the matrix element type. The operation is applied to all +elements of the matrix using the scalar value. + +For ``BIN_OP`` in ``+``, ``-``, ``*`` given the expression ``M1 BIN_OP M2`` where +at least one of ``M1`` or ``M2`` is of matrix type and, for ``*``, the other is of +a real type: + +* The usual arithmetic conversions are applied to ``M1`` and ``M2``. [ Note: if ``M1`` or + ``M2`` is of a real type, it is broadcast to a matrix here. — end note ] +* ``M1`` and ``M2`` shall be of the same matrix type. +* The result is equivalent to ``Res`` in the following, where ``col`` is the number of + columns and ``row`` is the number of rows in the matrix type: + +.. code-block:: c++ + + decltype(M1) Res; + for (int C = 0; C < col; ++C) + for (int R = 0; R < row; ++R) + Res[R][C] = M1[R][C] BIN_OP M2[R][C]; + +Given the expression ``M1 * M2`` where ``M1`` and ``M2`` are of matrix type: + +* The usual arithmetic conversions are applied to ``M1`` and ``M2``. +* The type of ``M1`` shall have the same number of columns as the type of ``M2`` has + rows. The element types of ``M1`` and ``M2`` shall be the same type. +* The resulting type, ``MTy``, is a matrix type with the common element type, + the number of rows of ``M1`` and the number of columns of ``M2``. +* The result is equivalent to ``Res`` in the following where ``EltTy`` is the + element type of ``MTy``, ``col`` is the number of columns, ``row`` is the + number of rows in ``MTy`` and ``inner`` is the number of columns of ``M1``: + +.. code-block:: c++ + + MTy Res; + for (int C = 0; C < col; ++C) { + for (int R = 0; R < row; ++R) { + EltTy Elt = 0; + for (int K = 0; K < inner; ++K) { + Elt += M1[R][K] * M2[K][C]; + } + Res[R][C] = Elt; + } + } + +All operations on matrix types match the behavior of the element type with +respect to signed overflows. + +With respect to floating-point contraction, rounding and environment rules, +operations on matrix types match the behavior of the elementwise operations +in the corresponding expansions provided above. + +Operations on floating-point matrices have the same rounding and floating-point +environment behavior as ordinary floating-point operations in the expression's +context. For the purposes of floating-point contraction, all calculations done +as part of a matrix operation are considered intermediate operations, and their +results need not be rounded to the format of the element type until the final +result in the containing expression. This is subject to the normal restrictions +on contraction, such as ``#pragma STDC FP_CONTRACT``. + +For the ``+=``, ``-=`` and ``*=`` operators, the semantics match their expanded +variants.
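+
+As a non-normative illustration of these rules (the ``m4x4_t`` typedef is
+ours, not part of the specification):
+
+.. code-block:: c++
+
+  typedef float m4x4_t __attribute__((matrix_type(4, 4)));
+
+  m4x4_t g(m4x4_t a, m4x4_t b) {
+    // 1.0f is broadcast to a 4x4 matrix and added element-wise;
+    // a * b is a matrix-matrix multiplication.
+    return (a + 1.0f) * b;
+  }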
+ +Matrix Type Builtin Operations +------------------------------ + +Each matrix type supports a collection of builtin expressions that look like +function calls but do not form an overload set. Here they are described as +function declarations with rules for how to construct the argument list types +and return type and the library description elements from +[library.description.structure.specifications]/3 in the C++ standard. + +Definitions: + +* *M*, *M1*, *M2*, *M3* - Matrix types +* *T* - Element type +* *row*, *col* - Row and column arguments respectively. + + +``M2 __builtin_matrix_transpose(M1 matrix)`` + +**Remarks**: The return type is a cv-unqualified matrix type that has the same +element type as ``M1`` and has the same number of rows as ``M1`` has columns and +the same number of columns as ``M1`` has rows. + +**Returns**: A matrix ``Res`` equivalent to the code below, where ``col`` refers to the +number of columns of ``M1``, and ``row`` to the number of rows of ``M1``. + +**Effects**: Equivalent to: + +.. code-block:: c++ + + M2 Res; + for (int C = 0; C < col; ++C) + for (int R = 0; R < row; ++R) + Res[C][R] = matrix[R][C]; + + +``M __builtin_matrix_column_major_load(T *ptr, size_t row, size_t col, size_t columnStride)`` + +**Mandates**: ``row`` and ``col`` shall be integral constants greater than 0. + +**Preconditions**: ``columnStride`` is greater than or equal to ``row``. + +**Remarks**: The return type is a cv-unqualified matrix type with an element +type of the cv-unqualified version of ``T`` and a number of rows and columns equal +to ``row`` and ``col`` respectively. The parameter ``columnStride`` is optional +and if omitted, ``row`` is used as ``columnStride``. + +**Returns**: A matrix ``Res`` equivalent to: + +.. code-block:: c++ + + M Res; + for (size_t C = 0; C < col; ++C) { + for (size_t R = 0; R < row; ++R) + Res[R][C] = ptr[R]; + ptr += columnStride; + } + + +``void __builtin_matrix_column_major_store(M matrix, T *ptr, size_t columnStride)`` + +**Preconditions**: ``columnStride`` is greater than or equal to the number of rows in ``M``. + +**Remarks**: The type ``T`` is the const-unqualified version of the matrix +argument’s element type. The parameter ``columnStride`` is optional and if +omitted, the number of rows of ``M`` is used as ``columnStride``. + +**Effects**: Equivalent to: + +.. code-block:: c++ + + for (size_t C = 0; C < columns in M; ++C) { + for (size_t R = 0; R < rows in M; ++R) + ptr[R] = matrix[R][C]; + ptr += columnStride; + } + + +TODOs +----- + +TODO: Does it make sense to allow M::element_type, M::rows, and M::columns +where M is a matrix type? We don’t support this anywhere else, but it’s +convenient. The alternative is using template deduction to extract this +information. Also add spelling for C. + +Future Work: Initialization syntax. + + +Decisions for the Implementation in Clang +========================================= + +This section details decisions taken for the implementation in Clang and is not +part of the draft specification. + +The elements of a value of a matrix type are laid out in column-major order +without padding. + +We propose to provide a Clang option to override the default contraction +behavior and allow contraction of matrix operations (e.g. *-ffp-contract=matrix*). + +TODO: Specify how matrix values are passed to functions.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 88edf0092dc5d..a35c966a8b4f6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -64,12 +64,22 @@ Non-comprehensive list of changes in this release - For the ARM target, C-language intrinsics ```` for the CDE instruction set are now provided. -* clang adds support for a set of extended integer types (``_ExtInt(N)``) that +- clang adds support for a set of extended integer types (``_ExtInt(N)``) that permit non-power of 2 integers, exposing the LLVM integer types. Since a major motivating use case for these types is to limit 'bit' usage, these types don't automatically promote to 'int' when operations are done between two ``ExtInt(N)`` types, instead math occurs at the size of the largest ``ExtInt(N)`` type. +- Users of UBSan, PGO, and coverage on Windows will now need to add clang's + library resource directory to their library search path. These features all + use runtime libraries, and Clang provides these libraries in its resource + directory. For example, if LLVM is installed in ``C:\Program Files\LLVM``, + then the profile runtime library will appear at + ``C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows\clang_rt.profile-x86_64.lib``. + To ensure that the linker can find the appropriate library, users should pass + ``/LIBPATH:C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows`` to the + linker. If the user links the program with the ``clang`` or ``clang-cl`` + drivers, the driver will pass this flag for them. New Compiler Flags @@ -117,6 +127,9 @@ Modified Compiler Flags ``-f[no-]sanitize-recover=undefined,integer`` and is no longer deprecated. - The argument to ``-f[no-]sanitize-trap=...`` is now optional and defaults to ``all``. +- ``-fno-char8_t`` now disables the ``char8_t`` keyword, not just the use of + ``char8_t`` as the character type of ``u8`` literals. This restores the + Clang 8 behavior that regressed in Clang 9 and 10. New Pragmas in Clang -------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 60319833c901b..97ba2aad575ee 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3668,3 +3668,56 @@ This option is intended to be used as a temporary means to build projects where clang-cl cannot successfully compile all the files. clang-cl may fail to compile a file either because it cannot generate code for some C++ feature, or because it cannot parse some Microsoft language extension. + +Finding Clang runtime libraries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +clang-cl supports several features that require runtime library support: + +- Address Sanitizer (ASan): ``-fsanitize=address`` +- Undefined Behavior Sanitizer (UBSan): ``-fsanitize=undefined`` +- Code coverage: ``-fprofile-instr-generate -fcoverage-mapping`` +- Profile Guided Optimization (PGO): ``-fprofile-instr-generate`` +- Certain math operations (int128 division) require the builtins library + +In order to use these features, the user must link the right runtime libraries +into their program. These libraries are distributed alongside Clang in the +library resource directory. Clang searches for the resource directory by +searching relative to the Clang executable. For example, if LLVM is installed +in ``C:\Program Files\LLVM``, then the profile runtime library will be located +at the path +``C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows\clang_rt.profile-x86_64.lib``. 
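+
+For example, an explicit link command using this layout might look like the
+following (illustrative only; adjust the version and architecture to match
+your installation)::
+
+  lld-link t.obj -dll "/LIBPATH:C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows"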
+ +For UBSan, PGO, and coverage, Clang will emit object files that auto-link the +appropriate runtime library, but the user generally needs to help the linker +(whether it is ``lld-link.exe`` or MSVC ``link.exe``) find the library resource +directory. Using the example installation above, this would mean passing +``/LIBPATH:C:\Program Files\LLVM\lib\clang\11.0.0\lib\windows`` to the linker. +If the user links the program with the ``clang`` or ``clang-cl`` drivers, the +driver will pass this flag for them. + +If the linker cannot find the appropriate library, it will emit an error like +this:: + + $ clang-cl -c -fsanitize=undefined t.cpp + + $ lld-link t.obj -dll + lld-link: error: could not open 'clang_rt.ubsan_standalone-x86_64.lib': no such file or directory + lld-link: error: could not open 'clang_rt.ubsan_standalone_cxx-x86_64.lib': no such file or directory + + $ link t.obj -dll -nologo + LINK : fatal error LNK1104: cannot open file 'clang_rt.ubsan_standalone-x86_64.lib' + +To fix the error, add the appropriate ``/libpath:`` flag to the link line. + +For ASan, as of this writing, the user is also responsible for linking against +the correct ASan libraries. + +If the user is using the dynamic CRT (``/MD``), then they should add +``clang_rt.asan_dynamic-x86_64.lib`` to the link line as a regular input. For +other architectures, replace x86_64 with the appropriate name here and below. + +If the user is using the static CRT (``/MT``), then different runtimes are used +to produce DLLs and EXEs. To link a DLL, pass +``clang_rt.asan_dll_thunk-x86_64.lib``. To link an EXE, pass +``-wholearchive:clang_rt.asan-x86_64.lib``. diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 82b8a51b9551d..cd7021a5884be 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3002,7 +3002,7 @@ OPT_LIST(V) private: /// All OMPTraitInfo objects live in this collection, one per /// `pragma omp [begin] declare variant` directive. - SmallVector OMPTraitInfoVector; + SmallVector, 4> OMPTraitInfoVector; }; /// Utility function for constructing a nullary selector. diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 2af1189511a30..7db74e0803cec 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -3961,6 +3961,11 @@ class RecordDecl : public TagDecl { return cast_or_null(TagDecl::getDefinition()); } + /// Returns whether this record is a union, or contains (at any nesting level) + /// a union member. This is used by CMSE to warn about possible information + /// leaks. + bool isOrContainsUnion() const; + // Iterator access to field members. The field iterator only visits // the non-static data members of this class, ignoring any static // data members, functions, constructors, destructors, etc. diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index f276611e3d0c2..0b49209fb127d 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -7127,6 +7127,131 @@ class OMPExclusiveClause final } }; +/// This represents clause 'uses_allocators' in the '#pragma omp target'-based +/// directives. +/// +/// \code +/// #pragma omp target uses_allocators(default_allocator, my_allocator(traits)) +/// \endcode +/// In this example directive '#pragma omp target' has clause 'uses_allocators' +/// with the allocators 'default_allocator' and user-defined 'my_allocator'. 
+class OMPUsesAllocatorsClause final + : public OMPClause, + private llvm::TrailingObjects { +public: + /// Data for list of allocators. + struct Data { + /// Allocator. + Expr *Allocator = nullptr; + /// Allocator traits. + Expr *AllocatorTraits = nullptr; + /// Locations of '(' and ')' symbols. + SourceLocation LParenLoc, RParenLoc; + }; + +private: + friend class OMPClauseReader; + friend TrailingObjects; + + enum class ExprOffsets { + Allocator, + AllocatorTraits, + Total, + }; + + enum class ParenLocsOffsets { + LParen, + RParen, + Total, + }; + + /// Location of '('. + SourceLocation LParenLoc; + /// Total number of allocators in the clause. + unsigned NumOfAllocators = 0; + + /// Build clause. + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + /// \param N Number of allocators associated with the clause. + OMPUsesAllocatorsClause(SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, unsigned N) + : OMPClause(llvm::omp::OMPC_uses_allocators, StartLoc, EndLoc), + LParenLoc(LParenLoc), NumOfAllocators(N) {} + + /// Build an empty clause. + /// \param N Number of allocators associated with the clause. + /// + explicit OMPUsesAllocatorsClause(unsigned N) + : OMPClause(llvm::omp::OMPC_uses_allocators, SourceLocation(), + SourceLocation()), + NumOfAllocators(N) {} + + unsigned numTrailingObjects(OverloadToken) const { + return NumOfAllocators * static_cast(ExprOffsets::Total); + } + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Sets the allocators data for the clause. + void setAllocatorsData(ArrayRef Data); + +public: + /// Creates clause with a list of allocators \p Data. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + /// \param Data List of allocators. + static OMPUsesAllocatorsClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc, ArrayRef Data); + + /// Creates an empty clause with the place for \p N allocators. + /// + /// \param C AST context. + /// \param N The number of allocators. + static OMPUsesAllocatorsClause *CreateEmpty(const ASTContext &C, unsigned N); + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns number of allocators associated with the clause. + unsigned getNumberOfAllocators() const { return NumOfAllocators; } + + /// Returns data for the specified allocator. + OMPUsesAllocatorsClause::Data getAllocatorData(unsigned I) const; + + // Iterators + child_range children() { + Stmt **Begin = reinterpret_cast(getTrailingObjects()); + return child_range(Begin, Begin + NumOfAllocators * + static_cast(ExprOffsets::Total)); + } + const_child_range children() const { + Stmt *const *Begin = + reinterpret_cast(getTrailingObjects()); + return const_child_range( + Begin, Begin + NumOfAllocators * static_cast(ExprOffsets::Total)); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_uses_allocators; + } +}; + /// This class implements a simple visitor for OMPClause /// subclasses.
template class Ptr, typename RetTy> diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index b71f7994e2fac..0680a8f70212e 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3522,6 +3522,17 @@ bool RecursiveASTVisitor::VisitOMPDetachClause(OMPDetachClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPUsesAllocatorsClause( + OMPUsesAllocatorsClause *C) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + const OMPUsesAllocatorsClause::Data Data = C->getAllocatorData(I); + TRY_TO(TraverseStmt(Data.Allocator)); + TRY_TO(TraverseStmt(Data.AllocatorTraits)); + } + return true; +} + // FIXME: look at the following tricky-seeming exprs to see if we // need to recurse on anything. These are ones that have methods // returning decls or qualtypes or nestednamespecifier -- though I'm diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index b390bf0042f97..bd87eafc90348 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -356,6 +356,9 @@ class OMPExecutableDirective : public Stmt { /// class OMPParallelDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel; @@ -381,6 +384,9 @@ class OMPParallelDirective : public OMPExecutableDirective { SourceLocation(), NumClauses, 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -392,11 +398,14 @@ class OMPParallelDirective : public OMPExecutableDirective { /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. /// static OMPParallelDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a N clauses. /// @@ -406,6 +415,10 @@ class OMPParallelDirective : public OMPExecutableDirective { static OMPParallelDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1258,7 +1271,9 @@ class OMPSimdDirective : public OMPLoopDirective { /// class OMPForDirective : public OMPLoopDirective { friend class ASTStmtReader; - + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. 
bool HasCancel; @@ -1286,6 +1301,9 @@ class OMPForDirective : public OMPLoopDirective { NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1299,13 +1317,15 @@ class OMPForDirective : public OMPLoopDirective { /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, const HelperExprs &Exprs, - bool HasCancel); + Expr *TaskRedRef, bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1317,6 +1337,10 @@ class OMPForDirective : public OMPLoopDirective { static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1403,6 +1427,9 @@ class OMPForSimdDirective : public OMPLoopDirective { class OMPSectionsDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. bool HasCancel; @@ -1429,6 +1456,9 @@ class OMPSectionsDirective : public OMPExecutableDirective { SourceLocation(), NumClauses, 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1440,11 +1470,14 @@ class OMPSectionsDirective : public OMPExecutableDirective { /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner directive. /// static OMPSectionsDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -1455,6 +1488,10 @@ class OMPSectionsDirective : public OMPExecutableDirective { static OMPSectionsDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. 
bool hasCancel() const { return HasCancel; } @@ -1715,6 +1752,9 @@ class OMPCriticalDirective : public OMPExecutableDirective { class OMPParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current region has inner cancel directive. bool HasCancel; @@ -1743,6 +1783,9 @@ class OMPParallelForDirective : public OMPLoopDirective { SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1756,12 +1799,15 @@ class OMPParallelForDirective : public OMPLoopDirective { /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -1775,6 +1821,10 @@ class OMPParallelForDirective : public OMPLoopDirective { unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -1863,6 +1913,10 @@ class OMPParallelForSimdDirective : public OMPLoopDirective { class OMPParallelMasterDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; + OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc, unsigned NumClauses) : OMPExecutableDirective(this, OMPParallelMasterDirectiveClass, @@ -1875,6 +1929,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective { SourceLocation(), SourceLocation(), NumClauses, 1) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + public: /// Creates directive with a list of \a Clauses. /// @@ -1883,10 +1940,12 @@ class OMPParallelMasterDirective : public OMPExecutableDirective { /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// static OMPParallelMasterDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef); /// Creates an empty directive with the place for \a NumClauses /// clauses. 
@@ -1897,6 +1956,10 @@ class OMPParallelMasterDirective : public OMPExecutableDirective { static OMPParallelMasterDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPParallelMasterDirectiveClass; } @@ -1914,6 +1977,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective { class OMPParallelSectionsDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current directive has inner cancel directive. bool HasCancel; @@ -1941,6 +2007,9 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective { 1), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -1952,11 +2021,14 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective { /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPParallelSectionsDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -1967,6 +2039,10 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective { static OMPParallelSectionsDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -2805,6 +2881,12 @@ class OMPTargetExitDataDirective : public OMPExecutableDirective { /// class OMPTargetParallelDirective : public OMPExecutableDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; + /// true if the construct has inner cancel directive. + bool HasCancel = false; + /// Build directive with the given start and end location. /// /// \param StartLoc Starting location of the directive kind. @@ -2827,6 +2909,11 @@ class OMPTargetParallelDirective : public OMPExecutableDirective { SourceLocation(), SourceLocation(), NumClauses, /*NumChildren=*/1) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. + void setHasCancel(bool Has) { HasCancel = Has; } + public: /// Creates directive with a list of \a Clauses. 
/// @@ -2835,10 +2922,14 @@ class OMPTargetParallelDirective : public OMPExecutableDirective { /// \param EndLoc Ending Location of the directive. /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. + /// \param HasCancel true if this directive has inner cancel directive. /// static OMPTargetParallelDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt); + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses /// clauses. @@ -2849,6 +2940,13 @@ class OMPTargetParallelDirective : public OMPExecutableDirective { static OMPTargetParallelDirective * CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + + /// Return true if current directive has inner cancel directive. + bool hasCancel() const { return HasCancel; } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPTargetParallelDirectiveClass; } @@ -2866,6 +2964,9 @@ class OMPTargetParallelDirective : public OMPExecutableDirective { class OMPTargetParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if current region has inner cancel directive. bool HasCancel; @@ -2895,6 +2996,9 @@ class OMPTargetParallelForDirective : public OMPLoopDirective { SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -2908,12 +3012,15 @@ class OMPTargetParallelForDirective : public OMPLoopDirective { /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if current directive has inner cancel directive. /// static OMPTargetParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -2927,6 +3034,10 @@ class OMPTargetParallelForDirective : public OMPLoopDirective { unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. 
bool hasCancel() const { return HasCancel; } @@ -3699,6 +3810,9 @@ class OMPTargetUpdateDirective : public OMPExecutableDirective { /// class OMPDistributeParallelForDirective : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -3730,6 +3844,9 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective { NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -3743,12 +3860,15 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective { /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. /// static OMPDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place /// for \a NumClauses clauses. @@ -3762,6 +3882,10 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective { unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -4264,6 +4388,9 @@ class OMPTeamsDistributeParallelForSimdDirective final /// class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -4296,6 +4423,9 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective { NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -4309,12 +4439,15 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective { /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. 
/// static OMPTeamsDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses clauses. /// @@ -4326,6 +4459,10 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective { CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. bool hasCancel() const { return HasCancel; } @@ -4473,6 +4610,9 @@ class OMPTargetTeamsDistributeDirective final : public OMPLoopDirective { class OMPTargetTeamsDistributeParallelForDirective final : public OMPLoopDirective { friend class ASTStmtReader; + /// Special reference expression for handling task reduction. Used to store + /// the taskgroup descriptor returned by the runtime functions. + Expr *TaskRedRef = nullptr; /// true if the construct has inner cancel directive. bool HasCancel = false; @@ -4506,6 +4646,9 @@ class OMPTargetTeamsDistributeParallelForDirective final SourceLocation(), SourceLocation(), CollapsedNum, NumClauses), HasCancel(false) {} + /// Sets special task reduction descriptor. + void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; } + /// Set cancel state. void setHasCancel(bool Has) { HasCancel = Has; } @@ -4519,12 +4662,15 @@ class OMPTargetTeamsDistributeParallelForDirective final /// \param Clauses List of clauses. /// \param AssociatedStmt Statement, associated with the directive. /// \param Exprs Helper expressions for CodeGen. + /// \param TaskRedRef Task reduction special reference expression to handle + /// taskgroup descriptor. /// \param HasCancel true if this directive has inner cancel directive. /// static OMPTargetTeamsDistributeParallelForDirective * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, - Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel); + Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef, + bool HasCancel); /// Creates an empty directive with the place for \a NumClauses clauses. /// @@ -4536,6 +4682,10 @@ class OMPTargetTeamsDistributeParallelForDirective final CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, EmptyShell); + /// Returns special task reduction reference expression. + Expr *getTaskReductionRefExpr() { return TaskRedRef; } + const Expr *getTaskReductionRefExpr() const { return TaskRedRef; } + /// Return true if current directive has inner cancel directive. 
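For orientation, a minimal sketch (editorial, not part of the patch) of the OpenMP 5.0 feature all of these new TaskRedRef fields model: the 'task' reduction modifier. The runtime hands back a taskgroup descriptor for such reductions, and the new reference expression is where Sema stashes it for CodeGen. Assumes a compiler with -fopenmp and OpenMP 5.0 support.

#include <cstdio>
int main() {
  int Sum = 0;
  // The 'task' modifier lets tasks spawned inside the region participate in
  // the reduction; this is what requires the taskgroup descriptor.
  #pragma omp parallel for reduction(task, + : Sum)
  for (int I = 0; I < 100; ++I)
    Sum += I;
  std::printf("Sum = %d\n", Sum); // prints 4950
}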
bool hasCancel() const { return HasCancel; } diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 9e96559f0e925..5aade19ddcc3b 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -3523,13 +3523,12 @@ class FunctionType : public Type { enum { NoReturnMask = 0x20 }; enum { ProducesResultMask = 0x40 }; enum { NoCallerSavedRegsMask = 0x80 }; - enum { NoCfCheckMask = 0x800 }; - enum { CmseNSCallMask = 0x1000 }; enum { - RegParmMask = ~(CallConvMask | NoReturnMask | ProducesResultMask | - NoCallerSavedRegsMask | NoCfCheckMask | CmseNSCallMask), + RegParmMask = 0x700, RegParmOffset = 8 - }; // Assumed to be the last field + }; + enum { NoCfCheckMask = 0x800 }; + enum { CmseNSCallMask = 0x1000 }; uint16_t Bits = CC_C; ExtInfo(unsigned Bits) : Bits(static_cast<uint16_t>(Bits)) {} @@ -3562,7 +3561,7 @@ class FunctionType : public Type { bool getCmseNSCall() const { return Bits & CmseNSCallMask; } bool getNoCallerSavedRegs() const { return Bits & NoCallerSavedRegsMask; } bool getNoCfCheck() const { return Bits & NoCfCheckMask; } - bool getHasRegParm() const { return (Bits >> RegParmOffset) != 0; } + bool getHasRegParm() const { return ((Bits & RegParmMask) >> RegParmOffset) != 0; } unsigned getRegParm() const { unsigned RegParm = (Bits & RegParmMask) >> RegParmOffset; diff --git a/clang/include/clang/Analysis/Analyses/LiveVariables.h b/clang/include/clang/Analysis/Analyses/LiveVariables.h index a46c35ee5b309..2e7dd5d81678a 100644 --- a/clang/include/clang/Analysis/Analyses/LiveVariables.h +++ b/clang/include/clang/Analysis/Analyses/LiveVariables.h @@ -70,8 +70,8 @@ class LiveVariables : public ManagedAnalysis { ~LiveVariables() override; /// Compute the liveness information for a given CFG. - static LiveVariables *computeLiveness(AnalysisDeclContext &analysisContext, - bool killAtAssign); + static std::unique_ptr<LiveVariables> + computeLiveness(AnalysisDeclContext &analysisContext, bool killAtAssign); /// Return true if a variable is live at the end of a /// specified block. @@ -97,7 +97,8 @@ class LiveVariables : public ManagedAnalysis { void runOnAllBlocks(Observer &obs); - static LiveVariables *create(AnalysisDeclContext &analysisContext) { + static std::unique_ptr<LiveVariables> + create(AnalysisDeclContext &analysisContext) { return computeLiveness(analysisContext, true); } @@ -110,7 +111,8 @@ class LiveVariables : public ManagedAnalysis { class RelaxedLiveVariables : public LiveVariables { public: - static LiveVariables *create(AnalysisDeclContext &analysisContext) { + static std::unique_ptr<LiveVariables> + create(AnalysisDeclContext &analysisContext) { return computeLiveness(analysisContext, false); } diff --git a/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h b/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h index 08fda0982df43..1000298945602 100644 --- a/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h +++ b/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h @@ -108,7 +108,8 @@ class PostOrderCFGView : public ManagedAnalysis { // Used by AnalysisDeclContext to construct this object.
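A standalone illustration (editorial, not from the patch) of the getHasRegParm fix in the Type.h hunk above: with NoCfCheckMask (0x800) and CmseNSCallMask (0x1000) sitting in bits above the three-bit regparm field, the old unmasked right-shift misread either flag as a nonzero regparm count.

#include <cassert>
#include <cstdint>
int main() {
  const uint16_t RegParmMask = 0x700, NoCfCheckMask = 0x800;
  const unsigned RegParmOffset = 8;
  uint16_t Bits = NoCfCheckMask; // nocf_check set, no regparm attribute
  assert((Bits >> RegParmOffset) != 0);                 // old check: false positive
  assert(((Bits & RegParmMask) >> RegParmOffset) == 0); // new check: correct
}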
static const void *getTag(); - static PostOrderCFGView *create(AnalysisDeclContext &analysisContext); + static std::unique_ptr<PostOrderCFGView> + create(AnalysisDeclContext &analysisContext); }; } // namespace clang diff --git a/clang/include/clang/Analysis/AnalysisDeclContext.h b/clang/include/clang/Analysis/AnalysisDeclContext.h index ed554feedead6..6fe1e27bda75e 100644 --- a/clang/include/clang/Analysis/AnalysisDeclContext.h +++ b/clang/include/clang/Analysis/AnalysisDeclContext.h @@ -191,18 +191,17 @@ class AnalysisDeclContext { /// necessary or nullptr if the analysis could not run. template <typename T> T *getAnalysis() { const void *tag = T::getTag(); - ManagedAnalysis *&data = getAnalysisImpl(tag); - if (!data) { + std::unique_ptr<ManagedAnalysis> &data = getAnalysisImpl(tag); + if (!data) data = T::create(*this); - } - return static_cast<T *>(data); + return static_cast<T *>(data.get()); } /// \returns Whether the root namespace of \p D is the \c std C++ namespace. static bool isInStdNamespace(const Decl *D); private: - ManagedAnalysis *&getAnalysisImpl(const void *tag); + std::unique_ptr<ManagedAnalysis> &getAnalysisImpl(const void *tag); LocationContextManager &getLocationContextManager(); }; diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 5fb7a603fe17b..0d66535fa8ec1 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -153,8 +153,6 @@ TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-s TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") -TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 87fdfc89c634d..9df3e79d183fc 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -61,6 +61,7 @@ def note_fe_backend_invalid_loc : Note<"could " "not determine the original source location for %0:%1:%2">, BackendInfo; def err_fe_backend_unsupported : Error<"%0">, BackendInfo; +def warn_fe_backend_unsupported : Warning<"%0">, BackendInfo; def err_fe_invalid_code_complete_file : Error< "cannot locate code-completion file %0">, DefaultFatal; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9153c35727a65..f4f20dfe13966 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3000,6 +3000,11 @@ def warn_objc_collection_literal_element : Warning< "object of type %0 is not compatible with " "%select{array element type|dictionary key type|dictionary value type}1 %2">, InGroup<ObjCLiteralConversion>; +def warn_nsdictionary_duplicate_key : Warning< + "duplicate key in dictionary literal">, + InGroup>; +def note_nsdictionary_duplicate_key_here : Note< + "previous equal key is here">; def err_swift_param_attr_not_swiftcall : Error< "'%0' parameter can only be used with swiftcall calling convention">; def err_swift_indirect_result_not_first : Error< @@ -3164,6 +3169,10
@@ def warn_weak_identifier_undeclared : Warning< def warn_attribute_cmse_entry_static : Warning< "'cmse_nonsecure_entry' cannot be applied to functions with internal linkage">, InGroup; +def warn_cmse_nonsecure_union : Warning< + "passing union across security boundary via %select{parameter %1|return value}0 " + "may leak information">, + InGroup>; def err_attribute_weak_static : Error< "weak declaration cannot have internal linkage">; def err_attribute_selectany_non_extern_data : Error< @@ -3377,7 +3386,7 @@ def warn_thread_attribute_argument_not_lockable : Warning< InGroup, DefaultIgnore; def warn_thread_attribute_decl_not_lockable : Warning< "%0 attribute can only be applied in a context annotated " - "with 'capability(\"mutex\")' attribute">, + "with 'capability' attribute">, InGroup, DefaultIgnore; def warn_thread_attribute_decl_not_pointer : Warning< "%0 only applies to pointer types; type here is %1">, @@ -10258,8 +10267,8 @@ def note_omp_flush_order_clause_here : Note< "memory order clause '%0' is specified here">; def err_omp_non_lvalue_in_map_or_motion_clauses: Error< "expected addressable lvalue in '%0' clause">; -def err_omp_event_var_expected : Error< - "expected variable of the 'omp_event_handle_t' type%select{|, not %1}0">; +def err_omp_var_expected : Error< + "expected variable of the '%0' type%select{|, not %2}1">; def warn_nested_declare_variant : Warning<"nesting `omp begin/end declare variant` is not supported yet; " "nested context ignored">, @@ -10267,6 +10276,22 @@ def warn_nested_declare_variant def err_omp_non_pointer_type_array_shaping_base : Error< "expected expression with a pointer to a complete type as a base of an array " "shaping operation">; +def err_omp_reduction_task_not_parallel_or_worksharing : Error< + "'reduction' clause with 'task' modifier allowed only on non-simd parallel or" + " worksharing constructs">; +def err_omp_expected_array_alloctraits : Error< + "expected constant sized array of 'omp_alloctrait_t' elements, not %0">; +def err_omp_predefined_allocator_with_traits : Error< + "predefined allocator cannot have traits specified">; +def note_omp_predefined_allocator : Note< + "predefined trait '%0' used here">; +def err_omp_nonpredefined_allocator_without_traits : Error< + "non-predefined allocator must have traits specified">; +def err_omp_allocator_used_in_clauses : Error< + "allocators used in 'uses_allocators' clause cannot appear in other " + "data-sharing or data-mapping attribute clauses">; +def err_omp_allocator_not_in_uses_allocators : Error< + "allocator must be specified in the 'uses_allocators' clause">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { @@ -10335,11 +10360,6 @@ def err_module_unimported_use : Error< "explicit specialization|partial specialization}0 of %1 must be imported " "from module '%2' before it is required">; def err_module_unimported_use_header : Error< - "missing '#include %3'; " - "%select{declaration|definition|default argument|" - "explicit specialization|partial specialization}0 of %1 must be imported " - "from module '%2' before it is required">; -def err_module_unimported_use_global_module_fragment : Error< "%select{missing '#include'|missing '#include %3'}2; " "%select{||default argument of |explicit specialization of |" "partial specialization of }0%1 must be " @@ -10349,6 +10369,10 @@ def err_module_unimported_use_multiple : Error< "%select{declaration|definition|default argument|" "explicit specialization|partial specialization}0 of %1 must be imported " "from one of 
the following modules before it is required:%2">; +def note_unreachable_entity : Note< + "%select{declaration|definition|default argument declared|" + "explicit specialization declared|partial specialization declared}0 here " + "is not %select{visible|reachable|reachable|reachable|reachable|reachable}0">; def ext_module_import_in_extern_c : ExtWarn< "import of C++ module '%0' appears within extern \"C\" language linkage " "specification">, DefaultError, diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index 0ae0bc844e36c..9f2bf1abc2875 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -148,6 +148,7 @@ OPENMP_ORDER_KIND(concurrent) // Modifiers for 'reduction' clause. OPENMP_REDUCTION_MODIFIER(default) OPENMP_REDUCTION_MODIFIER(inscan) +OPENMP_REDUCTION_MODIFIER(task) #undef OPENMP_REDUCTION_MODIFIER #undef OPENMP_DEVICE_MODIFIER diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 0a06ba3e5ecc7..bf07a8950f28e 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -242,6 +242,7 @@ namespace clang { bool isReverseCompare() const { return Flags & ReverseCompare; } bool isAppendSVALL() const { return Flags & IsAppendSVALL; } bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; } + bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ab47954040719..910a4d6846aaa 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -547,6 +547,12 @@ class TargetInfo : public virtual TransferrableTargetInfo, return (getPointerWidth(0) >= 64) || getTargetOpts().ForceEnableInt128; } // FIXME + /// Determine whether the _ExtInt type is supported on this target. This + /// limitation is put into place for ABI reasons. + virtual bool hasExtIntType() const { + return false; + } + /// Determine whether _Float16 is supported on this target. virtual bool hasLegalHalfType() const { return HasLegalHalfType; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 15a69e2a366a5..e828fcc5a0043 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -389,9 +389,10 @@ MODULES_KEYWORD(module) MODULES_KEYWORD(import) // C++20 keywords. -CXX20_KEYWORD(char8_t , CHAR8SUPPORT) CXX20_KEYWORD(consteval , 0) CXX20_KEYWORD(constinit , 0) +// Not a CXX20_KEYWORD because it is disabled by -fno-char8_t. +KEYWORD(char8_t , CHAR8SUPPORT) // C11 Extension KEYWORD(_Float16 , KEYALL) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a5cacd2103a82..2d2a09d4524d2 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -73,7 +73,11 @@ // // w: vector of element type promoted to 64bits, vector maintains // signedness of its element type. 
+// f: element type promoted to uint64_t (splat to vector type) // j: element type promoted to 64bits (splat to vector type) +// K: element type bitcast to a signed integer (splat to vector type) +// L: element type bitcast to an unsigned integer (splat to vector type) +// // i: constant uint64_t // k: int32_t // l: int64_t @@ -98,6 +102,8 @@ // G: pointer to uint32_t // H: pointer to uint64_t +// Q: const pointer to void + // S: const pointer to int8_t // T: const pointer to int16_t // U: const pointer to int32_t @@ -185,6 +191,7 @@ def IsByteIndexed : FlagType<0x01000000>; def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. +def IsGatherPrefetch : FlagType<0x10000000>; def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h @@ -569,7 +576,47 @@ def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16 def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +// Prefetch (Vector bases) +def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; +def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; +def SVPRFW_GATHER_BASES : MInst<"svprfw_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; +def SVPRFD_GATHER_BASES : MInst<"svprfd_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + +// Prefetch (Scalar base, Vector offsets) +def SVPRFB_GATHER_32B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_sxtw_index">; +def SVPRFH_GATHER_32B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_sxtw_index">; +def SVPRFW_GATHER_32B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_sxtw_index">; +def SVPRFD_GATHER_32B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_sxtw_index">; + +def SVPRFB_GATHER_64B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">; +def SVPRFH_GATHER_64B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">; +def SVPRFW_GATHER_64B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">; +def SVPRFD_GATHER_64B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">; + +def SVPRFB_GATHER_32B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_uxtw_index">; +def SVPRFH_GATHER_32B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt16, 
"aarch64_sve_prfh_gather_uxtw_index">; +def SVPRFW_GATHER_32B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_uxtw_index">; +def SVPRFD_GATHER_32B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_uxtw_index">; + +def SVPRFB_GATHER_64B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">; +def SVPRFH_GATHER_64B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">; +def SVPRFW_GATHER_64B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">; +def SVPRFD_GATHER_64B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">; + +// Prefetch (Vector bases, scalar offset) +def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; +def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; +def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; +def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + //////////////////////////////////////////////////////////////////////////////// +// Scalar to vector + +def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>; +def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>; +def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>; +def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; + // Integer arithmetic multiclass SInstZPZ flags=[]> { @@ -609,14 +656,75 @@ defm SVMULH_S : SInstZPZZ<"svmulh", "csil", "aarch64_sve_smulh">; defm SVMULH_U : SInstZPZZ<"svmulh", "UcUsUiUl", "aarch64_sve_umulh">; defm SVSUB : SInstZPZZ<"svsub", "csilUcUsUiUl", "aarch64_sve_sub">; defm SVSUBR : SInstZPZZ<"svsubr", "csilUcUsUiUl", "aarch64_sve_subr">; + +//------------------------------------------------------------------------------ + +multiclass SInstZPZZZ flags=[]> { + def _M : SInst; + def _X : SInst; + def _Z : SInst; + + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; +} + +defm SVMAD : SInstZPZZZ<"svmad", "csilUcUsUiUl", "aarch64_sve_mad">; +defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla">; +defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls">; +defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb">; + //////////////////////////////////////////////////////////////////////////////// -// Permutations and selection -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; +// Logical operations + +defm SVAND : SInstZPZZ<"svand", "csilUcUsUiUl", "aarch64_sve_and">; +defm SVBIC : SInstZPZZ<"svbic", "csilUcUsUiUl", "aarch64_sve_bic">; +defm SVEOR : SInstZPZZ<"sveor", "csilUcUsUiUl", "aarch64_sve_eor">; +defm SVORR : SInstZPZZ<"svorr", "csilUcUsUiUl", "aarch64_sve_orr">; + +defm SVCNOT : SInstZPZ<"svcnot", "csilUcUsUiUl", 
"aarch64_sve_cnot">; +defm SVNOT : SInstZPZ<"svnot", "csilUcUsUiUl", "aarch64_sve_not">; //////////////////////////////////////////////////////////////////////////////// // Shifts + +multiclass SInst_SHIFT { + def _M : SInst; + def _X : SInst; + def _Z : SInst; + + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; + + def _WIDE_M : SInst; + def _WIDE_X : SInst; + def _WIDE_Z : SInst; + + def _WIDE_N_M : SInst; + def _WIDE_N_X : SInst; + def _WIDE_N_Z : SInst; +} + +defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">; +defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">; +defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; + def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +//////////////////////////////////////////////////////////////////////////////// +// Integer reductions + +def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">; +def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">; +def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">; +def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">; +def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_smaxv">; +def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv">; +def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv">; +def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv">; +def SVORV : SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv">; + //////////////////////////////////////////////////////////////////////////////// // Integer comparisons @@ -688,17 +796,115 @@ def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNon def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; +//////////////////////////////////////////////////////////////////////////////// +// Counting bit + +multiclass SInstCLS flags=[]> { + def _M : SInst; + def _X : SInst; + def _Z : SInst; +} + +defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; +defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">; +defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; + +//////////////////////////////////////////////////////////////////////////////// +// Conversion + +defm SVEXTB_S : SInstZPZ<"svextb", "sil", "aarch64_sve_sxtb">; +defm SVEXTB_U : SInstZPZ<"svextb", "UsUiUl", "aarch64_sve_uxtb">; +defm SVEXTH_S : SInstZPZ<"svexth", "il", "aarch64_sve_sxth">; +defm SVEXTH_U : SInstZPZ<"svexth", "UiUl", "aarch64_sve_uxth">; +defm SVEXTW_S : SInstZPZ<"svextw", "l", "aarch64_sve_sxtw">; +defm SVEXTW_U : SInstZPZ<"svextw", "Ul", "aarch64_sve_uxtw">; + +//////////////////////////////////////////////////////////////////////////////// +// Reversal + +defm SVRBIT : SInstZPZ<"svrbit", "csilUcUsUiUl", "aarch64_sve_rbit">; +defm SVREVB : SInstZPZ<"svrevb", "silUsUiUl", "aarch64_sve_revb">; +defm SVREVH : SInstZPZ<"svrevh", "ilUiUl", "aarch64_sve_revh">; +defm SVREVW : SInstZPZ<"svrevw", "lUl", "aarch64_sve_revw">; + //////////////////////////////////////////////////////////////////////////////// // Floating-point arithmetic 
defm SVABS_F : SInstZPZ<"svabs", "hfd", "aarch64_sve_fabs">; defm SVNEG_F : SInstZPZ<"svneg", "hfd", "aarch64_sve_fneg">; +defm SVABD_F : SInstZPZZ<"svabd", "hfd", "aarch64_sve_fabd">; +defm SVADD_F : SInstZPZZ<"svadd", "hfd", "aarch64_sve_fadd">; +defm SVDIV_F : SInstZPZZ<"svdiv", "hfd", "aarch64_sve_fdiv">; +defm SVDIVR_F : SInstZPZZ<"svdivr", "hfd", "aarch64_sve_fdivr">; +defm SVMAX_F : SInstZPZZ<"svmax", "hfd", "aarch64_sve_fmax">; +defm SVMAXNM : SInstZPZZ<"svmaxnm","hfd", "aarch64_sve_fmaxnm">; +defm SVMIN_F : SInstZPZZ<"svmin", "hfd", "aarch64_sve_fmin">; +defm SVMINNM : SInstZPZZ<"svminnm","hfd", "aarch64_sve_fminnm">; +defm SVMUL_F : SInstZPZZ<"svmul", "hfd", "aarch64_sve_fmul">; +defm SVMULX : SInstZPZZ<"svmulx", "hfd", "aarch64_sve_fmulx">; +defm SVSUB_F : SInstZPZZ<"svsub", "hfd", "aarch64_sve_fsub">; +defm SVSUBR_F : SInstZPZZ<"svsubr", "hfd", "aarch64_sve_fsubr">; + +defm SVRECPX : SInstZPZ<"svrecpx", "hfd", "aarch64_sve_frecpx">; +defm SVRINTA : SInstZPZ<"svrinta", "hfd", "aarch64_sve_frinta">; +defm SVRINTI : SInstZPZ<"svrinti", "hfd", "aarch64_sve_frinti">; +defm SVRINTM : SInstZPZ<"svrintm", "hfd", "aarch64_sve_frintm">; +defm SVRINTN : SInstZPZ<"svrintn", "hfd", "aarch64_sve_frintn">; +defm SVRINTP : SInstZPZ<"svrintp", "hfd", "aarch64_sve_frintp">; +defm SVRINTX : SInstZPZ<"svrintx", "hfd", "aarch64_sve_frintx">; +defm SVRINTZ : SInstZPZ<"svrintz", "hfd", "aarch64_sve_frintz">; +defm SVSQRT : SInstZPZ<"svsqrt", "hfd", "aarch64_sve_fsqrt">; + +def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x">; def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>; +def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">; +def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">; + +def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale">; +def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale">; +def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale">; + +def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale">; +def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, "aarch64_sve_fscale">; +def SVSCALE_N_Z : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeZero, "aarch64_sve_fscale">; + +defm SVMAD_F : SInstZPZZZ<"svmad", "hfd", "aarch64_sve_fmad">; +defm SVMLA_F : SInstZPZZZ<"svmla", "hfd", "aarch64_sve_fmla">; +defm SVMLS_F : SInstZPZZZ<"svmls", "hfd", "aarch64_sve_fmls">; +defm SVMSB_F : SInstZPZZZ<"svmsb", "hfd", "aarch64_sve_fmsb">; +defm SVNMAD_F : SInstZPZZZ<"svnmad", "hfd", "aarch64_sve_fnmad">; +defm SVNMLA_F : SInstZPZZZ<"svnmla", "hfd", "aarch64_sve_fnmla">; +defm SVNMLS_F : SInstZPZZZ<"svnmls", "hfd", "aarch64_sve_fnmls">; +defm SVNMSB_F : SInstZPZZZ<"svnmsb", "hfd", "aarch64_sve_fnmsb">; + +def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_X : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeAny, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_Z : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeZero, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [], 
[ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; + +def SVRECPE : SInst<"svrecpe[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frecpe_x">; +def SVRECPS : SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frecps_x">; +def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x">; +def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x">; + +//////////////////////////////////////////////////////////////////////////////// +// Floating-point reductions + +def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; +def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv">; +def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv">; +def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv">; +def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv">; +def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv">; //////////////////////////////////////////////////////////////////////////////// // Floating-point comparisons @@ -826,9 +1032,44 @@ def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6 } -def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +//////////////////////////////////////////////////////////////////////////////// +// Permutations and selection +def SVCLASTA : SInst<"svclasta[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta">; +def SVCLASTA_N : SInst<"svclasta[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta_n">; +def SVCLASTB : SInst<"svclastb[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb">; +def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">; +def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; +// SVDUP_LANE (to land in D78750) +def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">; +def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; +def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">; +def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, 
"aarch64_sve_rev">; +def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; +def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; +def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; +def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; +def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; +def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi">; +def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo">; +def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo">; +def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1">; +def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2">; +def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1">; +def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2">; + +def SVREV_B : SInst<"svrev_{d}", "PP", "PcPsPiPl", MergeNone, "aarch64_sve_rev">; +def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">; +def SVTRN1_B : SInst<"svtrn1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn1">; +def SVTRN2_B : SInst<"svtrn2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn2">; +def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi">; +def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo">; +def SVUZP1_B : SInst<"svuzp1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp1">; +def SVUZP2_B : SInst<"svuzp2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp2">; +def SVZIP1_B : SInst<"svzip1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip1">; +def SVZIP2_B : SInst<"svzip2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip2">; //////////////////////////////////////////////////////////////////////////////// // Predicate creation @@ -838,6 +1079,43 @@ def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; +def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "Pssssssssssssssss", "Pc", MergeNone>; +def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "Pssssssss", "Ps", MergeNone>; +def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "Pssss", "Pi", MergeNone>; +def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone>; + + +//////////////////////////////////////////////////////////////////////////////// +// Predicate operations + +def SVAND_B_Z : SInst<"svand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_and_z">; +def SVBIC_B_Z : SInst<"svbic[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_bic_z">; +def SVEOR_B_Z : SInst<"sveor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_eor_z">; +def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion +def SVNAND_B_Z : SInst<"svnand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nand_z">; +def SVNOR_B_Z : SInst<"svnor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nor_z">; +def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion +def SVORN_B_Z : SInst<"svorn[_b]_z", "PPPP", "Pc", 
MergeNone, "aarch64_sve_orn_z">; +def SVORR_B_Z : SInst<"svorr[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orr_z">; + +def SVBRKA : SInst<"svbrka[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brka">; +def SVBRKA_Z : SInst<"svbrka[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brka_z">; +def SVBRKB : SInst<"svbrkb[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brkb">; +def SVBRKB_Z : SInst<"svbrkb[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brkb_z">; +def SVBRKN_Z : SInst<"svbrkn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkn_z">; +def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpa_z">; +def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z">; + +def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst">; +def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext">; + +//////////////////////////////////////////////////////////////////////////////// +// Testing predicates + +def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_any">; +def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first">; +def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last">; + //////////////////////////////////////////////////////////////////////////////// // Counting elements @@ -851,6 +1129,8 @@ def SVCNTH : SInst<"svcnth", "n", "", MergeNone, "aarch64_sve_cnth", [IsAppendSV def SVCNTW : SInst<"svcntw", "n", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; def SVCNTD : SInst<"svcntd", "n", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; +def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; + //////////////////////////////////////////////////////////////////////////////// // Saturating scalar arithmetic diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 560db820aaed0..e849d68740682 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -628,9 +628,9 @@ def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, - HelpText<"ROCm installation path">; + HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group, - HelpText<"HIP device library path">; + HelpText<"HIP device library path. Alternative to rocm-path.">; def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, HelpText<"HIP device library">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index cd905da0d8a38..1d4c6748f5a66 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -428,6 +428,11 @@ class ToolChain { getCompilerRTArgString(const llvm::opt::ArgList &Args, StringRef Component, FileType Type = ToolChain::FT_Static) const; + std::string getCompilerRTBasename(const llvm::opt::ArgList &Args, + StringRef Component, + FileType Type = ToolChain::FT_Static, + bool AddArch = true) const; + // Returns target specific runtime path if it exists. 
virtual Optional<std::string> getRuntimePath() const; diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 2b2edc4adc115..011cf599d526f 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -221,6 +221,20 @@ struct FormatStyle { /// \endcode bool AllowAllParametersOfDeclarationOnNextLine; + /// Allow short enums on a single line. + /// \code + /// true: + /// enum { A, B } myEnum; + /// + /// false: + /// enum + /// { + /// A, + /// B + /// } myEnum; + /// \endcode + bool AllowShortEnumsOnASingleLine; + /// Different styles for merging short blocks containing at most one /// statement. enum ShortBlockStyle { @@ -561,6 +575,21 @@ struct FormatStyle { TCS_Wrapped, }; + /// If set to ``TCS_Wrapped``, will insert trailing commas in container + /// literals (arrays and objects) that wrap across multiple lines. + /// It is currently only available for JavaScript + /// and disabled by default (``TCS_None``). + /// ``InsertTrailingCommas`` cannot be used together with ``BinPackArguments`` + /// as inserting the comma disables bin-packing. + /// \code + /// TCS_Wrapped: + /// const someArray = [ + /// aaaaaaaaaaaaaaaaaaaaaaaaaa, + /// aaaaaaaaaaaaaaaaaaaaaaaaaa, + /// aaaaaaaaaaaaaaaaaaaaaaaaaa, + /// // ^ inserted + /// ] + /// \endcode TrailingCommaStyle InsertTrailingCommas; /// If ``false``, a function declaration's or function definition's @@ -2133,7 +2162,7 @@ struct FormatStyle { /// appears within a line (e.g. consecutive assignments and declarations). UT_ForContinuationAndIndentation, /// Use tabs for line continuation and indentation, and spaces for - /// alignemnt. + /// alignment. UT_AlignWithSpaces, /// Use tabs whenever we need to fill whitespace that spans at least from /// one tab stop to the next one. @@ -2160,6 +2189,7 @@ struct FormatStyle { R.AllowAllConstructorInitializersOnNextLine && AllowAllParametersOfDeclarationOnNextLine == R.AllowAllParametersOfDeclarationOnNextLine && + AllowShortEnumsOnASingleLine == R.AllowShortEnumsOnASingleLine && AllowShortBlocksOnASingleLine == R.AllowShortBlocksOnASingleLine && AllowShortCaseLabelsOnASingleLine == R.AllowShortCaseLabelsOnASingleLine && diff --git a/clang/include/clang/Frontend/ASTUnit.h b/clang/include/clang/Frontend/ASTUnit.h index a36655150d4ec..50ab86ebad972 100644 --- a/clang/include/clang/Frontend/ASTUnit.h +++ b/clang/include/clang/Frontend/ASTUnit.h @@ -172,7 +172,7 @@ class ASTUnit { /// Sorted (by file offset) vector of pairs of file offset/Decl. using LocDeclsTy = SmallVector<std::pair<unsigned, Decl *>, 64>; - using FileDeclsTy = llvm::DenseMap<FileID, LocDeclsTy *>; + using FileDeclsTy = llvm::DenseMap<FileID, std::unique_ptr<LocDeclsTy>>; /// Map from FileID to the file-level declarations that it contains. /// The files and decls are only local (and non-preamble) ones. diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index b3113372a1d3b..28c57dbe3b8e7 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -476,6 +476,13 @@ class HeaderSearch { /// This routine does not consider the effect of \#import bool isFileMultipleIncludeGuarded(const FileEntry *File); + /// Determine whether the given file is known to have ever been \#imported + /// (or if it has been \#included and we've encountered a \#pragma once).
+ bool hasFileBeenImported(const FileEntry *File) { + const HeaderFileInfo *FI = getExistingFileInfo(File); + return FI && FI->isImport; + } + /// This method returns a HeaderMap for the specified /// FileEntry, uniquing them through the 'HeaderMaps' datastructure. const HeaderMap *CreateHeaderMap(const FileEntry *FE); @@ -559,6 +566,12 @@ class HeaderSearch { ModuleMap::KnownHeader findModuleForHeader(const FileEntry *File, bool AllowTextual = false) const; + /// Retrieve all the modules corresponding to the given file. + /// + /// \ref findModuleForHeader should typically be used instead of this. + ArrayRef<ModuleMap::KnownHeader> + findAllModulesForHeader(const FileEntry *File) const; + /// Read the contents of the given module map file. /// /// \param File The module map file. diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 454fb3e01c886..805cc1f5017be 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -419,7 +419,10 @@ class ModuleMap { Callbacks.push_back(std::move(Callback)); } - /// Retrieve the module that owns the given header file, if any. + /// Retrieve the module that owns the given header file, if any. Note that + /// this does not implicitly load module maps, except for builtin headers, + /// and does not consult the external source. (Those checks are the + /// responsibility of \ref HeaderSearch.) /// /// \param File The header file that is likely to be included. /// @@ -433,13 +436,19 @@ class ModuleMap { KnownHeader findModuleForHeader(const FileEntry *File, bool AllowTextual = false); - /// Retrieve all the modules that contain the given header file. This - /// may not include umbrella modules, nor information from external sources, - /// if they have not yet been inferred / loaded. + /// Retrieve all the modules that contain the given header file. Note that + /// this does not implicitly load module maps, except for builtin headers, + /// and does not consult the external source. (Those checks are the + /// responsibility of \ref HeaderSearch.) /// /// Typically, \ref findModuleForHeader should be used instead, as it picks /// the preferred module for the header. - ArrayRef<KnownHeader> findAllModulesForHeader(const FileEntry *File) const; + ArrayRef<KnownHeader> findAllModulesForHeader(const FileEntry *File); + + /// Like \ref findAllModulesForHeader, but do not attempt to infer module + /// ownership from umbrella headers if we've not already done so. + ArrayRef<KnownHeader> + findResolvedModulesForHeader(const FileEntry *File) const; /// Resolve all lazy header directives for the specified file. /// diff --git a/clang/include/clang/Lex/Pragma.h b/clang/include/clang/Lex/Pragma.h index e9434269c19cd..cf8cca5414eac 100644 --- a/clang/include/clang/Lex/Pragma.h +++ b/clang/include/clang/Lex/Pragma.h @@ -96,11 +96,10 @@ class EmptyPragmaHandler : public PragmaHandler { class PragmaNamespace : public PragmaHandler { /// Handlers - This is a map of the handlers in this namespace with their name /// as key. - llvm::StringMap<PragmaHandler *> Handlers; + llvm::StringMap<std::unique_ptr<PragmaHandler>> Handlers; public: explicit PragmaNamespace(StringRef Name) : PragmaHandler(Name) {} - ~PragmaNamespace() override; /// FindHandler - Check to see if there is already a handler for the /// specified name.
If not, return the handler for the null name if it diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 61e5974c1f0d9..7d358bded6e76 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2278,20 +2278,22 @@ class Preprocessor { /// into a module, or is outside any module, returns nullptr. Module *getModuleForLocation(SourceLocation Loc); - /// We want to produce a diagnostic at location IncLoc concerning a - /// missing module import. - /// - /// \param IncLoc The location at which the missing import was detected. - /// \param M The desired module. - /// \param MLoc A location within the desired module at which some desired - /// effect occurred (eg, where a desired entity was declared). - /// - /// \return A file that can be #included to import a module containing MLoc. - /// Null if no such file could be determined or if a #include is not - /// appropriate. - const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, - Module *M, - SourceLocation MLoc); + /// We want to produce a diagnostic at location IncLoc concerning an + /// unreachable effect at location MLoc (eg, where a desired entity was + /// declared or defined). Determine whether the right way to make MLoc + /// reachable is by #include, and if so, what header should be included. + /// + /// This is not necessarily fast, and might load unexpected module maps, so + /// should only be called by code that intends to produce an error. + /// + /// \param IncLoc The location at which the missing effect was detected. + /// \param MLoc A location within an unimported module at which the desired + /// effect occurred. + /// \return A file that can be #included to provide the desired effect. Null + /// if no such file could be determined or if a #include is not + /// appropriate (eg, if a module should be imported instead). + const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, + SourceLocation MLoc); bool isRecordingPreamble() const { return PreambleConditionalStack.isRecording(); diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index cfe2a94f806bf..f227217006182 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3119,6 +3119,11 @@ class Parser : public CodeCompletionHandler { /// }+ ')' ExprResult ParseOpenMPIteratorsExpr(); + /// Parses allocators and traits in the context of the uses_allocator clause. + /// Expected format: + /// '(' { [ '(' ')' ] }+ ')' + OMPClause *ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind); + public: /// Parses simple expression in parens for single-expression clauses of OpenMP /// constructs. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b9f2af9e7bb0b..be883d76e03f8 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7014,7 +7014,7 @@ class Sema final { bool IgnoreAccess = false); bool CheckDerivedToBaseConversion(QualType Derived, QualType Base, unsigned InaccessibleBaseID, - unsigned AmbigiousBaseConvID, + unsigned AmbiguousBaseConvID, SourceLocation Loc, SourceRange Range, DeclarationName Name, CXXCastPath *BasePath, @@ -10933,6 +10933,21 @@ class Sema final { SourceLocation LParenLoc, SourceLocation EndLoc); + /// Data for list of allocators. + struct UsesAllocatorsData { + /// Allocator. + Expr *Allocator = nullptr; + /// Allocator traits. + Expr *AllocatorTraits = nullptr; + /// Locations of '(' and ')' symbols. 
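A hedged sketch (editorial, not from the patch) of the source construct that ParseOpenMPUsesAllocatorClause and the UsesAllocatorsData records feed into; omp_default_mem_alloc is a predefined OpenMP allocator, so it may appear without traits:

void Kernel(int N) {
  // Each allocator named in the clause becomes one UsesAllocatorsData entry
  // passed to ActOnOpenMPUsesAllocatorClause.
  #pragma omp target uses_allocators(omp_default_mem_alloc) \
      allocate(omp_default_mem_alloc : N) firstprivate(N)
  { N += 1; }
}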
+ SourceLocation LParenLoc, RParenLoc; + }; + /// Called on well-formed 'uses_allocators' clause. + OMPClause *ActOnOpenMPUsesAllocatorClause(SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc, + ArrayRef<UsesAllocatorsData> Data); + /// The kind of conversion being performed. enum CheckedConversionKind { /// An implicit conversion. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 11a537fad5d58..af063b7a867ee 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1355,7 +1355,7 @@ class ASTReader unsigned PreviousGeneration = 0); RecordLocation getLocalBitOffset(uint64_t GlobalOffset); - uint64_t getGlobalBitOffset(ModuleFile &M, uint32_t LocalOffset); + uint64_t getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset); /// Returns the first preprocessed entity ID that begins or ends after /// \arg Loc. diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index 52de383b0ebfc..7248e6fa6c21d 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -119,7 +119,7 @@ class ASTRecordReader //readExceptionSpecInfo(SmallVectorImpl &ExceptionStorage); /// Get the global offset corresponding to a local offset. - uint64_t getGlobalBitOffset(uint32_t LocalOffset) { + uint64_t getGlobalBitOffset(uint64_t LocalOffset) { return Reader->getGlobalBitOffset(*F, LocalOffset); } diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 8dc4889e3ae88..bc5a782f20cc6 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -216,7 +216,8 @@ class ASTWriter : public ASTDeserializationListener, /// indicates the index that this particular vector has in the global one. unsigned FirstDeclIndex; }; - using FileDeclIDsTy = llvm::DenseMap<FileID, DeclIDInFileInfo *>; + using FileDeclIDsTy = + llvm::DenseMap<FileID, std::unique_ptr<DeclIDInFileInfo>>; /// Map from file SLocEntries to info about the file-level declarations /// that it contains. diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h index d45c4b71e780a..51565524db1e6 100644 --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -589,7 +589,7 @@ class BugReporter { std::vector<BugReportEquivClass *> EQClassesVector; public: - BugReporter(BugReporterData &d) : D(d) {} + BugReporter(BugReporterData &d); virtual ~BugReporter(); /// Generate and flush diagnostics for all bug reports. @@ -632,7 +632,7 @@ class BugReporter { ArrayRef<FixItHint> Fixits = None); private: - llvm::StringMap<BugType *> StrBugTypes; + llvm::StringMap<std::unique_ptr<BugType>> StrBugTypes; /// Returns a BugType that is associated with the given name and /// category. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h index 8b4be2784f384..bc562a4ca6f10 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h @@ -1115,9 +1115,6 @@ class ObjCMethodCall : public CallEvent { /// Returns the value of the receiver at the time of this call. SVal getReceiverSVal() const; - /// Return the value of 'self' if available. - SVal getSelfSVal() const; - /// Get the interface for the receiver.
/// /// This works whether this is an instance message or a class message. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h index 356401d77561c..2679339537e89 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h @@ -36,6 +36,10 @@ DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR); const DynamicTypeInfo *getRawDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR); +/// Get dynamic type information stored in a class object represented by \p Sym. +DynamicTypeInfo getClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym); + /// Get dynamic cast information from \p CastFromTy to \p CastToTy of \p MR. const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State, const MemRegion *MR, @@ -50,6 +54,16 @@ ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR, ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR, QualType NewTy, bool CanBeSubClassed = true); +/// Set constraint on a type contained in a class object; return the new state. +ProgramStateRef setClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym, + DynamicTypeInfo NewTy); + +/// Set constraint on a type contained in a class object; return the new state. +ProgramStateRef setClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym, QualType NewTy, + bool CanBeSubClassed = true); + /// Set dynamic type and cast information of the region; return the new state. ProgramStateRef setDynamicTypeAndCastInfo(ProgramStateRef State, const MemRegion *MR, @@ -63,6 +77,10 @@ ProgramStateRef removeDeadTypes(ProgramStateRef State, SymbolReaper &SR); /// Removes the dead cast information from \p State. ProgramStateRef removeDeadCasts(ProgramStateRef State, SymbolReaper &SR); +/// Removes the dead class object type information from \p State. +ProgramStateRef removeDeadClassObjectTypes(ProgramStateRef State, + SymbolReaper &SR); + void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h index 6262c4a1ce378..6d2b495dc0f50 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h @@ -33,6 +33,8 @@ class DynamicTypeInfo { /// Returns the currently inferred upper bound on the runtime type. QualType getType() const { return DynTy; } + operator bool() const { return isValid(); } + bool operator==(const DynamicTypeInfo &RHS) const { return DynTy == RHS.DynTy && CanBeASubClass == RHS.CanBeASubClass; } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index bdd12a3ffe330..ecb61bffe3d95 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -298,6 +298,9 @@ class ProgramState : public llvm::FoldingSetNode { LLVM_NODISCARD ProgramStateRef enterStackFrame( const CallEvent &Call, const StackFrameContext *CalleeCtx) const; + /// Return the value of 'self' if available in the given context.
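+  /// (Illustrative note: in an Objective-C instance method this is the value
+  /// bound to the implicit `self` parameter; in contexts without `self` no
+  /// value may be available.)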
+ SVal getSelfSVal(const LocationContext *LC) const; + /// Get the lvalue for a base class object reference. Loc getLValue(const CXXBaseSpecifier &BaseSpec, const SubRegion *Super) const; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index d212e23da6fc1..2c505995bee0a 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -459,7 +459,8 @@ class SymSymExpr : public BinarySymExpr { class SymbolManager { using DataSetTy = llvm::FoldingSet; - using SymbolDependTy = llvm::DenseMap; + using SymbolDependTy = + llvm::DenseMap>; DataSetTy DataSet; @@ -476,7 +477,6 @@ class SymbolManager { SymbolManager(ASTContext &ctx, BasicValueFactory &bv, llvm::BumpPtrAllocator& bpalloc) : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} - ~SymbolManager(); static bool canSymbolicate(QualType T); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index c393a36af8448..d4ee49c165fa8 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1006,9 +1006,6 @@ ASTContext::~ASTContext() { for (APValue *Value : APValueCleanups) Value->~APValue(); - - // Destroy the OMPTraitInfo objects that life here. - llvm::DeleteContainerPointers(OMPTraitInfoVector); } void ASTContext::setTraversalScope(const std::vector &TopLevelDecls) { @@ -3694,10 +3691,10 @@ ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr, (void)CanonCheck; DependentVectorTypes.InsertNode(New, InsertPos); } else { - QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr, - SourceLocation()); + QualType CanonTy = getDependentVectorType(CanonVecTy, SizeExpr, + SourceLocation(), VecKind); New = new (*this, TypeAlignment) DependentVectorType( - *this, VecType, CanonExtTy, SizeExpr, AttrLoc, VecKind); + *this, VecType, CanonTy, SizeExpr, AttrLoc, VecKind); } } @@ -11029,6 +11026,6 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, } OMPTraitInfo &ASTContext::getNewOMPTraitInfo() { - OMPTraitInfoVector.push_back(new OMPTraitInfo()); + OMPTraitInfoVector.emplace_back(new OMPTraitInfo()); return *OMPTraitInfoVector.back(); } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 055b9c6d37ba0..9c1b99d30e788 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4410,6 +4410,21 @@ void RecordDecl::setCapturedRecord() { addAttr(CapturedRecordAttr::CreateImplicit(getASTContext())); } +bool RecordDecl::isOrContainsUnion() const { + if (isUnion()) + return true; + + if (const RecordDecl *Def = getDefinition()) { + for (const FieldDecl *FD : Def->fields()) { + const RecordType *RT = FD->getType()->getAs(); + if (RT && RT->getDecl()->isOrContainsUnion()) + return true; + } + } + + return false; +} + RecordDecl::field_iterator RecordDecl::field_begin() const { if (hasExternalLexicalStorage() && !hasLoadedFieldsFromExternalStorage()) LoadFieldsFromExternalStorage(); diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index f7d92bd81f2e5..27c61832fcf64 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -535,7 +535,7 @@ void DeclPrinter::VisitEnumDecl(EnumDecl *D) { if (Policy.SuppressDefinition) return; - if (D->isFixed() && D->getASTContext().getLangOpts().CPlusPlus11) + if (D->isFixed()) Out << " : " << D->getIntegerType().stream(Policy); if (D->isCompleteDefinition()) { 
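With the C++11 gate dropped in the DeclPrinter hunk above, the underlying type is now printed for every fixed enum. A minimal sketch of the effect, under an assumed input (not taken from this patch):

    enum E : short { A, B };
    // previously printed as "enum E { A, B }" outside C++11 modes,
    // now always printed as "enum E : short { A, B }"
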
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index fcc0b3b11e259..e9f6b88631af5 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -539,7 +539,7 @@ QualType ArgType::getRepresentativeType(ASTContext &C) const { } std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { - std::string S = getRepresentativeType(C).getAsString(); + std::string S = getRepresentativeType(C).getAsString(C.getPrintingPolicy()); std::string Alias; if (Name) { diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index e28463516a9f4..87bf4e122ec84 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -464,13 +464,14 @@ static void Append(char *Start, char *End, char *&Buffer, unsigned &BufferSize, unsigned NewCapacity = std::max( (unsigned)(BufferCapacity ? BufferCapacity * 2 : sizeof(void *) * 2), (unsigned)(BufferSize + (End - Start))); - char *NewBuffer = static_cast(llvm::safe_malloc(NewCapacity)); - if (Buffer) { - memcpy(NewBuffer, Buffer, BufferSize); - if (BufferCapacity) - free(Buffer); + if (!BufferCapacity) { + char *NewBuffer = static_cast(llvm::safe_malloc(NewCapacity)); + if (Buffer) + memcpy(NewBuffer, Buffer, BufferSize); + Buffer = NewBuffer; + } else { + Buffer = static_cast(llvm::safe_realloc(Buffer, NewCapacity)); } - Buffer = NewBuffer; BufferCapacity = NewCapacity; } assert(Buffer && Start && End && End > Start && "Illegal memory buffer copy"); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index bd98728c3e4cc..d4d398f43c9d7 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -150,6 +150,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: break; } @@ -239,6 +240,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: break; } @@ -1302,6 +1304,70 @@ OMPExclusiveClause *OMPExclusiveClause::CreateEmpty(const ASTContext &C, return new (Mem) OMPExclusiveClause(N); } +void OMPUsesAllocatorsClause::setAllocatorsData( + ArrayRef Data) { + assert(Data.size() == NumOfAllocators && + "Size of allocators data is not the same as the preallocated buffer."); + for (unsigned I = 0, E = Data.size(); I < E; ++I) { + const OMPUsesAllocatorsClause::Data &D = Data[I]; + getTrailingObjects()[I * static_cast(ExprOffsets::Total) + + static_cast(ExprOffsets::Allocator)] = + D.Allocator; + getTrailingObjects()[I * static_cast(ExprOffsets::Total) + + static_cast( + ExprOffsets::AllocatorTraits)] = + D.AllocatorTraits; + getTrailingObjects< + SourceLocation>()[I * static_cast(ParenLocsOffsets::Total) + + static_cast(ParenLocsOffsets::LParen)] = + D.LParenLoc; + getTrailingObjects< + SourceLocation>()[I * static_cast(ParenLocsOffsets::Total) + + static_cast(ParenLocsOffsets::RParen)] = + D.RParenLoc; + } +} + +OMPUsesAllocatorsClause::Data +OMPUsesAllocatorsClause::getAllocatorData(unsigned I) const { + OMPUsesAllocatorsClause::Data Data; + Data.Allocator = + getTrailingObjects()[I * static_cast(ExprOffsets::Total) + + static_cast(ExprOffsets::Allocator)]; + Data.AllocatorTraits = + getTrailingObjects()[I * static_cast(ExprOffsets::Total) + + static_cast( + ExprOffsets::AllocatorTraits)]; + Data.LParenLoc = getTrailingObjects< + SourceLocation>()[I * 
static_cast(ParenLocsOffsets::Total) + + static_cast(ParenLocsOffsets::LParen)]; + Data.RParenLoc = getTrailingObjects< + SourceLocation>()[I * static_cast(ParenLocsOffsets::Total) + + static_cast(ParenLocsOffsets::RParen)]; + return Data; +} + +OMPUsesAllocatorsClause * +OMPUsesAllocatorsClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, + ArrayRef Data) { + void *Mem = C.Allocate(totalSizeToAlloc( + static_cast(ExprOffsets::Total) * Data.size(), + static_cast(ParenLocsOffsets::Total) * Data.size())); + auto *Clause = new (Mem) + OMPUsesAllocatorsClause(StartLoc, LParenLoc, EndLoc, Data.size()); + Clause->setAllocatorsData(Data); + return Clause; +} + +OMPUsesAllocatorsClause * +OMPUsesAllocatorsClause::CreateEmpty(const ASTContext &C, unsigned N) { + void *Mem = C.Allocate(totalSizeToAlloc( + static_cast(ExprOffsets::Total) * N, + static_cast(ParenLocsOffsets::Total) * N)); + return new (Mem) OMPUsesAllocatorsClause(N); +} + //===----------------------------------------------------------------------===// // OpenMP clauses printing methods //===----------------------------------------------------------------------===// @@ -1884,6 +1950,25 @@ void OMPClausePrinter::VisitOMPExclusiveClause(OMPExclusiveClause *Node) { } } +void OMPClausePrinter::VisitOMPUsesAllocatorsClause( + OMPUsesAllocatorsClause *Node) { + if (Node->getNumberOfAllocators() == 0) + return; + OS << "uses_allocators("; + for (unsigned I = 0, E = Node->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data Data = Node->getAllocatorData(I); + Data.Allocator->printPretty(OS, nullptr, Policy); + if (Data.AllocatorTraits) { + OS << "("; + Data.AllocatorTraits->printPretty(OS, nullptr, Policy); + OS << ")"; + } + if (I < E - 1) + OS << ","; + } + OS << ")"; +} + void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx, VariantMatchInfo &VMI) const { for (const OMPTraitSet &Set : Sets) { diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 995f710876af5..788fac789270f 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -161,7 +161,8 @@ void OMPLoopDirective::setFinalsConditions(ArrayRef A) { OMPParallelDirective *OMPParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *)); void *Mem = @@ -170,6 +171,7 @@ OMPParallelDirective *OMPParallelDirective::Create( new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -227,11 +229,10 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C, return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses); } -OMPForDirective * -OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation EndLoc, unsigned CollapsedNum, - ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { +OMPForDirective *OMPForDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPForDirective), 
alignof(OMPClause *)); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + @@ -264,6 +265,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -334,7 +336,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C, OMPSectionsDirective *OMPSectionsDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *)); void *Mem = @@ -343,6 +346,7 @@ OMPSectionsDirective *OMPSectionsDirective::Create( new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -449,7 +453,7 @@ OMPCriticalDirective *OMPCriticalDirective::CreateEmpty(const ASTContext &C, OMPParallelForDirective *OMPParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + @@ -483,6 +487,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create( Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -552,7 +557,7 @@ OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C, OMPParallelMasterDirective *OMPParallelMasterDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) { unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *)); void *Mem = @@ -561,6 +566,7 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::Create( new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); return Dir; } @@ -576,7 +582,8 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::CreateEmpty(const ASTCon OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *)); void *Mem = @@ -585,6 +592,7 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create( new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -887,7 +895,8 @@ OMPTargetDirective 
*OMPTargetDirective::CreateEmpty(const ASTContext &C, OMPTargetParallelDirective *OMPTargetParallelDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - ArrayRef Clauses, Stmt *AssociatedStmt) { + ArrayRef Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef, + bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *)); void *Mem = @@ -896,6 +905,8 @@ OMPTargetParallelDirective *OMPTargetParallelDirective::Create( new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size()); Dir->setClauses(Clauses); Dir->setAssociatedStmt(AssociatedStmt); + Dir->setTaskReductionRefExpr(TaskRedRef); + Dir->setHasCancel(HasCancel); return Dir; } @@ -912,7 +923,7 @@ OMPTargetParallelDirective::CreateEmpty(const ASTContext &C, OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -946,6 +957,7 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create( Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->setHasCancel(HasCancel); return Dir; } @@ -1456,7 +1468,7 @@ OMPTargetUpdateDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -1505,6 +1517,7 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create( Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } @@ -1931,7 +1944,7 @@ OMPTeamsDistributeParallelForDirective * OMPTeamsDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, - const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -1980,6 +1993,7 @@ OMPTeamsDistributeParallelForDirective::Create( Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } @@ -2083,7 +2097,7 @@ OMPTargetTeamsDistributeParallelForDirective * OMPTargetTeamsDistributeParallelForDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, 
- const HelperExprs &Exprs, bool HasCancel) { + const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) { auto Size = llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective), alignof(OMPClause *)); void *Mem = C.Allocate( @@ -2134,6 +2148,7 @@ OMPTargetTeamsDistributeParallelForDirective::Create( Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); + Dir->setTaskReductionRefExpr(TaskRedRef); Dir->HasCancel = HasCancel; return Dir; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index be0bc13ccb4d0..869d2fa15b25b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -800,6 +800,15 @@ void OMPClauseProfiler::VisitOMPInclusiveClause(const OMPInclusiveClause *C) { void OMPClauseProfiler::VisitOMPExclusiveClause(const OMPExclusiveClause *C) { VisitOMPClauseList(C); } +void OMPClauseProfiler::VisitOMPUsesAllocatorsClause( + const OMPUsesAllocatorsClause *C) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + Profiler->VisitStmt(D.Allocator); + if (D.AllocatorTraits) + Profiler->VisitStmt(D.AllocatorTraits); + } +} void OMPClauseProfiler::VisitOMPOrderClause(const OMPOrderClause *C) {} } // namespace diff --git a/clang/lib/Analysis/AnalysisDeclContext.cpp b/clang/lib/Analysis/AnalysisDeclContext.cpp index 96d5807bcdfc0..783de64426458 100644 --- a/clang/lib/Analysis/AnalysisDeclContext.cpp +++ b/clang/lib/Analysis/AnalysisDeclContext.cpp @@ -50,7 +50,7 @@ using namespace clang; -using ManagedAnalysisMap = llvm::DenseMap<const void *, ManagedAnalysis *>; +using ManagedAnalysisMap = llvm::DenseMap<const void *, std::unique_ptr<ManagedAnalysis>>; AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *ADCMgr, const Decl *D, @@ -617,7 +617,7 @@ AnalysisDeclContext::getReferencedBlockVars(const BlockDecl *BD) { return llvm::make_range(V->begin(), V->end()); } -ManagedAnalysis *&AnalysisDeclContext::getAnalysisImpl(const void *tag) { +std::unique_ptr<ManagedAnalysis> &AnalysisDeclContext::getAnalysisImpl(const void *tag) { if (!ManagedAnalyses) ManagedAnalyses = new ManagedAnalysisMap(); ManagedAnalysisMap *M = (ManagedAnalysisMap*) ManagedAnalyses; @@ -633,12 +633,7 @@ ManagedAnalysis::~ManagedAnalysis() = default; AnalysisDeclContext::~AnalysisDeclContext() { delete forcedBlkExprs; delete ReferencedBlockVars; - // Release the managed analyses. - if (ManagedAnalyses) { - ManagedAnalysisMap *M = (ManagedAnalysisMap*) ManagedAnalyses; - llvm::DeleteContainerSeconds(*M); - delete M; - } + delete (ManagedAnalysisMap*) ManagedAnalyses; } LocationContext::~LocationContext() = default; diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp index f910338b0ad3e..d24c40b457b4b 100644 --- a/clang/lib/Analysis/LiveVariables.cpp +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -490,9 +490,8 @@ LiveVariables::~LiveVariables() { delete (LiveVariablesImpl*) impl; } -LiveVariables * -LiveVariables::computeLiveness(AnalysisDeclContext &AC, - bool killAtAssign) { +std::unique_ptr<LiveVariables> +LiveVariables::computeLiveness(AnalysisDeclContext &AC, bool killAtAssign) { // No CFG? Bail out.
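  // (With the new std::unique_ptr<LiveVariables> return type this early exit
  // still works: `return nullptr;` constructs an empty unique_ptr.)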
CFG *cfg = AC.getCFG(); @@ -565,7 +564,7 @@ LiveVariables::computeLiveness(AnalysisDeclContext &AC, worklist.enqueuePredecessors(block); } - return new LiveVariables(LV); + return std::unique_ptr(new LiveVariables(LV)); } void LiveVariables::dumpBlockLiveness(const SourceManager &M) { diff --git a/clang/lib/Analysis/PostOrderCFGView.cpp b/clang/lib/Analysis/PostOrderCFGView.cpp index f79d0007cb3d9..0c09c0f97ff68 100644 --- a/clang/lib/Analysis/PostOrderCFGView.cpp +++ b/clang/lib/Analysis/PostOrderCFGView.cpp @@ -29,11 +29,12 @@ PostOrderCFGView::PostOrderCFGView(const CFG *cfg) { } } -PostOrderCFGView *PostOrderCFGView::create(AnalysisDeclContext &ctx) { +std::unique_ptr +PostOrderCFGView::create(AnalysisDeclContext &ctx) { const CFG *cfg = ctx.getCFG(); if (!cfg) return nullptr; - return new PostOrderCFGView(cfg); + return std::make_unique(cfg); } const void *PostOrderCFGView::getTag() { static int x; return &x; } diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 77a452d26f75a..039f76f0a62d8 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -148,6 +148,8 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, if (LangOpts.Coroutines && (Flags & KEYCOROUTINES)) return KS_Enabled; if (LangOpts.ModulesTS && (Flags & KEYMODULES)) return KS_Enabled; if (LangOpts.CPlusPlus && (Flags & KEYALLCXX)) return KS_Future; + if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus20 && (Flags & CHAR8SUPPORT)) + return KS_Future; return KS_Disabled; } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 888666b30d2e7..841d76ba27ed7 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -174,6 +174,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: break; } llvm_unreachable("Invalid OpenMP simple clause kind"); @@ -420,6 +421,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: break; } llvm_unreachable("Invalid OpenMP simple clause kind"); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index a68109a604d59..39ccac96a49d8 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -435,6 +435,8 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo { } ArrayRef getTargetBuiltins() const override; + + bool hasExtIntType() const override { return true; } }; class LLVM_LIBRARY_VISIBILITY NetBSDI386TargetInfo @@ -737,6 +739,8 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { } ArrayRef getTargetBuiltins() const override; + + bool hasExtIntType() const override { return true; } }; // x86-64 Windows target diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c527de306a6ba..18db7b86076c7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7548,80 +7548,96 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { // Return the llvm predicate vector type corresponding to the specified element // TypeFlags. 
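// For example (illustration): for 32-bit elements the predicate type is
//   llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4)
// i.e. the IR type <vscale x 4 x i1>, four predicate lanes per 128-bit
// granule, while svbool_t itself is always the full <vscale x 16 x i1>.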
-llvm::VectorType* CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { +llvm::ScalableVectorType * +CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Unhandled SVETypeFlag!"); case SVETypeFlags::EltTyInt8: - return llvm::VectorType::get(Builder.getInt1Ty(), { 16, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); case SVETypeFlags::EltTyInt16: - return llvm::VectorType::get(Builder.getInt1Ty(), { 8, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyInt32: - return llvm::VectorType::get(Builder.getInt1Ty(), { 4, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyInt64: - return llvm::VectorType::get(Builder.getInt1Ty(), { 2, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); case SVETypeFlags::EltTyFloat16: - return llvm::VectorType::get(Builder.getInt1Ty(), { 8, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyFloat32: - return llvm::VectorType::get(Builder.getInt1Ty(), { 4, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyFloat64: - return llvm::VectorType::get(Builder.getInt1Ty(), { 2, true }); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + + case SVETypeFlags::EltTyBool8: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyBool16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyBool32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyBool64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } // Return the llvm vector type corresponding to the specified element TypeFlags. 
-llvm::VectorType *CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { +llvm::ScalableVectorType * +CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Invalid SVETypeFlag!"); case SVETypeFlags::EltTyInt8: - return llvm::VectorType::get(Builder.getInt8Ty(), {16, true}); + return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); case SVETypeFlags::EltTyInt16: - return llvm::VectorType::get(Builder.getInt16Ty(), {8, true}); + return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); case SVETypeFlags::EltTyInt32: - return llvm::VectorType::get(Builder.getInt32Ty(), {4, true}); + return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); case SVETypeFlags::EltTyInt64: - return llvm::VectorType::get(Builder.getInt64Ty(), {2, true}); + return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); case SVETypeFlags::EltTyFloat16: - return llvm::VectorType::get(Builder.getHalfTy(), {8, true}); + return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); case SVETypeFlags::EltTyFloat32: - return llvm::VectorType::get(Builder.getFloatTy(), {4, true}); + return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); case SVETypeFlags::EltTyFloat64: - return llvm::VectorType::get(Builder.getDoubleTy(), {2, true}); + return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); case SVETypeFlags::EltTyBool8: - return llvm::VectorType::get(Builder.getInt1Ty(), {16, true}); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); case SVETypeFlags::EltTyBool16: - return llvm::VectorType::get(Builder.getInt1Ty(), {8, true}); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyBool32: - return llvm::VectorType::get(Builder.getInt1Ty(), {4, true}); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyBool64: - return llvm::VectorType::get(Builder.getInt1Ty(), {2, true}); + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } +llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { + Function *Ptrue = + CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); + return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); +} + constexpr unsigned SVEBitsPerBlock = 128; -static llvm::VectorType* getSVEVectorForElementType(llvm::Type *EltTy) { +static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits(); - return llvm::VectorType::get(EltTy, { NumElts, true }); + return llvm::ScalableVectorType::get(EltTy, NumElts); } // Reinterpret the input predicate so that it can be used to correctly isolate // the elements of the specified datatype. 
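// For instance (sketch), a svbool_t value of IR type <vscale x 16 x i1> that
// guards 32-bit elements is narrowed to <vscale x 4 x i1> via the
// aarch64.sve.convert.from.svbool intrinsic; if Pred already has the
// requested type it is returned unchanged.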
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, - llvm::VectorType *VTy) { - llvm::VectorType *RTy = llvm::VectorType::get( - IntegerType::get(getLLVMContext(), 1), VTy->getElementCount()); + llvm::ScalableVectorType *VTy) { + auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); if (Pred->getType() == RTy) return Pred; unsigned IntID; llvm::Type *IntrinsicTy; - switch (VTy->getNumElements()) { + switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); case 2: @@ -7646,8 +7662,8 @@ Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *ResultTy = getSVEType(TypeFlags); - auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags), - ResultTy->getElementCount()); + auto *OverloadedTy = + llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); // At the ACLE level there's only one predicate type, svbool_t, which is // mapped to . However, this might be incompatible with the @@ -7698,8 +7714,8 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *SrcDataTy = getSVEType(TypeFlags); - auto *OverloadedTy = llvm::VectorType::get(SVEBuiltinMemEltTy(TypeFlags), - SrcDataTy->getElementCount()); + auto *OverloadedTy = + llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy); // In ACLE the source data is passed in the last argument, whereas in LLVM IR // it's the first argument. Move it accordingly. @@ -7749,12 +7765,45 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } +Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + // The gather prefetches are overloaded on the vector input - this can either + // be the vector of base addresses or vector of offsets. + auto *OverloadedTy = dyn_cast(Ops[1]->getType()); + if (!OverloadedTy) + OverloadedTy = cast(Ops[2]->getType()); + + // Cast the predicate from svbool_t to the right number of elements. + Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); + + // vector + imm addressing modes + if (Ops[1]->getType()->isVectorTy()) { + if (Ops.size() == 3) { + // Pass 0 for 'vector+imm' when the index is omitted. + Ops.push_back(ConstantInt::get(Int64Ty, 0)); + + // The sv_prfop is the last operand in the builtin and IR intrinsic. + std::swap(Ops[2], Ops[3]); + } else { + // Index needs to be passed as scaled offset. + llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); + unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; + Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); + Ops[2] = Builder.CreateMul(Ops[2], Scale); + } + } + + Function *F = CGM.getIntrinsic(IntID, OverloadedTy); + return Builder.CreateCall(F, Ops); +} + Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); auto *VectorTy = getSVEVectorForElementType(MemEltTy); - auto *MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount()); + auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Ops[1]; @@ -7784,8 +7833,8 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, // The vector type that is returned may be different from the // eventual type loaded from memory. 
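// For instance, the ACLE builtin svld1sb_s32 loads <vscale x 4 x i8> from
// memory and sign-extends each lane to yield the <vscale x 4 x i32> result
// (an illustrative example of the load/result type mismatch handled here).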
- auto VectorTy = cast(ReturnTy); - auto MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount()); + auto VectorTy = cast(ReturnTy); + auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); @@ -7809,8 +7858,8 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, // The vector type that is stored may be different from the // eventual type stored to memory. - auto VectorTy = cast(Ops.back()->getType()); - auto MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount()); + auto VectorTy = cast(Ops.back()->getType()); + auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); @@ -7874,6 +7923,19 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); + llvm::Type *Ty = ConvertType(E->getType()); + if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && + BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { + Value *Val = EmitScalarExpr(E->getArg(0)); + // FIXME: For big endian this needs an additional REV, or needs a separate + // intrinsic that is code-generated as a no-op, because the LLVM bitcast + // instruction is defined as 'bitwise' equivalent from memory point of + // view (when storing/reloading), whereas the svreinterpret builtin + // implements bitwise equivalent cast from register point of view. + // LLVM CodeGen for a bitcast must add an explicit REV for big-endian. + return Builder.CreateBitCast(Val, Ty); + } + llvm::SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { if ((ICEArguments & (1 << i)) == 0) @@ -7896,7 +7958,6 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted); SVETypeFlags TypeFlags(Builtin->TypeModifier); - llvm::Type *Ty = ConvertType(E->getType()); if (TypeFlags.isLoad()) return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, TypeFlags.isZExtReturn()); @@ -7908,6 +7969,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isPrefetch()) return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isGatherPrefetch()) + return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (Builtin->LLVMIntrinsic != 0) { if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) InsertExplicitZeroOperand(Builder, Ty, Ops); @@ -7952,7 +8015,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, // Predicate results must be converted to svbool_t. 
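  // E.g. a compare that produced <vscale x 4 x i1> is widened back to the
  // ACLE-level svbool_t (<vscale x 16 x i1>) before being returned to the
  // caller.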
if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType())) if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast<llvm::VectorType>(Ty)); + Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); return Call; } @@ -7960,8 +8023,102 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: return nullptr; + + case SVE::BI__builtin_sve_svmov_b_z: { + // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) + SVETypeFlags TypeFlags(Builtin->TypeModifier); + llvm::Type* OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); + } + + case SVE::BI__builtin_sve_svnot_b_z: { + // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) + SVETypeFlags TypeFlags(Builtin->TypeModifier); + llvm::Type* OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); + } + + case SVE::BI__builtin_sve_svdupq_n_b8: + case SVE::BI__builtin_sve_svdupq_n_b16: + case SVE::BI__builtin_sve_svdupq_n_b32: + case SVE::BI__builtin_sve_svdupq_n_b64: + case SVE::BI__builtin_sve_svdupq_n_u8: + case SVE::BI__builtin_sve_svdupq_n_s8: + case SVE::BI__builtin_sve_svdupq_n_u64: + case SVE::BI__builtin_sve_svdupq_n_f64: + case SVE::BI__builtin_sve_svdupq_n_s64: + case SVE::BI__builtin_sve_svdupq_n_u16: + case SVE::BI__builtin_sve_svdupq_n_f16: + case SVE::BI__builtin_sve_svdupq_n_s16: + case SVE::BI__builtin_sve_svdupq_n_u32: + case SVE::BI__builtin_sve_svdupq_n_f32: + case SVE::BI__builtin_sve_svdupq_n_s32: { + // These builtins are implemented by storing each element to an array and using + // ld1rq to materialize a vector. + unsigned NumOpnds = Ops.size(); + + bool IsBoolTy = + cast<llvm::ScalableVectorType>(Ty)->getElementType()->isIntegerTy(1); + + // For svdupq_n_b* the element type is an integer of type 128/numelts, + // so that the compare can use the width that is natural for the expected + // number of predicate lanes. + llvm::Type *EltTy = Ops[0]->getType(); + if (IsBoolTy) + EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds); + + Address Alloca = CreateTempAlloca(llvm::ArrayType::get(EltTy, NumOpnds), + CharUnits::fromQuantity(16)); + for (unsigned I = 0; I < NumOpnds; ++I) + Builder.CreateDefaultAlignedStore( + IsBoolTy ? Builder.CreateZExt(Ops[I], EltTy) : Ops[I], + Builder.CreateGEP(Alloca.getPointer(), + {Builder.getInt64(0), Builder.getInt64(I)})); + + SVETypeFlags TypeFlags(Builtin->TypeModifier); + Value *Pred = EmitSVEAllTruePred(TypeFlags); + + llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_ld1rq, OverloadedTy); + Value *Alloca0 = Builder.CreateGEP( + Alloca.getPointer(), {Builder.getInt64(0), Builder.getInt64(0)}); + Value *LD1RQ = Builder.CreateCall(F, {Pred, Alloca0}); + + if (!IsBoolTy) + return LD1RQ; + + // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. + F = CGM.getIntrinsic(NumOpnds == 2 ?
Intrinsic::aarch64_sve_cmpne + : Intrinsic::aarch64_sve_cmpne_wide, + OverloadedTy); + Value *Call = + Builder.CreateCall(F, {Pred, LD1RQ, EmitSVEDupX(Builder.getInt64(0))}); + return EmitSVEPredicateCast(Call, cast(Ty)); + } + case SVE::BI__builtin_sve_svpfalse_b: return ConstantInt::getFalse(Ty); + + case SVE::BI__builtin_sve_svlen_f16: + case SVE::BI__builtin_sve_svlen_f32: + case SVE::BI__builtin_sve_svlen_f64: + case SVE::BI__builtin_sve_svlen_s8: + case SVE::BI__builtin_sve_svlen_s16: + case SVE::BI__builtin_sve_svlen_s32: + case SVE::BI__builtin_sve_svlen_s64: + case SVE::BI__builtin_sve_svlen_u8: + case SVE::BI__builtin_sve_svlen_u16: + case SVE::BI__builtin_sve_svlen_u32: + case SVE::BI__builtin_sve_svlen_u64: { + SVETypeFlags TF(Builtin->TypeModifier); + auto VTy = cast(getSVEType(TF)); + auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min); + + Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); + return Builder.CreateMul(NumEls, Builder.CreateCall(F)); + } } /// Should not happen @@ -15480,8 +15637,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: - case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: - case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, @@ -15492,8 +15648,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: - case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: - case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index ebb92047b0c40..d1ac6d1a200d6 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -16,6 +16,7 @@ #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1021,8 +1022,8 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } } -void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl::iterator &AI) { +void CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV, + llvm::Function::arg_iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -1051,17 +1052,17 @@ void CodeGenFunction::ExpandTypeFromArgs( ExpandTypeFromArgs(FD->getType(), SubLV, AI); } } else if (isa(Exp.get())) { - auto realValue = *AI++; - auto imagValue = *AI++; + auto realValue = &*AI++; + auto imagValue = &*AI++; EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true); } else { // Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a // primitive store. 
assert(isa(Exp.get())); if (LV.isBitField()) - EmitStoreThroughLValue(RValue::get(*AI++), LV); + EmitStoreThroughLValue(RValue::get(&*AI++), LV); else - EmitStoreOfScalar(*AI++, LV); + EmitStoreOfScalar(&*AI++, LV); } } @@ -2328,19 +2329,13 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // simplify. ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); - // Flattened function arguments. - SmallVector FnArgs; - FnArgs.reserve(IRFunctionArgs.totalIRArgs()); - for (auto &Arg : Fn->args()) { - FnArgs.push_back(&Arg); - } - assert(FnArgs.size() == IRFunctionArgs.totalIRArgs()); + assert(Fn->arg_size() == IRFunctionArgs.totalIRArgs()); // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); if (IRFunctionArgs.hasInallocaArg()) { - ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()], + ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()), FI.getArgStructAlignment()); assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo()); @@ -2348,7 +2343,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. if (IRFunctionArgs.hasSRetArg()) { - auto AI = cast(FnArgs[IRFunctionArgs.getSRetArgNo()]); + auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo()); AI->setName("agg.result"); AI->addAttr(llvm::Attribute::NoAlias); } @@ -2399,7 +2394,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Indirect: { assert(NumIRArgs == 1); - Address ParamAddr = Address(FnArgs[FirstIRArg], ArgI.getIndirectAlign()); + Address ParamAddr = + Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign()); if (!hasScalarEvaluationKind(Ty)) { // Aggregates and complex variables are accessed by reference. All we @@ -2435,16 +2431,18 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Extend: case ABIArgInfo::Direct: { - - // If we have the trivial case, handle it with no muss and fuss. - if (!isa(ArgI.getCoerceToType()) && - ArgI.getCoerceToType() == ConvertType(Ty) && - ArgI.getDirectOffset() == 0) { + auto AI = Fn->getArg(FirstIRArg); + llvm::Type *LTy = ConvertType(Arg->getType()); + + // Prepare parameter attributes. So far, only attributes for pointer + // parameters are prepared. See + // http://llvm.org/docs/LangRef.html#paramattrs. + if (ArgI.getDirectOffset() == 0 && LTy->isPointerTy() && + ArgI.getCoerceToType()->isPointerTy()) { assert(NumIRArgs == 1); - llvm::Value *V = FnArgs[FirstIRArg]; - auto AI = cast(V); if (const ParmVarDecl *PVD = dyn_cast(Arg)) { + // Set `nonnull` attribute if any. if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), PVD->getFunctionScopeIndex()) && !CGM.getCodeGenOpts().NullPointerIsValid) @@ -2482,6 +2480,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->addAttr(llvm::Attribute::NonNull); } + // Set `align` attribute if any. const auto *AVAttr = PVD->getAttr(); if (!AVAttr) if (const auto *TOTy = dyn_cast(OTy)) @@ -2499,14 +2498,24 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } } + // Set 'noalias' if an argument type has the `restrict` qualifier. if (Arg->getType().isRestrictQualified() || (CurCodeDecl && CurCodeDecl->hasAttr() && Arg->getType()->isPointerType())) AI->addAttr(llvm::Attribute::NoAlias); + } + + // Prepare the argument value. If we have the trivial case, handle it + // with no muss and fuss. 
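+      // (Trivial here means the IR argument already has exactly the converted
+      // parameter type and a zero coerce offset, so it can be used directly.)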
+ if (!isa(ArgI.getCoerceToType()) && + ArgI.getCoerceToType() == ConvertType(Ty) && + ArgI.getDirectOffset() == 0) { + assert(NumIRArgs == 1); // LLVM expects swifterror parameters to be used in very restricted // ways. Copy the value into a less-restricted temporary. + llvm::Value *V = AI; if (FI.getExtParameterInfo(ArgNo).getABI() == ParameterABI::SwiftErrorResult) { QualType pointeeTy = Ty->getPointeeType(); @@ -2568,7 +2577,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(STy->getNumElements() == NumIRArgs); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto AI = FnArgs[FirstIRArg + i]; + auto AI = Fn->getArg(FirstIRArg + i); AI->setName(Arg->getName() + ".coerce" + Twine(i)); Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); Builder.CreateStore(AI, EltPtr); @@ -2581,7 +2590,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } else { // Simple case, just do a coerced store of the argument into the alloca. assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; + auto AI = Fn->getArg(FirstIRArg); AI->setName(Arg->getName() + ".coerce"); CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); } @@ -2614,7 +2623,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, continue; auto eltAddr = Builder.CreateStructGEP(alloca, i); - auto elt = FnArgs[argIndex++]; + auto elt = Fn->getArg(argIndex++); Builder.CreateStore(elt, eltAddr); } assert(argIndex == FirstIRArg + NumIRArgs); @@ -2629,11 +2638,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, LValue LV = MakeAddrLValue(Alloca, Ty); ArgVals.push_back(ParamValue::forIndirect(Alloca)); - auto FnArgIter = FnArgs.begin() + FirstIRArg; + auto FnArgIter = Fn->arg_begin() + FirstIRArg; ExpandTypeFromArgs(Ty, LV, FnArgIter); - assert(FnArgIter == FnArgs.begin() + FirstIRArg + NumIRArgs); + assert(FnArgIter == Fn->arg_begin() + FirstIRArg + NumIRArgs); for (unsigned i = 0, e = NumIRArgs; i != e; ++i) { - auto AI = FnArgs[FirstIRArg + i]; + auto AI = Fn->getArg(FirstIRArg + i); AI->setName(Arg->getName() + "." + Twine(i)); } break; @@ -2710,10 +2719,10 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, bool doRetainAutorelease; - if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints().objc_retain) { + if (call->getCalledOperand() == CGF.CGM.getObjCEntrypoints().objc_retain) { doRetainAutorelease = true; - } else if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints() - .objc_retainAutoreleasedReturnValue) { + } else if (call->getCalledOperand() == + CGF.CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue) { doRetainAutorelease = false; // If we emitted an assembly marker for this call (and the @@ -2729,8 +2738,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, assert(prev); } assert(isa(prev)); - assert(cast(prev)->getCalledValue() == - CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker); + assert(cast(prev)->getCalledOperand() == + CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker); InstsToKill.push_back(prev); } } else { @@ -2773,8 +2782,8 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, // Look for a retain call. 
llvm::CallInst *retainCall = dyn_cast(result->stripPointerCasts()); - if (!retainCall || - retainCall->getCalledValue() != CGF.CGM.getObjCEntrypoints().objc_retain) + if (!retainCall || retainCall->getCalledOperand() != + CGF.CGM.getObjCEntrypoints().objc_retain) return nullptr; // Look for an ordinary load of 'self'. @@ -2880,6 +2889,213 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { return store; } +// Helper functions for EmitCMSEClearRecord + +// Set the bits corresponding to a field having width `BitWidth` and located at +// offset `BitOffset` (from the least significant bit) within a storage unit of +// `Bits.size()` bytes. Each element of `Bits` corresponds to one target byte. +// Use little-endian layout, i.e.`Bits[0]` is the LSB. +static void setBitRange(SmallVectorImpl &Bits, int BitOffset, + int BitWidth, int CharWidth) { + assert(CharWidth <= 64); + assert(static_cast(BitWidth) <= Bits.size() * CharWidth); + + int Pos = 0; + if (BitOffset >= CharWidth) { + Pos += BitOffset / CharWidth; + BitOffset = BitOffset % CharWidth; + } + + const uint64_t Used = (uint64_t(1) << CharWidth) - 1; + if (BitOffset + BitWidth >= CharWidth) { + Bits[Pos++] |= (Used << BitOffset) & Used; + BitWidth -= CharWidth - BitOffset; + BitOffset = 0; + } + + while (BitWidth >= CharWidth) { + Bits[Pos++] = Used; + BitWidth -= CharWidth; + } + + if (BitWidth > 0) + Bits[Pos++] |= (Used >> (CharWidth - BitWidth)) << BitOffset; +} + +// Set the bits corresponding to a field having width `BitWidth` and located at +// offset `BitOffset` (from the least significant bit) within a storage unit of +// `StorageSize` bytes, located at `StorageOffset` in `Bits`. Each element of +// `Bits` corresponds to one target byte. Use target endian layout. +static void setBitRange(SmallVectorImpl &Bits, int StorageOffset, + int StorageSize, int BitOffset, int BitWidth, + int CharWidth, bool BigEndian) { + + SmallVector TmpBits(StorageSize); + setBitRange(TmpBits, BitOffset, BitWidth, CharWidth); + + if (BigEndian) + std::reverse(TmpBits.begin(), TmpBits.end()); + + for (uint64_t V : TmpBits) + Bits[StorageOffset++] |= V; +} + +static void setUsedBits(CodeGenModule &, QualType, int, + SmallVectorImpl &); + +// Set the bits in `Bits`, which correspond to the value representations of +// the actual members of the record type `RTy`. Note that this function does +// not handle base classes, virtual tables, etc, since they cannot happen in +// CMSE function arguments or return. The bit mask corresponds to the target +// memory layout, i.e. it's endian dependent. 
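+// Worked example (illustrative, little-endian, 8-bit chars): for
+//   struct S { char c; short s; };
+// `c` marks byte 0 and `s` marks bytes 2 and 3, leaving the padding byte 1
+// clear, so `Bits` becomes {0xff, 0x00, 0xff, 0xff} and buildMultiCharMask
+// below folds it to the mask 0xffff00ff.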
+static void setUsedBits(CodeGenModule &CGM, const RecordType *RTy, int Offset, + SmallVectorImpl &Bits) { + ASTContext &Context = CGM.getContext(); + int CharWidth = Context.getCharWidth(); + const RecordDecl *RD = RTy->getDecl()->getDefinition(); + const ASTRecordLayout &ASTLayout = Context.getASTRecordLayout(RD); + const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(RD); + + int Idx = 0; + for (auto I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Idx) { + const FieldDecl *F = *I; + + if (F->isUnnamedBitfield() || F->isZeroLengthBitField(Context) || + F->getType()->isIncompleteArrayType()) + continue; + + if (F->isBitField()) { + const CGBitFieldInfo &BFI = Layout.getBitFieldInfo(F); + setBitRange(Bits, Offset + BFI.StorageOffset.getQuantity(), + BFI.StorageSize / CharWidth, BFI.Offset, + BFI.Size, CharWidth, + CGM.getDataLayout().isBigEndian()); + continue; + } + + setUsedBits(CGM, F->getType(), + Offset + ASTLayout.getFieldOffset(Idx) / CharWidth, Bits); + } +} + +// Set the bits in `Bits`, which correspond to the value representations of +// the elements of an array type `ATy`. +static void setUsedBits(CodeGenModule &CGM, const ConstantArrayType *ATy, + int Offset, SmallVectorImpl &Bits) { + const ASTContext &Context = CGM.getContext(); + + QualType ETy = Context.getBaseElementType(ATy); + int Size = Context.getTypeSizeInChars(ETy).getQuantity(); + SmallVector TmpBits(Size); + setUsedBits(CGM, ETy, 0, TmpBits); + + for (int I = 0, N = Context.getConstantArrayElementCount(ATy); I < N; ++I) { + auto Src = TmpBits.begin(); + auto Dst = Bits.begin() + Offset + I * Size; + for (int J = 0; J < Size; ++J) + *Dst++ |= *Src++; + } +} + +// Set the bits in `Bits`, which correspond to the value representations of +// the type `QTy`. +static void setUsedBits(CodeGenModule &CGM, QualType QTy, int Offset, + SmallVectorImpl &Bits) { + if (const auto *RTy = QTy->getAs()) + return setUsedBits(CGM, RTy, Offset, Bits); + + ASTContext &Context = CGM.getContext(); + if (const auto *ATy = Context.getAsConstantArrayType(QTy)) + return setUsedBits(CGM, ATy, Offset, Bits); + + int Size = Context.getTypeSizeInChars(QTy).getQuantity(); + if (Size <= 0) + return; + + std::fill_n(Bits.begin() + Offset, Size, + (uint64_t(1) << Context.getCharWidth()) - 1); +} + +static uint64_t buildMultiCharMask(const SmallVectorImpl &Bits, + int Pos, int Size, int CharWidth, + bool BigEndian) { + assert(Size > 0); + uint64_t Mask = 0; + if (BigEndian) { + for (auto P = Bits.begin() + Pos, E = Bits.begin() + Pos + Size; P != E; + ++P) + Mask = (Mask << CharWidth) | *P; + } else { + auto P = Bits.begin() + Pos + Size, End = Bits.begin() + Pos; + do + Mask = (Mask << CharWidth) | *--P; + while (P != End); + } + return Mask; +} + +// Emit code to clear the bits in a record, which aren't a part of any user +// declared member, when the record is a function return. 
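+// E.g. (sketch): for the struct in the example above, returned as an i32,
+// this emits
+//   %cmse.clear = and i32 %src, -65281   ; mask 0xffff00ff
+// so whatever was left in the padding byte never crosses the security-state
+// boundary.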
+llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src, + llvm::IntegerType *ITy, + QualType QTy) { + assert(Src->getType() == ITy); + assert(ITy->getScalarSizeInBits() <= 64); + + const llvm::DataLayout &DataLayout = CGM.getDataLayout(); + int Size = DataLayout.getTypeStoreSize(ITy); + SmallVector Bits(Size); + setUsedBits(CGM, QTy->getAs(), 0, Bits); + + int CharWidth = CGM.getContext().getCharWidth(); + uint64_t Mask = + buildMultiCharMask(Bits, 0, Size, CharWidth, DataLayout.isBigEndian()); + + return Builder.CreateAnd(Src, Mask, "cmse.clear"); +} + +// Emit code to clear the bits in a record that aren't part of any +// user-declared member, when the record is passed as a function argument. +llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src, + llvm::ArrayType *ATy, + QualType QTy) { + const llvm::DataLayout &DataLayout = CGM.getDataLayout(); + int Size = DataLayout.getTypeStoreSize(ATy); + SmallVector Bits(Size); + setUsedBits(CGM, QTy->getAs(), 0, Bits); + + // Clear each element of the LLVM array. + int CharWidth = CGM.getContext().getCharWidth(); + int CharsPerElt = + ATy->getArrayElementType()->getScalarSizeInBits() / CharWidth; + int MaskIndex = 0; + llvm::Value *R = llvm::UndefValue::get(ATy); + for (int I = 0, N = ATy->getArrayNumElements(); I != N; ++I) { + uint64_t Mask = buildMultiCharMask(Bits, MaskIndex, CharsPerElt, CharWidth, + DataLayout.isBigEndian()); + MaskIndex += CharsPerElt; + llvm::Value *T0 = Builder.CreateExtractValue(Src, I); + llvm::Value *T1 = Builder.CreateAnd(T0, Mask, "cmse.clear"); + R = Builder.CreateInsertValue(R, T1, I); + } + + return R; +} + +// Emit code to clear the padding bits when returning or passing as an argument +// a 16-bit floating-point value. +llvm::Value *CodeGenFunction::EmitCMSEClearFP16(llvm::Value *Src) { + llvm::Type *RetTy = Src->getType(); + assert(RetTy->isFloatTy() || + (RetTy->isIntegerTy() && RetTy->getIntegerBitWidth() == 32)); + if (RetTy->isFloatTy()) { + llvm::Value *T0 = Builder.CreateBitCast(Src, Builder.getIntNTy(32)); + llvm::Value *T1 = Builder.CreateAnd(T0, 0xffff, "cmse.clear"); + return Builder.CreateBitCast(T1, RetTy); + } + return Builder.CreateAnd(Src, 0xffff, "cmse.clear"); +} + void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc) { @@ -3046,6 +3262,21 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, llvm::Instruction *Ret; if (RV) { + if (CurFuncDecl && CurFuncDecl->hasAttr()) { + // For certain return types, clear padding bits, as they may reveal + // sensitive information. + const Type *RTy = RetTy.getCanonicalType().getTypePtr(); + if (RTy->isFloat16Type() || RTy->isHalfType()) { + // 16-bit floating-point types are passed in a 32-bit integer or float, + // with unspecified upper bits. + RV = EmitCMSEClearFP16(RV); + } else { + // Small struct/union types are passed as integers. + auto *ITy = dyn_cast(RV->getType()); + if (ITy != nullptr && isa(RetTy.getCanonicalType())) + RV = EmitCMSEClearRecord(RV, ITy, RetTy); + } + } EmitReturnValueCheck(RV); Ret = Builder.CreateRet(RV); } else { @@ -4352,8 +4583,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else { // In the simple case, just pass the coerced loaded value.
assert(NumIRArgs == 1); - IRCallArgs[FirstIRArg] = - CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); + llvm::Value *Load = + CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); + + if (CallInfo.isCmseNSCall()) { + // For certain parameter types, clear padding bits, as they may reveal + // sensitive information. + const Type *PTy = I->Ty.getCanonicalType().getTypePtr(); + // 16-bit floating-point types are passed in a 32-bit integer or + // float, with unspecified upper bits. + if (PTy->isFloat16Type() || PTy->isHalfType()) { + Load = EmitCMSEClearFP16(Load); + } else { + // Small struct/union types are passed as integer arrays. + auto *ATy = dyn_cast(Load->getType()); + if (ATy != nullptr && isa(I->Ty.getCanonicalType())) + Load = EmitCMSEClearRecord(Load, ATy, I->Ty); + } + } + IRCallArgs[FirstIRArg] = Load; } break; diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 5e01100db163c..70eaa321a007a 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -860,6 +860,9 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // TODO: base this on the number of branch-afters and fixups const unsigned SwitchCapacity = 10; + // Pass the abnormal exit flag to Fn (SEH cleanup). + cleanupFlags.setHasExitSwitch(); + llvm::LoadInst *Load = createLoadInstBefore(getNormalCleanupDestSlot(), "cleanup.dest", nullptr); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 4bcf1e215bc1e..f8522ea7ee43c 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1639,6 +1639,19 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { llvm::Value *IsForEH = llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup()); + + // Except for _leave and fall-through at the end, all other exits in a _try + // (return/goto/continue/break) are considered abnormal terminations. Since + // _leave/fall-through is always indexed 0, use the NormalCleanupDestSlot + // value (>= 1 for goto/return/...) as the first argument to indicate an + // abnormal termination. + if (!F.isForEHCleanup() && F.hasExitSwitch()) { + Address Addr = CGF.getNormalCleanupDestSlot(); + llvm::Value *Load = CGF.Builder.CreateLoad(Addr, "cleanup.dest"); + llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int32Ty); + IsForEH = CGF.Builder.CreateICmpNE(Load, Zero); + } + Args.add(RValue::get(IsForEH), ArgTys[0]); Args.add(RValue::get(FP), ArgTys[1]); diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index b512592e78fff..e3df22aef2c1b 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -2160,7 +2160,8 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, if (!mandatory && isa(result)) { llvm::CallInst *call = cast(result->stripPointerCasts()); - assert(call->getCalledValue() == CGM.getObjCEntrypoints().objc_retainBlock); + assert(call->getCalledOperand() == + CGM.getObjCEntrypoints().objc_retainBlock); call->setMetadata("clang.arc.copy_on_escape", llvm::MDNode::get(Builder.getContext(), None)); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 4ad276e40c99d..b80bc30cfa0a2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -710,6 +710,12 @@ enum OpenMPRTLFunction { // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); OMPRTL__kmpc_task_reduction_get_th_data, + // Call to void *__kmpc_taskred_modifier_init(ident_t
*loc, int gtid, int + // is_ws, int num, void *data); + OMPRTL__kmpc_taskred_modifier_init, + // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, + // int is_ws); + OMPRTL__kmpc_task_reduction_modifier_fini, // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); OMPRTL__kmpc_alloc, // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); @@ -1020,26 +1026,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { bool AsArraySection = isa(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( - CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()), + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), nullptr); return; } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = cast( - SharedAddresses[N].first.getPointer(CGF)->getType()) - ->getElementType(); + auto *ElemType = + cast(OrigAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), - SharedAddresses[N].first.getPointer(CGF)); + Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), + OrigAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); } else { - SizeInChars = CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()); + SizeInChars = + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); } Sizes.emplace_back(SizeInChars, Size); @@ -1459,6 +1464,8 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( bool HasCancel = false; if (const auto *OPD = dyn_cast(&D)) HasCancel = OPD->hasCancel(); + else if (const auto *OPD = dyn_cast(&D)) + HasCancel = OPD->hasCancel(); else if (const auto *OPSD = dyn_cast(&D)) HasCancel = OPSD->hasCancel(); else if (const auto *OPFD = dyn_cast(&D)) @@ -2345,6 +2352,28 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_taskred_modifier_init: { + // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num_data, void *data); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy, + CGM.IntTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, + /*Name=*/"__kmpc_taskred_modifier_init"); + break; + } + case OMPRTL__kmpc_task_reduction_modifier_fini: { + // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, + // int is_ws); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, + /*Name=*/"__kmpc_task_reduction_modifier_fini"); + break; + } case OMPRTL__kmpc_alloc: { // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t // al); omp_allocator_handle_t type is void *. 
@@ -6782,7 +6811,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); - ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars, + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, Data.ReductionCopies, Data.ReductionOps); for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; @@ -6846,6 +6875,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), FlagsLVal.getType()); } + if (Data.IsReductionWithTaskMod) { + // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num, void *data); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = { + IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskRedInput.getPointer(), CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args); + } // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); llvm::Value *Args[] = { CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, @@ -6857,6 +6902,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( Args); } +void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int + // gtid, int is_ws); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = {IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, + IsWorksharingReduction ? 1 : 0, + /*isSigned=*/true)}; + (void)CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args); +} + void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, @@ -11012,7 +11073,7 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, break; case Linear: Out << 'l'; - if (!!ParamAttr.StrideOrArg) + if (ParamAttr.StrideOrArg != 1) Out << ParamAttr.StrideOrArg; break; case Uniform: @@ -11089,7 +11150,7 @@ static bool getAArch64PBV(QualType QT, ASTContext &C) { /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. /// TODO: Add support for references, section 3.2.1, item 1. static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { - if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { + if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { QualType PTy = QT.getCanonicalType()->getPointeeType(); if (getAArch64PBV(PTy, C)) return C.getTypeSize(PTy); @@ -11152,7 +11213,7 @@ static std::string mangleVectorParameters(ArrayRef ParamAttrs) { Out << 'l'; // Don't print the step value if it is not present or if it is // equal to 1.
- if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) + if (ParamAttr.StrideOrArg != 1) Out << ParamAttr.StrideOrArg; break; case Uniform: @@ -11390,15 +11451,24 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, for (const Expr *E : Attr->linears()) { E = E->IgnoreParenImpCasts(); unsigned Pos; + // Rescaling factor needed to compute the linear parameter + // value in the mangled name. + unsigned PtrRescalingFactor = 1; if (isa(E)) { Pos = ParamPositions[FD]; } else { const auto *PVD = cast(cast(E)->getDecl()) ->getCanonicalDecl(); Pos = ParamPositions[PVD]; + if (auto *P = dyn_cast(PVD->getType())) + PtrRescalingFactor = CGM.getContext() + .getTypeSizeInChars(P->getPointeeType()) + .getQuantity(); } ParamAttrTy &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; + // Assuming a stride of 1, for `linear` without modifiers. + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); if (*SI) { Expr::EvalResult Result; if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { @@ -11414,6 +11484,11 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamAttr.StrideOrArg = Result.Val.getInt(); } } + // If we are using a linear clause on a pointer, we need to + // rescale the value of linear_step with the byte size of the + // pointee type. + if (Linear == ParamAttr.Kind) + ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; ++SI; ++MI; } @@ -12362,6 +12437,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 830b1edc61f43..7a6a06aaf4a6b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -100,6 +100,7 @@ struct OMPTaskDataTy final { SmallVector LastprivateVars; SmallVector LastprivateCopies; SmallVector ReductionVars; + SmallVector ReductionOrigs; SmallVector ReductionCopies; SmallVector ReductionOps; struct DependData { @@ -118,6 +119,8 @@ struct OMPTaskDataTy final { unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; + bool IsReductionWithTaskMod = false; + bool IsWorksharingReduction = false; }; /// Class intended to support codegen of all kind of the reduction clauses. @@ -1418,18 +1421,34 @@ class CGOpenMPRuntime { /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit; /// red_data[i].f_fini = (void*)RedDest; /// red_data[i].f_comb = (void*)RedOp; /// red_data[i].flags = ; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For a reduction clause with the task modifier it emits the following call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = ; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. /// \param Data Additional data for task generation like tiedness, final @@ -1440,6 +1459,13 @@ class CGOpenMPRuntime { ArrayRef RHSExprs, const OMPTaskDataTy &Data); + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction); + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions. @@ -2192,18 +2218,34 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit; /// red_data[i].f_fini = (void*)RedDest; /// red_data[i].f_comb = (void*)RedOp; /// red_data[i].flags = ; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For a reduction clause with the task modifier it emits the following call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ...
+ /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = ; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. /// \param Data Additional data for task generation like tiedness, final @@ -1440,6 +1459,13 @@ class CGOpenMPRuntime { ArrayRef RHSExprs, const OMPTaskDataTy &Data); + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction); + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions. @@ -2192,18 +2218,34 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit; /// red_data[i].f_fini = (void*)RedDest; /// red_data[i].f_comb = (void*)RedOp; /// red_data[i].flags = ; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For reduction clause with task modifier it emits the next call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit; + /// red_data[i].f_fini = (void*)RedDest; + /// red_data[i].f_comb = (void*)RedOp; + /// red_data[i].flags = ; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. /// \param Data Additional data for task generation like tiedness, final @@ -2213,6 +2255,13 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime { ArrayRef RHSExprs, const OMPTaskDataTy &Data) override; + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction) override; + /// Required to resolve existing problems in the runtime. 
Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions + emits threadprivate variable to diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 75af05623b036..c9a1b1d4fd149 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -730,8 +730,8 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageOffset); } -CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, - llvm::StructType *Ty) { +std::unique_ptr +CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { CGRecordLowering Builder(*this, D, /*Packed=*/false); Builder.lower(/*NonVirtualBaseType=*/false); @@ -758,9 +758,9 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, // but we may need to recursively layout D while laying D out as a base type. Ty->setBody(Builder.FieldTypes, Builder.Packed); - CGRecordLayout *RL = - new CGRecordLayout(Ty, BaseTy, Builder.IsZeroInitializable, - Builder.IsZeroInitializableAsBase); + auto RL = std::make_unique( + Ty, BaseTy, (bool)Builder.IsZeroInitializable, + (bool)Builder.IsZeroInitializableAsBase); RL->NonVirtualBases.swap(Builder.NonVirtualBases); RL->CompleteObjectVirtualBases.swap(Builder.VirtualBases); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index cbc08ac2efb4e..02075be36dd5e 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1169,21 +1169,23 @@ void CodeGenFunction::EmitOMPReductionClauseInit( SmallVector ReductionOps; SmallVector LHSs; SmallVector RHSs; + OMPTaskDataTy Data; + SmallVector TaskLHSs; + SmallVector TaskRHSs; for (const auto *C : D.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Shareds.emplace_back(Ref); - Privates.emplace_back(*IPriv); - ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + if (C->getModifier() == OMPC_REDUCTION_task) { + Data.ReductionVars.append(C->privates().begin(), C->privates().end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } } ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); @@ -1261,6 +1263,117 @@ void CodeGenFunction::EmitOMPReductionClauseInit( ++IPriv; ++Count; } + if (!Data.ReductionVars.empty()) { + Data.IsReductionWithTaskMod = true; + Data.IsWorksharingReduction = + isOpenMPWorksharingDirective(D.getDirectiveKind()); + llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, 
D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); + const Expr *TaskRedRef = nullptr; + switch (D.getDirectiveKind()) { + case OMPD_parallel: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_for: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_sections: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_for: + TaskRedRef = cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_master: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_sections: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel_for: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_distribute_parallel_for: + TaskRedRef = + cast(D).getTaskReductionRefExpr(); + break; + case OMPD_teams_distribute_parallel_for: + TaskRedRef = cast(D) + .getTaskReductionRefExpr(); + break; + case OMPD_target_teams_distribute_parallel_for: + TaskRedRef = cast(D) + .getTaskReductionRefExpr(); + break; + case OMPD_simd: + case OMPD_for_simd: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_parallel_for_simd: + case OMPD_task: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_teams: + case OMPD_target: + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: + case OMPD_distribute: + case OMPD_target_update: + case OMPD_distribute_parallel_for_simd: + case OMPD_distribute_simd: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams_distribute_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_declare_simd: + case OMPD_requires: + case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: + case OMPD_unknown: + llvm_unreachable("Unexpected directive with task reductions."); + } + + const auto *VD = cast(cast(TaskRedRef)->getDecl()); + EmitVarDecl(*VD); + EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), + /*Volatile=*/false, TaskRedRef->getType()); + } } void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -1272,14 +1385,22 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( llvm::SmallVector RHSExprs; llvm::SmallVector ReductionOps; bool HasAtLeastOneReduction = false; + bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind()) { HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + IsReductionWithTaskMod = + IsReductionWithTaskMod ||
C->getModifier() == OMPC_REDUCTION_task; } if (HasAtLeastOneReduction) { + if (IsReductionWithTaskMod) { + CGM.getOpenMPRuntime().emitTaskReductionFini( + *this, D.getBeginLoc(), + isOpenMPWorksharingDirective(D.getDirectiveKind())); + } bool WithNowait = D.getSingleClause() || isOpenMPParallelDirective(D.getDirectiveKind()) || ReductionKind == OMPD_simd; @@ -3382,21 +3503,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( SmallVector LHSs; SmallVector RHSs; for (const auto *C : S.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( *this, S.getBeginLoc(), LHSs, RHSs, Data); @@ -3776,21 +3889,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( SmallVector RHSs; OMPTaskDataTy Data; for (const auto *C : S.getClausesOfKind()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } llvm::Value *ReductionDesc = CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), @@ -4627,6 +4732,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 4e1fe1308119f..ee317ea7c1af0 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -646,8 +646,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( void BackendConsumer::UnsupportedDiagHandler( const llvm::DiagnosticInfoUnsupported &D) { - // We only support errors. - assert(D.getSeverity() == llvm::DS_Error); + // We only support warnings or errors. 
+ assert(D.getSeverity() == llvm::DS_Error || + D.getSeverity() == llvm::DS_Warning); StringRef Filename; unsigned Line, Column; @@ -665,7 +666,11 @@ void BackendConsumer::UnsupportedDiagHandler( DiagnosticPrinterRawOStream DP(MsgStream); D.print(DP); } - Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str(); + + auto DiagType = D.getSeverity() == llvm::DS_Error + ? diag::err_fe_backend_unsupported + : diag::warn_fe_backend_unsupported; + Diags.Report(Loc, DiagType) << MsgStream.str(); if (BadDebugInfo) // If we were not able to translate the file:line:col information diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e368351987ce8..f3c9c5b561fbe 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2551,7 +2551,7 @@ void CodeGenFunction::emitAlignmentAssumptionCheck( llvm::Value *OffsetValue, llvm::Value *TheCheck, llvm::Instruction *Assumption) { assert(Assumption && isa(Assumption) && - cast(Assumption)->getCalledValue() == + cast(Assumption)->getCalledOperand() == llvm::Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), llvm::Intrinsic::assume) && diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 3bf24844735ee..9c891f27e0fa8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3877,6 +3877,11 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch); + llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::IntegerType *ITy, + QualType RTy); + llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy, + QualType RTy); + llvm::Value *EmitCMSEClearFP16(llvm::Value *V); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, @@ -3911,10 +3916,12 @@ class CodeGenFunction : public CodeGenTypeCache { SmallVector getSVEOverloadTypes(SVETypeFlags TypeFlags, ArrayRef Ops); llvm::Type *getEltType(SVETypeFlags TypeFlags); - llvm::VectorType *getSVEType(const SVETypeFlags &TypeFlags); - llvm::VectorType *getSVEPredType(SVETypeFlags TypeFlags); + llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags); + llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags); + llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); - llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::VectorType *VTy); + llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, + llvm::ScalableVectorType *VTy); llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); @@ -3930,6 +3937,9 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); + llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, @@ -4357,7 +4367,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// /// \param AI - The first function argument of the expansion. 
void ExpandTypeFromArgs(QualType Ty, LValue Dst, - SmallVectorImpl::iterator &AI); + llvm::Function::arg_iterator &AI); /// ExpandTypeToArgs - Expand an CallArg \arg Arg, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index bad796bf92dcf..3c91a04d54642 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1051,8 +1051,7 @@ llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, if (!PGO.haveRegionCounts()) return nullptr; Optional CondCount = PGO.getStmtCount(Cond); - assert(CondCount.hasValue() && "missing expected loop condition count"); - if (*CondCount == 0) + if (!CondCount || *CondCount == 0) return nullptr; return createProfileWeights(LoopCount, std::max(*CondCount, LoopCount) - LoopCount); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 9e7c2c836ba43..5da2ced0cd98a 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -36,8 +36,6 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm) } CodeGenTypes::~CodeGenTypes() { - llvm::DeleteContainerSeconds(CGRecordLayouts); - for (llvm::FoldingSet::iterator I = FunctionInfos.begin(), E = FunctionInfos.end(); I != E; ) delete &*I++; @@ -807,8 +805,8 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { } // Layout fields. - CGRecordLayout *Layout = ComputeRecordLayout(RD, Ty); - CGRecordLayouts[Key] = Layout; + std::unique_ptr Layout = ComputeRecordLayout(RD, Ty); + CGRecordLayouts[Key] = std::move(Layout); // We're done laying out this struct. bool EraseResult = RecordsBeingLaidOut.erase(Key); (void)EraseResult; @@ -834,17 +832,18 @@ const CGRecordLayout & CodeGenTypes::getCGRecordLayout(const RecordDecl *RD) { const Type *Key = Context.getTagDeclType(RD).getTypePtr(); - const CGRecordLayout *Layout = CGRecordLayouts.lookup(Key); - if (!Layout) { - // Compute the type information. - ConvertRecordDeclType(RD); + auto I = CGRecordLayouts.find(Key); + if (I != CGRecordLayouts.end()) + return *I->second; + // Compute the type information. + ConvertRecordDeclType(RD); - // Now try again. - Layout = CGRecordLayouts.lookup(Key); - } + // Now try again. + I = CGRecordLayouts.find(Key); - assert(Layout && "Unable to find record layout information for type"); - return *Layout; + assert(I != CGRecordLayouts.end() && + "Unable to find record layout information for type"); + return *I->second; } bool CodeGenTypes::isPointerZeroInitializable(QualType T) { diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 394e2fdf8d65d..f8f7542e4c831 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -75,7 +75,7 @@ class CodeGenTypes { llvm::DenseMap InterfaceTypes; /// Maps clang struct type with corresponding record layout info. - llvm::DenseMap CGRecordLayouts; + llvm::DenseMap> CGRecordLayouts; /// Contains the LLVM IR type for any converted RecordDecl. llvm::DenseMap RecordDeclTypes; @@ -272,8 +272,8 @@ class CodeGenTypes { RequiredArgs args); /// Compute a new LLVM record layout object for the given record. - CGRecordLayout *ComputeRecordLayout(const RecordDecl *D, - llvm::StructType *Ty); + std::unique_ptr ComputeRecordLayout(const RecordDecl *D, + llvm::StructType *Ty); /// addRecordTypeName - Compute a name from the given record decl with an /// optional suffix and name the given LLVM type using it. 
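The CodeGenTypes hunks above replace a manually deleted pointer map with std::unique_ptr ownership, so the destructor no longer needs llvm::DeleteContainerSeconds. A minimal sketch of the resulting lookup-or-compute cache pattern follows; the names are illustrative and std::map stands in for llvm::DenseMap (this is not the Clang API):

#include <map>
#include <memory>
#include <string>

struct Layout {
  unsigned Size = 0;
};

class LayoutCache {
  // The map owns the layouts; destroying the cache frees them automatically.
  std::map<std::string, std::unique_ptr<Layout>> Layouts;

  std::unique_ptr<Layout> compute(const std::string &Key) {
    auto L = std::make_unique<Layout>();
    L->Size = static_cast<unsigned>(Key.size()); // Stand-in computation.
    return L;
  }

public:
  // Mirrors getCGRecordLayout: try the cache, compute and insert on a miss,
  // then hand out a reference that the map continues to own.
  const Layout &get(const std::string &Key) {
    auto It = Layouts.find(Key);
    if (It == Layouts.end())
      It = Layouts.emplace(Key, compute(Key)).first;
    return *It->second;
  }
};

In the real code the compute step goes through ConvertRecordDeclType, which may insert entries recursively, which is presumably why getCGRecordLayout performs a second find after conversion instead of a single emplace.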
diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h index 0ed67aabcd621..0fa0b54be2f0d 100644 --- a/clang/lib/CodeGen/EHScopeStack.h +++ b/clang/lib/CodeGen/EHScopeStack.h @@ -158,9 +158,10 @@ class EHScopeStack { /// Generation flags. class Flags { enum { - F_IsForEH = 0x1, + F_IsForEH = 0x1, F_IsNormalCleanupKind = 0x2, - F_IsEHCleanupKind = 0x4 + F_IsEHCleanupKind = 0x4, + F_HasExitSwitch = 0x8, }; unsigned flags; @@ -179,8 +180,10 @@ class EHScopeStack { /// cleanup. bool isEHCleanupKind() const { return flags & F_IsEHCleanupKind; } void setIsEHCleanupKind() { flags |= F_IsEHCleanupKind; } - }; + bool hasExitSwitch() const { return flags & F_HasExitSwitch; } + void setHasExitSwitch() { flags |= F_HasExitSwitch; } + }; /// Emit the cleanup. For normal cleanups, this is run in the /// same EH context as when the cleanup was pushed, i.e. the diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index b99602154a1a5..11cc35a015748 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -385,7 +385,7 @@ static Address emitMergePHI(CodeGenFunction &CGF, return Address(PHI, Align); } -TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; } +TargetCodeGenInfo::~TargetCodeGenInfo() = default; // If someone can figure out a general rule for this, that would be great. // It's probably just doomed to be platform-dependent, though. @@ -682,7 +682,7 @@ class DefaultABIInfo : public ABIInfo { class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { public: DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} }; ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { @@ -772,7 +772,7 @@ class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, WebAssemblyABIInfo::ABIKind K) - : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT, K)) {} + : TargetCodeGenInfo(std::make_unique(CGT, K)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -898,8 +898,8 @@ class PNaClABIInfo : public ABIInfo { class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { public: - PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {} + PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique(CGT)) {} }; void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -1140,7 +1140,7 @@ class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : TargetCodeGenInfo(new X86_32ABIInfo( + : TargetCodeGenInfo(std::make_unique( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, SoftFloatABI)) {} @@ -1822,6 +1822,15 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getExtend(Ty); } + if (const auto * EIT = Ty->getAs()) { + if (EIT->getNumBits() <= 64) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); @@ -2334,7 +2343,7 @@ class WinX86_64ABIInfo : public SwiftABIInfo { class X86_64TargetCodeGenInfo : 
public TargetCodeGenInfo { public: X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {} + : TargetCodeGenInfo(std::make_unique(CGT, AVXLevel)) {} const X86_64ABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); @@ -2478,7 +2487,7 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT, AVXLevel)) {} + : TargetCodeGenInfo(std::make_unique(CGT, AVXLevel)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; @@ -2785,6 +2794,15 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, return; } + if (const auto *EITy = Ty->getAs()) { + if (EITy->getNumBits() <= 64) + Current = Integer; + else if (EITy->getNumBits() <= 128) + Lo = Hi = Integer; + // Larger values need to get passed in memory. + return; + } + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { // Arrays are treated like structures. @@ -2959,8 +2977,9 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + if (!Ty->isExtIntType()) + return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); @@ -2992,7 +3011,8 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. - if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) { + if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && + !Ty->isExtIntType()) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); @@ -4083,6 +4103,17 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, } } + if (Ty->isExtIntType()) { + // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is + // not 1, 2, 4, or 8 bytes, must be passed by reference." + // However, non-power-of-two _ExtInts will be passed as 1, 2, 4, or 8 bytes + // anyway as long as they fit in them, so we don't have to check for a power + // of 2. + if (Width <= 64) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + return ABIArgInfo::getDirect(); } @@ -4204,8 +4235,8 @@ class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, bool RetSmallStructInRegABI) - : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI, - RetSmallStructInRegABI)) {} + : TargetCodeGenInfo(std::make_unique( + CGT, SoftFloatABI, RetSmallStructInRegABI)) {} static bool isStructReturnInRegABI(const llvm::Triple &Triple, const CodeGenOptions &Opts); @@ -4221,7 +4252,7 @@ class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { } CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { - // Complex types are passed just like their elements + // Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); @@ -4595,8 +4626,8 @@ class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, bool SoftFloatABI) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, - SoftFloatABI)) {} + : TargetCodeGenInfo(std::make_unique( + CGT, Kind, HasQPX, SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -5154,7 +5185,7 @@ class AArch64ABIInfo : public SwiftABIInfo { class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { public: AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) - : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} + : TargetCodeGenInfo(std::make_unique(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; @@ -5821,7 +5852,7 @@ class ARMABIInfo : public SwiftABIInfo { class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) - :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {} + : TargetCodeGenInfo(std::make_unique(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); @@ -6502,7 +6533,7 @@ class NVPTXABIInfo : public ABIInfo { class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new NVPTXABIInfo(CGT, *this)) {} + : TargetCodeGenInfo(std::make_unique(CGT, *this)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; @@ -6794,7 +6825,8 @@ class SystemZABIInfo : public SwiftABIInfo { class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI) - : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector, SoftFloatABI)) {} + : TargetCodeGenInfo( + std::make_unique(CGT, HasVector, SoftFloatABI)) {} }; } @@ -7117,7 +7149,7 @@ namespace { class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; @@ -7176,8 +7208,8 @@ class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) - : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), - SizeOfUnwindException(IsO32 ? 24 : 32) {} + : TargetCodeGenInfo(std::make_unique(CGT, IsO32)), + SizeOfUnwindException(IsO32 ? 
24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; @@ -7562,7 +7594,7 @@ namespace { class AVRTargetCodeGenInfo : public TargetCodeGenInfo { public: AVRTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } + : TargetCodeGenInfo(std::make_unique(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -7675,7 +7707,7 @@ class HexagonABIInfo : public DefaultABIInfo { class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new HexagonABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; @@ -8168,7 +8200,7 @@ namespace { class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { public: LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} }; } @@ -8436,7 +8468,7 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -8695,7 +8727,7 @@ namespace { class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} }; } // end anonymous namespace @@ -8957,7 +8989,7 @@ namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; @@ -9050,7 +9082,7 @@ class ARCABIInfo : public DefaultABIInfo { class ARCTargetCodeGenInfo : public TargetCodeGenInfo { public: ARCTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new ARCABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} }; @@ -9243,7 +9275,7 @@ class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) - :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; @@ -9430,7 +9462,7 @@ namespace { class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} unsigned getOpenCLKernelCallingConv() const override; }; @@ -10219,7 +10251,7 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {} + : TargetCodeGenInfo(std::make_unique(CGT, XLen, FLen)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index e7c842bae4a98..85565475eee79 100644 --- 
a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -43,11 +43,10 @@ class CGFunctionInfo; /// codegeneration issues, like target-specific attributes, builtins and so /// on. class TargetCodeGenInfo { - ABIInfo *Info; + std::unique_ptr Info = nullptr; public: - // WARNING: Acquires the ownership of ABIInfo. - TargetCodeGenInfo(ABIInfo *info = nullptr) : Info(info) {} + TargetCodeGenInfo(std::unique_ptr Info) : Info(std::move(Info)) {} virtual ~TargetCodeGenInfo(); /// getABIInfo() - Returns ABI info helper for the target. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8b2a2eda49547..b5f02e5347f96 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1817,6 +1817,9 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { // If configuration file was used, print its path. if (!ConfigFile.empty()) OS << "Configuration file: " << ConfigFile << '\n'; + + // Print the registered targets. + llvm::TargetRegistry::printRegisteredTargetsForVersion(OS); } /// PrintDiagnosticCategories - Implement the --print-diagnostic-categories diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 78a4d9486562c..c7760f0862847 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -117,6 +117,19 @@ static std::string describeSanitizeArg(const llvm::opt::Arg *A, /// Sanitizers set. static std::string toString(const clang::SanitizerSet &Sanitizers); +static void validateSpecialCaseListFormat(const Driver &D, + std::vector &SCLFiles, + unsigned MalformedSCLErrorDiagID) { + if (SCLFiles.empty()) + return; + + std::string BLError; + std::unique_ptr SCL( + llvm::SpecialCaseList::create(SCLFiles, D.getVFS(), BLError)); + if (!SCL.get()) + D.Diag(MalformedSCLErrorDiagID) << BLError; +} + static void addDefaultBlacklists(const Driver &D, SanitizerMask Kinds, std::vector &BlacklistFiles) { struct Blacklist { @@ -147,6 +160,8 @@ static void addDefaultBlacklists(const Driver &D, SanitizerMask Kinds, // should fail. D.Diag(clang::diag::err_drv_no_such_file) << Path; } + validateSpecialCaseListFormat( + D, BlacklistFiles, clang::diag::err_drv_malformed_sanitizer_blacklist); } /// Parse -f(no-)?sanitize-(coverage-)?(white|black)list argument's values, @@ -173,14 +188,7 @@ static void parseSpecialCaseListArg(const Driver &D, SCLFiles.clear(); } } - // Validate special case list format. - { - std::string BLError; - std::unique_ptr SCL( - llvm::SpecialCaseList::create(SCLFiles, D.getVFS(), BLError)); - if (!SCL.get()) - D.Diag(MalformedSCLErrorDiagID) << BLError; - } + validateSpecialCaseListFormat(D, SCLFiles, MalformedSCLErrorDiagID); } /// Sets group bits for every group that has at least one representative already @@ -566,16 +574,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, RecoverableKinds &= ~TrappingKinds; // Setup blacklist files. - // Add default blacklist from resource directory. - addDefaultBlacklists(D, Kinds, SystemBlacklistFiles); + // Add default blacklist from resource directory for activated sanitizers, and + // validate special case lists format. + if (!Args.hasArgNoClaim(options::OPT_fno_sanitize_blacklist)) + addDefaultBlacklists(D, Kinds, SystemBlacklistFiles); // Parse -f(no-)?sanitize-blacklist options. // This also validates special case lists format. - // Here, OptSpecifier() acts as a never-matching command-line argument. - // So, there is no way to append to system blacklist but it can be cleared. 
- parseSpecialCaseListArg(D, Args, SystemBlacklistFiles, OptSpecifier(), - options::OPT_fno_sanitize_blacklist, - clang::diag::err_drv_malformed_sanitizer_blacklist); parseSpecialCaseListArg(D, Args, UserBlacklistFiles, options::OPT_fsanitize_blacklist, options::OPT_fno_sanitize_blacklist, @@ -955,22 +960,24 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (TC.getTriple().isOSWindows() && needsUbsanRt()) { // Instruct the code generator to embed linker directives in the object file // that cause the required runtime libraries to be linked. - CmdArgs.push_back(Args.MakeArgString( - "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone"))); + CmdArgs.push_back( + Args.MakeArgString("--dependent-lib=" + + TC.getCompilerRTBasename(Args, "ubsan_standalone"))); if (types::isCXX(InputType)) CmdArgs.push_back(Args.MakeArgString( - "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone_cxx"))); + "--dependent-lib=" + + TC.getCompilerRTBasename(Args, "ubsan_standalone_cxx"))); } if (TC.getTriple().isOSWindows() && needsStatsRt()) { - CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" + - TC.getCompilerRT(Args, "stats_client"))); + CmdArgs.push_back(Args.MakeArgString( + "--dependent-lib=" + TC.getCompilerRTBasename(Args, "stats_client"))); // The main executable must export the stats runtime. // FIXME: Only exporting from the main executable (e.g. based on whether the // translation unit defines main()) would save a little space, but having // multiple copies of the runtime shouldn't hurt. - CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" + - TC.getCompilerRT(Args, "stats"))); + CmdArgs.push_back(Args.MakeArgString( + "--dependent-lib=" + TC.getCompilerRTBasename(Args, "stats"))); addIncludeLinkerOption(TC, Args, CmdArgs, "__sanitizer_stats_register"); } diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index dc8b05f2b1cce..955a8ee43b091 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -448,8 +448,9 @@ std::string ToolChain::getCompilerRTPath() const { return std::string(Path.str()); } -std::string ToolChain::getCompilerRT(const ArgList &Args, StringRef Component, - FileType Type) const { +std::string ToolChain::getCompilerRTBasename(const ArgList &Args, + StringRef Component, FileType Type, + bool AddArch) const { const llvm::Triple &TT = getTriple(); bool IsITANMSVCWindows = TT.isWindowsMSVCEnvironment() || TT.isWindowsItaniumEnvironment(); @@ -471,18 +472,32 @@ std::string ToolChain::getCompilerRT(const ArgList &Args, StringRef Component, break; } + std::string ArchAndEnv; + if (AddArch) { + StringRef Arch = getArchNameForCompilerRTLib(*this, Args); + const char *Env = TT.isAndroid() ? "-android" : ""; + ArchAndEnv = ("-" + Arch + Env).str(); + } + return (Prefix + Twine("clang_rt.") + Component + ArchAndEnv + Suffix).str(); +} + +std::string ToolChain::getCompilerRT(const ArgList &Args, StringRef Component, + FileType Type) const { + // Check for runtime files in the new layout without the architecture first. + std::string CRTBasename = + getCompilerRTBasename(Args, Component, Type, /*AddArch=*/false); for (const auto &LibPath : getLibraryPaths()) { SmallString<128> P(LibPath); - llvm::sys::path::append(P, Prefix + Twine("clang_rt.") + Component + Suffix); + llvm::sys::path::append(P, CRTBasename); if (getVFS().exists(P)) return std::string(P.str()); } - StringRef Arch = getArchNameForCompilerRTLib(*this, Args); - const char *Env = TT.isAndroid() ? 
"-android" : ""; + // Fall back to the old expected compiler-rt name if the new one does not + // exist. + CRTBasename = getCompilerRTBasename(Args, Component, Type, /*AddArch=*/true); SmallString<128> Path(getCompilerRTPath()); - llvm::sys::path::append(Path, Prefix + Twine("clang_rt.") + Component + "-" + - Arch + Env + Suffix); + llvm::sys::path::append(Path, CRTBasename); return std::string(Path.str()); } diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 4c034d40aaf4b..e71655bcbb979 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -409,6 +409,9 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, if (Args.hasArg(options::OPT_ffixed_x28)) Features.push_back("+reserve-x28"); + if (Args.hasArg(options::OPT_ffixed_x30)) + Features.push_back("+reserve-x30"); + if (Args.hasArg(options::OPT_fcall_saved_x8)) Features.push_back("+call-saved-x8"); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6e6f1c1c3e830..b2b9658ea25ac 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -822,8 +822,8 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back("-fprofile-instrument=clang"); if (TC.getTriple().isWindowsMSVCEnvironment()) { // Add dependent lib for clang_rt.profile - CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" + - TC.getCompilerRT(Args, "profile"))); + CmdArgs.push_back(Args.MakeArgString( + "--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile"))); } } @@ -840,8 +840,9 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, } if (PGOGenArg) { if (TC.getTriple().isWindowsMSVCEnvironment()) { - CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" + - TC.getCompilerRT(Args, "profile"))); + // Add dependent lib for clang_rt.profile + CmdArgs.push_back(Args.MakeArgString( + "--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile"))); } if (PGOGenArg->getOption().matches( PGOGenerateArg ? options::OPT_fprofile_generate_EQ diff --git a/clang/lib/Driver/ToolChains/Hurd.cpp b/clang/lib/Driver/ToolChains/Hurd.cpp index ce1806c4043b1..3448e4f4a2943 100644 --- a/clang/lib/Driver/ToolChains/Hurd.cpp +++ b/clang/lib/Driver/ToolChains/Hurd.cpp @@ -63,7 +63,19 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) { Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { + GCCInstallation.init(Triple, Args); + Multilibs = GCCInstallation.getMultilibs(); + SelectedMultilib = GCCInstallation.getMultilib(); std::string SysRoot = computeSysRoot(); + ToolChain::path_list &PPaths = getProgramPaths(); + + Generic_GCC::PushPPaths(PPaths); + + // The selection of paths to try here is designed to match the patterns which + // the GCC driver itself uses, as this is part of the GCC-compatible driver. + // This was determined by running GCC in a fake filesystem, creating all + // possible permutations of these directories, and seeing which ones it added + // to the link paths. path_list &Paths = getFilePaths(); const std::string OSLibDir = std::string(getOSLibDir(Triple, Args)); @@ -73,8 +85,11 @@ Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) ExtraOpts.push_back("--build-id"); #endif - // If we are currently running Clang inside of the requested system root, add - // its parent library paths to those searched. 
+ Generic_GCC::AddMultilibPaths(D, SysRoot, OSLibDir, MultiarchTriple, Paths); + + // Similar to the logic for GCC above, if we are currently running Clang + // inside of the requested system root, add its parent library paths to + // those searched. // FIXME: It's not clear whether we should use the driver's installed // directory ('Dir' below) or the ResourceDir. if (StringRef(D.Dir).startswith(SysRoot)) {
@@ -88,8 +103,11 @@ Hurd::Hurd(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) addPathIfExists(D, SysRoot + "/usr/lib/" + MultiarchTriple, Paths); addPathIfExists(D, SysRoot + "/usr/lib/../" + OSLibDir, Paths); - // If we are currently running Clang inside of the requested system root, add - // its parent library path to those searched. + Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); + + // Similar to the logic for GCC above, if we are currently running Clang + // inside of the requested system root, add its parent library path to those + // searched. // FIXME: It's not clear whether we should use the driver's installed // directory ('Dir' below) or the ResourceDir. if (StringRef(D.Dir).startswith(SysRoot))
@@ -156,6 +174,9 @@ void Hurd::AddClangSystemIncludeArgs(const ArgList &DriverArgs, // Lacking those, try to detect the correct set of system includes for the // target triple. + + AddMultilibIncludeArgs(DriverArgs, CC1Args); + if (getTriple().getArch() == llvm::Triple::x86) { std::string Path = SysRoot + "/usr/include/i386-gnu"; if (D.getVFS().exists(Path))
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 198800c437e37..2d4a604191ba3 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -398,6 +398,16 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(std::string("-libpath:") + WindowsSdkLibPath)); } + // Add the compiler-rt library directories to libpath if they exist to help + // the linker find the various sanitizer, builtin, and profiling runtimes. + for (const auto &LibPath : TC.getLibraryPaths()) { + if (TC.getVFS().exists(LibPath)) + CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath)); + } + auto CRTPath = TC.getCompilerRTPath(); + if (TC.getVFS().exists(CRTPath)) + CmdArgs.push_back(Args.MakeArgString("-libpath:" + CRTPath)); + if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L)) for (const auto &LibPath : Args.getAllArgValues(options::OPT_L)) CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 6139764e46015..ce01c88162635 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -18,6 +18,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" #include <system_error> using namespace clang::diag;
@@ -198,6 +199,17 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_L); TC.AddFilePathLibArgs(Args, CmdArgs); + + // Add the compiler-rt library directories if they exist to help + // the linker find the various sanitizer, builtin, and profiling runtimes.
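// NOTE: net effect of the -libpath:/-L hunks above, taken together with the
// getCompilerRT()/getCompilerRTBasename() split earlier in this patch: the
// driver can emit "--dependent-lib=" with a bare basename and let the linker
// resolve compiler-rt libraries itself, searching both runtime layouts, e.g.
// (paths illustrative, not from the patch)
//   <resource-dir>/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a   (new)
//   <resource-dir>/lib/linux/libclang_rt.builtins-x86_64.a               (old)
// A simplified standalone sketch of the two-candidate lookup; the function
// name and Exists callback are hypothetical stand-ins for the VFS queries.
#include <functional>
#include <string>
#include <vector>
static std::string
findRuntimeSketch(const std::vector<std::string> &SearchDirs,
                  const std::string &Component, const std::string &Arch,
                  const std::function<bool(const std::string &)> &Exists) {
  std::string NewName = "libclang_rt." + Component + ".a";            // no arch suffix
  std::string OldName = "libclang_rt." + Component + "-" + Arch + ".a";
  for (const auto &Dir : SearchDirs) // new per-target layout wins, as in getCompilerRT()
    if (Exists(Dir + "/" + NewName))
      return Dir + "/" + NewName;
  for (const auto &Dir : SearchDirs) // fall back to the old arch-suffixed spelling
    if (Exists(Dir + "/" + OldName))
      return Dir + "/" + OldName;
  return std::string();
}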
+ for (const auto &LibPath : TC.getLibraryPaths()) { + if (TC.getVFS().exists(LibPath)) + CmdArgs.push_back(Args.MakeArgString("-L" + LibPath)); + } + auto CRTPath = TC.getCompilerRTPath(); + if (TC.getVFS().exists(CRTPath)) + CmdArgs.push_back(Args.MakeArgString("-L" + CRTPath)); + AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); // TODO: Add profile stuff here diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e70ae7efb0c3c..5fdb1dbc433ce 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1466,10 +1466,11 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; - State.Stack.back().HasMultipleNestedBlocks = (Current.BlockParameterCount > 1); + State.Stack.back().HasMultipleNestedBlocks = + (Current.BlockParameterCount > 1); - if (Style.BraceWrapping.BeforeLambdaBody && - Current.Next != nullptr && Current.Tok.is(tok::l_paren)) { + if (Style.BraceWrapping.BeforeLambdaBody && Current.Next != nullptr && + Current.Tok.is(tok::l_paren)) { // Search for any parameter that is a lambda FormatToken const *next = Current.Next; while (next != nullptr) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index eead2b4a520a3..961cb92ebd409 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -195,11 +195,13 @@ struct ScalarEnumerationTraits< static void enumeration(IO &IO, FormatStyle::BraceWrappingAfterControlStatementStyle &Value) { - IO.enumCase(Value, "false", FormatStyle::BWACS_Never); - IO.enumCase(Value, "true", FormatStyle::BWACS_Always); IO.enumCase(Value, "Never", FormatStyle::BWACS_Never); IO.enumCase(Value, "MultiLine", FormatStyle::BWACS_MultiLine); IO.enumCase(Value, "Always", FormatStyle::BWACS_Always); + + // For backward compatibility. 
+ IO.enumCase(Value, "false", FormatStyle::BWACS_Never); + IO.enumCase(Value, "true", FormatStyle::BWACS_Always); } };
@@ -397,6 +399,8 @@ template <> struct MappingTraits<FormatStyle> { Style.AllowAllConstructorInitializersOnNextLine); IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", Style.AllowAllParametersOfDeclarationOnNextLine); + IO.mapOptional("AllowShortEnumsOnASingleLine", + Style.AllowShortEnumsOnASingleLine); IO.mapOptional("AllowShortBlocksOnASingleLine", Style.AllowShortBlocksOnASingleLine); IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
@@ -752,6 +756,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AllowAllArgumentsOnNextLine = true; LLVMStyle.AllowAllConstructorInitializersOnNextLine = true; LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; + LLVMStyle.AllowShortEnumsOnASingleLine = true; LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; LLVMStyle.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Never; LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
@@ -1137,6 +1142,7 @@ FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) { Style.BraceWrapping.BeforeCatch = true; Style.BraceWrapping.BeforeElse = true; Style.PenaltyReturnTypeOnItsOwnLine = 1000; + Style.AllowShortEnumsOnASingleLine = false; Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; Style.AllowShortCaseLabelsOnASingleLine = false; Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never;
@@ -2622,6 +2628,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; + LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp();
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 6532f8108f08f..734dbdc1b6f31 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -395,7 +395,7 @@ class AnnotatingParser { if (!AttrTok) return false; - + // Allow an attribute to be the only content of a file. AttrTok = AttrTok->Next; if (!AttrTok)
@@ -1852,6 +1856,10 @@ class AnnotatingParser { if (Style.Language == FormatStyle::LK_JavaScript) return TT_BinaryOperator; + // && in C# must be a binary operator. + if (Style.isCSharp() && Tok.is(tok::ampamp)) + return TT_BinaryOperator; + const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken) return TT_UnaryOperator;
@@ -2281,6 +2285,10 @@ static bool isFunctionDeclarationName(const FormatToken &Current, Next = Next->Next; continue; } + if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { + Next = Next->MatchingParen; + continue; + } break; }
@@ -2810,6 +2818,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, tok::l_square)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; + if (Right.is(tok::star) && Left.is(tok::star)) + return false; if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) { const FormatToken *Previous = &Left; while (Previous && !Previous->is(tok::kw_operator)) {
@@ -2837,9 +2847,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // operator std::Foo*() // operator C::D*() // dependent on PointerAlignment style.
- if (Previous && (Previous->endsSequence(tok::kw_operator) || - Previous->endsSequence(tok::kw_const, tok::kw_operator) || - Previous->endsSequence(tok::kw_volatile, tok::kw_operator))) + if (Previous && + (Previous->endsSequence(tok::kw_operator) || + Previous->endsSequence(tok::kw_const, tok::kw_operator) || + Previous->endsSequence(tok::kw_volatile, tok::kw_operator))) return (Style.PointerAlignment != FormatStyle::PAS_Left); } const auto SpaceRequiredForArrayInitializerLSquare = @@ -3052,6 +3063,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(TT_CSharpNullConditionalLSquare)) return Style.SpacesInSquareBrackets; + // space after var in `var (key, value)` + if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren)) + return true; + // space between keywords and paren e.g. "using (" if (Right.is(tok::l_paren)) if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when)) @@ -3259,12 +3274,13 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) + // Put a space between < and :: in vector< ::std::string > return (Left.is(TT_TemplateOpener) && - Style.Standard < FormatStyle::LS_Cpp11) || + (Style.Standard < FormatStyle::LS_Cpp11 || Style.SpacesInAngles)) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, - tok::kw___super, TT_TemplateCloser, - TT_TemplateOpener)) || - (Left.is(tok ::l_paren) && Style.SpacesInParentheses); + tok::kw___super, TT_TemplateOpener, + TT_TemplateCloser)) || + (Left.is(tok::l_paren) && Style.SpacesInParentheses); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; // Space before TT_StructuredBindingLSquare. 
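// NOTE: context for the "< ::" change in the hunk above. Before C++11, "<:"
// was lexed as the digraph for '[', so "vector<::std::string>" did not parse
// and the conventional workaround was to write the space by hand. The patch
// keeps that space not only for pre-C++11 standards but also whenever
// SpacesInAngles is enabled. A minimal reproducer (illustrative, not part of
// the patch; the unspaced spelling only compiles with -std=c++11 or newer):
#include <string>
#include <vector>
std::vector< ::std::string> Names; // the space is required under -std=c++03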
@@ -3325,22 +3341,20 @@ static bool isOneChildWithoutMustBreakBefore(const FormatToken &Tok) { if (Tok.Children.size() != 1) return false; FormatToken *curElt = Tok.Children[0]->First; - while (curElt) { - if (curElt->MustBreakBefore) - return false; - curElt = curElt->Next; - } + while (curElt) { + if (curElt->MustBreakBefore) + return false; + curElt = curElt->Next; + } return true; } -static bool -isAllmanLambdaBrace(const FormatToken &Tok) { +static bool isAllmanLambdaBrace(const FormatToken &Tok) { return (Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && - !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral)); + !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral)); } -static bool -isAllmanBraceIncludedBreakableLambda(const FormatToken &Tok, - FormatStyle::ShortLambdaStyle ShortLambdaOption) { +static bool isAllmanBraceIncludedBreakableLambda( + const FormatToken &Tok, FormatStyle::ShortLambdaStyle ShortLambdaOption) { if (!isAllmanLambdaBrace(Tok)) return false; @@ -3497,7 +3511,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Style.BraceWrapping.BeforeLambdaBody && (isAllmanBraceIncludedBreakableLambda(Left, ShortLambdaOption) || isAllmanBraceIncludedBreakableLambda(Right, ShortLambdaOption))) { - return true; + return true; } if (isAllmanBrace(Left) || isAllmanBrace(Right)) @@ -3929,7 +3943,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Right.isMemberAccess() || Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, tok::colon, tok::l_square, tok::at) || - (Style.BraceWrapping.BeforeLambdaBody && Right.is(tok::l_brace)) || + (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c84c951fcaa85..96e0bd2276fa8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineParser.h" +#include "FormatToken.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1470,7 +1471,7 @@ void UnwrappedLineParser::parseStructuralElement() { } else if (Style.Language == FormatStyle::LK_Proto && FormatTok->Tok.is(tok::less)) { nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, /*ClosingBraceKind=*/tok::greater); } break; @@ -1495,9 +1496,7 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { if (FormatTok->Previous->isNot(tok::identifier)) return false; - // Try to parse the property accessor braces and contents: - // `{ get; set; } = new MyType(defaultValue);` - // ^^^^^^^^^^^^^ + // See if we are inside a property accessor. // // Record the current tokenPosition so that we can advance and // reset the current token. `Next` is not set yet so we need @@ -1505,7 +1504,11 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { unsigned int StoredPosition = Tokens->getPosition(); FormatToken *Tok = Tokens->getNextToken(); + // A trivial property accessor is of the form: + // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } + // Track these as they do not require line breaks to be introduced. 
bool HasGetOrSet = false; + bool IsTrivialPropertyAccessor = true; while (!eof()) { if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, @@ -1515,10 +1518,9 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { Tok = Tokens->getNextToken(); continue; } - if (Tok->is(tok::r_brace)) - break; - Tokens->setPosition(StoredPosition); - return false; + if (Tok->isNot(tok::r_brace)) + IsTrivialPropertyAccessor = false; + break; } if (!HasGetOrSet) { @@ -1526,33 +1528,51 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { return false; } + // Try to parse the property accessor: + // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties Tokens->setPosition(StoredPosition); - while (FormatTok->isNot(tok::r_brace)) { - nextToken(); - } - - // Try to parse (optional) assignment to default value: - // `{ get; set; } = new MyType(defaultValue);` - // ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - // There may be some very complicated expressions inside default value - // assignment, the simple parse block below will not handle them. - // The parse block below would need extending to handle opening parens etc. - StoredPosition = Tokens->getPosition(); - Tok = Tokens->getNextToken(); - bool NextTokenIsEqual = Tok->is(tok::equal); - Tokens->setPosition(StoredPosition); - - if (NextTokenIsEqual) { - do { + nextToken(); + do { + switch (FormatTok->Tok.getKind()) { + case tok::r_brace: nextToken(); - if (FormatTok->is(tok::semi)) + if (FormatTok->is(tok::equal)) { + while (!eof() && FormatTok->isNot(tok::semi)) + nextToken(); + nextToken(); + } + addUnwrappedLine(); + return true; + case tok::l_brace: + ++Line->Level; + parseBlock(/*MustBeDeclaration=*/true); + addUnwrappedLine(); + --Line->Level; + break; + case tok::equal: + if (FormatTok->is(TT_JsFatArrow)) { + ++Line->Level; + do { + nextToken(); + } while (!eof() && FormatTok->isNot(tok::semi)); + nextToken(); + addUnwrappedLine(); + --Line->Level; break; - } while (!eof()); - } + } + nextToken(); + break; + default: + if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && + !IsTrivialPropertyAccessor) { + // Non-trivial get/set needs to be on its own line. + addUnwrappedLine(); + } + nextToken(); + } + } while (!eof()); - // Add an unwrapped line for the whole property accessor. - nextToken(); - addUnwrappedLine(); + // Unreachable for well-formed code (paired '{' and '}'). return true; } @@ -1626,6 +1646,7 @@ bool UnwrappedLineParser::tryToParseLambda() { case tok::lessequal: case tok::question: case tok::colon: + case tok::ellipsis: case tok::kw_true: case tok::kw_false: if (SeenArrow) { @@ -1724,9 +1745,10 @@ bool UnwrappedLineParser::tryToParseBracedList() { } bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, + bool IsEnum, tok::TokenKind ClosingBraceKind) { bool HasError = false; - + // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssigmentExpression() inside. 
do {
@@ -1764,6 +1786,8 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, } } if (FormatTok->Tok.getKind() == ClosingBraceKind) { + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); nextToken(); return !HasError; }
@@ -1800,7 +1824,7 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, case tok::less: if (Style.Language == FormatStyle::LK_Proto) { nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, + parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, /*ClosingBraceKind=*/tok::greater); } else { nextToken();
@@ -1822,6 +1846,8 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, break; case tok::comma: nextToken(); + if (IsEnum && !Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); break; default: nextToken();
@@ -2280,9 +2306,18 @@ bool UnwrappedLineParser::parseEnum() { return true; } + if (!Style.AllowShortEnumsOnASingleLine) + addUnwrappedLine(); // Parse enum body. nextToken(); - bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); + if (!Style.AllowShortEnumsOnASingleLine) { + addUnwrappedLine(); + Line->Level += 1; + } + bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, + /*IsEnum=*/true); + if (!Style.AllowShortEnumsOnASingleLine) + Line->Level -= 1; if (HasError) { if (FormatTok->is(tok::semi)) nextToken();
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index c1fe1e3e1357f..8d4118ab6dc7d 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -98,7 +98,7 @@ class UnwrappedLineParser { void readTokenWithJavaScriptASI(); void parseStructuralElement(); bool tryToParseBracedList(); - bool parseBracedList(bool ContinueOnSemicolons = false, + bool parseBracedList(bool ContinueOnSemicolons = false, bool IsEnum = false, tok::TokenKind ClosingBraceKind = tok::r_brace); void parseParens(); void parseSquare(bool LambdaIntroducer = false);
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 7920aa2b5cbe1..57d025b7c32e7 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -224,7 +224,7 @@ struct ASTUnit::ASTWriterData { }; void ASTUnit::clearFileLevelDecls() { - llvm::DeleteContainerSeconds(FileDecls); + FileDecls.clear(); } /// After failing to build a precompiled preamble (due to
@@ -2436,9 +2436,9 @@ void ASTUnit::addFileLevelDecl(Decl *D) { if (FID.isInvalid()) return; - LocDeclsTy *&Decls = FileDecls[FID]; + std::unique_ptr<LocDeclsTy> &Decls = FileDecls[FID]; if (!Decls) - Decls = new LocDeclsTy(); + Decls = std::make_unique<LocDeclsTy>(); std::pair<unsigned, Decl *> LocDecl(Offset, D);
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h index 937f1683b544d..f583c215f919e 100644 --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h
@@ -111,7 +111,8 @@ _mm_tzcnt_64(unsigned long long __X) #undef __RELAXED_FN_ATTRS -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__BMI__) /* Define the default attributes for the functions in this file.
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
@@ -420,6 +421,7 @@ __blsr_u64(unsigned long long __X) #undef __DEFAULT_FN_ATTRS -#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */ +#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ + || defined(__BMI__) */ #endif /* __BMIINTRIN_H */
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index ebce647817885..6c38b578b30e5 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h
@@ -24,6 +24,10 @@ #define signature_CYRIX_ebx 0x69727943 #define signature_CYRIX_edx 0x736e4978 #define signature_CYRIX_ecx 0x64616574 +/* HYGON: "HygonGenuine" */ +#define signature_HYGON_ebx 0x6f677948 +#define signature_HYGON_edx 0x6e65476e +#define signature_HYGON_ecx 0x656e6975 /* INTEL: "GenuineIntel" */ #define signature_INTEL_ebx 0x756e6547 #define signature_INTEL_edx 0x49656e69
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 64841412155b8..dd27ca2f6605b 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h
@@ -10,198 +10,231 @@ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__MMX__) #include <mmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SSE__) #include <xmmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SSE2__) #include <emmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SSE3__) #include <pmmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SSSE3__) #include <tmmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__SSE4_2__) || defined(__SSE4_1__)) #include <smmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AES__) || defined(__PCLMUL__)) #include <wmmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CLFLUSHOPT__) #include <clflushoptintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CLWB__) #include <clwbintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX__) #include <avxintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX2__) #include <avx2intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__F16C__) #include <f16cintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__VPCLMULQDQ__) #include <vpclmulqdqintrin.h> #endif /* No feature check desired due to internal checks */ #include <bmiintrin.h> -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__BMI2__) #include <bmi2intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__LZCNT__) #include <lzcntintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__POPCNT__) #include <popcntintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__FMA__) #include <fmaintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512F__) #include <avx512fintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VL__) #include <avx512vlintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512BW__) #include <avx512bwintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512BITALG__) #include <avx512bitalgintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512CD__) #include <avx512cdintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VPOPCNTDQ__) #include <avx512vpopcntdqintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include <avx512vpopcntdqvlintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VNNI__) #include <avx512vnniintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include <avx512vlvnniintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512DQ__) #include <avx512dqintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include <avx512vlbitalgintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include <avx512vlbwintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512CD__)) #include <avx512vlcdintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include <avx512vldqintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512ER__) #include <avx512erintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512IFMA__) #include <avx512ifmaintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include <avx512ifmavlintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VBMI__) #include <avx512vbmiintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include <avx512vbmivlintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VBMI2__) #include <avx512vbmi2intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include <avx512vlvbmi2intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512PF__) #include <avx512pfintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512BF16__) #include <avx512bf16intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include <avx512vlbf16intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__PKU__) #include <pkuintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__VAES__) #include <vaesintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__GFNI__) #include <gfniintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RDPID__) /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile <immintrin.h>
@@ -213,7 +246,8 @@ _rdpid_u32(void) { } #endif // __RDPID__ -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RDRND__) static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) {
@@ -235,7 +269,8 @@ _rdrand64_step(unsigned long long *__p) #endif #endif /* __RDRND__ */ -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__FSGSBASE__) #ifdef __x86_64__ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _readfsbase_u32(void)
@@ -288,7 +323,8 @@ _writegsbase_u64(unsigned long long __V) #endif #endif /* __FSGSBASE__ */ -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. The __may_alias__ prevents
@@ -347,35 +383,42 @@ _storebe_i64(void * __P, long long __D) { #endif #endif /* __MOVBE */ -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RTM__) #include <rtmintrin.h> #include <xtestintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SHA__) #include <shaintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__FXSR__) #include <fxsrintrin.h> #endif /* No feature check desired due to internal MSC_VER checks */ #include <xsaveintrin.h> -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__XSAVEOPT__) #include <xsaveoptintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__XSAVEC__) #include <xsavecintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__XSAVES__) #include <xsavesintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SHSTK__) #include <cetintrin.h> #endif
@@ -383,62 +426,73 @@ _storebe_i64(void * __P, long long __D) { * whereas others are also available at all times. */ #include <adxintrin.h> -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__RDSEED__) #include <rdseedintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__WBNOINVD__) #include <wbnoinvdintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CLDEMOTE__) #include <cldemoteintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__WAITPKG__) #include <waitpkgintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ - defined(__MOVDIRI__) || defined(__MOVDIR64B__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__MOVDIRI__) || defined(__MOVDIR64B__) #include <movdirintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__PCONFIG__) #include <pconfigintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SGX__) #include <sgxintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__PTWRITE__) #include <ptwriteintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__INVPCID__) #include <invpcidintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ - defined(__AVX512VP2INTERSECT__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX512VP2INTERSECT__) #include <avx512vp2intersectintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include <avx512vlvp2intersectintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__ENQCMD__) #include <enqcmdintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SERIALIZE__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SERIALIZE__) #include <serializeintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TSXLDTRK__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__TSXLDTRK__) #include <tsxldtrkintrin.h> #endif
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h index 6f9b609f2dfeb..e58c9769e8cb0 100644 --- a/clang/lib/Headers/vecintrin.h +++ b/clang/lib/Headers/vecintrin.h
@@ -43,1281 +43,1341 @@ __lcbb(const void *__ptr, unsigned short __len) /*-- vec_extract ------------------------------------------------------------*/ static inline __ATTRS_o_ai signed char -vec_extract(vector signed char __vec, int __index) { +vec_extract(__vector signed char __vec, int __index) { return __vec[__index & 15]; } static inline __ATTRS_o_ai unsigned char -vec_extract(vector bool char __vec, int __index) { +vec_extract(__vector __bool char __vec, int __index) { return __vec[__index & 15]; } static inline
__ATTRS_o_ai unsigned char -vec_extract(vector unsigned char __vec, int __index) { +vec_extract(__vector unsigned char __vec, int __index) { return __vec[__index & 15]; } static inline __ATTRS_o_ai signed short -vec_extract(vector signed short __vec, int __index) { +vec_extract(__vector signed short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai unsigned short -vec_extract(vector bool short __vec, int __index) { +vec_extract(__vector __bool short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai unsigned short -vec_extract(vector unsigned short __vec, int __index) { +vec_extract(__vector unsigned short __vec, int __index) { return __vec[__index & 7]; } static inline __ATTRS_o_ai signed int -vec_extract(vector signed int __vec, int __index) { +vec_extract(__vector signed int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai unsigned int -vec_extract(vector bool int __vec, int __index) { +vec_extract(__vector __bool int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai unsigned int -vec_extract(vector unsigned int __vec, int __index) { +vec_extract(__vector unsigned int __vec, int __index) { return __vec[__index & 3]; } static inline __ATTRS_o_ai signed long long -vec_extract(vector signed long long __vec, int __index) { +vec_extract(__vector signed long long __vec, int __index) { return __vec[__index & 1]; } static inline __ATTRS_o_ai unsigned long long -vec_extract(vector bool long long __vec, int __index) { +vec_extract(__vector __bool long long __vec, int __index) { return __vec[__index & 1]; } static inline __ATTRS_o_ai unsigned long long -vec_extract(vector unsigned long long __vec, int __index) { +vec_extract(__vector unsigned long long __vec, int __index) { return __vec[__index & 1]; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai float -vec_extract(vector float __vec, int __index) { +vec_extract(__vector float __vec, int __index) { return __vec[__index & 3]; } #endif static inline __ATTRS_o_ai double -vec_extract(vector double __vec, int __index) { +vec_extract(__vector double __vec, int __index) { return __vec[__index & 1]; } /*-- vec_insert -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_insert(signed char __scalar, vector signed char __vec, int __index) { +static inline __ATTRS_o_ai __vector signed char +vec_insert(signed char __scalar, __vector signed char __vec, int __index) { __vec[__index & 15] = __scalar; return __vec; } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector unsigned char -vec_insert(unsigned char __scalar, vector bool char __vec, int __index) { - vector unsigned char __newvec = (vector unsigned char)__vec; +static inline __ATTRS_o_ai __vector unsigned char +vec_insert(unsigned char __scalar, __vector __bool char __vec, int __index) { + __vector unsigned char __newvec = (__vector unsigned char)__vec; __newvec[__index & 15] = (unsigned char)__scalar; return __newvec; } -static inline __ATTRS_o_ai vector unsigned char -vec_insert(unsigned char __scalar, vector unsigned char __vec, int __index) { +static inline __ATTRS_o_ai __vector unsigned char +vec_insert(unsigned char __scalar, __vector unsigned char __vec, int __index) { __vec[__index & 15] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed short -vec_insert(signed short __scalar, vector signed short __vec, int __index) { +static inline __ATTRS_o_ai __vector signed short +vec_insert(signed short __scalar, __vector signed short __vec, int __index) { __vec[__index & 7] = __scalar; return __vec; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned short -vec_insert(unsigned short __scalar, vector bool short __vec, int __index) { - vector unsigned short __newvec = (vector unsigned short)__vec; +static inline __ATTRS_o_ai __vector unsigned short +vec_insert(unsigned short __scalar, __vector __bool short __vec, + int __index) { + __vector unsigned short __newvec = (__vector unsigned short)__vec; __newvec[__index & 7] = (unsigned short)__scalar; return __newvec; } -static inline __ATTRS_o_ai vector unsigned short -vec_insert(unsigned short __scalar, vector unsigned short __vec, int __index) { +static inline __ATTRS_o_ai __vector unsigned short +vec_insert(unsigned short __scalar, __vector unsigned short __vec, + int __index) { __vec[__index & 7] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed int -vec_insert(signed int __scalar, vector signed int __vec, int __index) { +static inline __ATTRS_o_ai __vector signed int +vec_insert(signed int __scalar, __vector signed int __vec, int __index) { __vec[__index & 3] = __scalar; return __vec; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned int -vec_insert(unsigned int __scalar, vector bool int __vec, int __index) { - vector unsigned int __newvec = (vector unsigned int)__vec; +static inline __ATTRS_o_ai __vector unsigned int +vec_insert(unsigned int __scalar, __vector __bool int __vec, int __index) { + __vector unsigned int __newvec = (__vector unsigned int)__vec; __newvec[__index & 3] = __scalar; return __newvec; } -static inline __ATTRS_o_ai vector unsigned int -vec_insert(unsigned int __scalar, vector unsigned int __vec, int __index) { +static inline __ATTRS_o_ai __vector unsigned int +vec_insert(unsigned int __scalar, __vector unsigned int __vec, int __index) { __vec[__index & 3] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed long long -vec_insert(signed long long __scalar, vector signed long long __vec, +static inline __ATTRS_o_ai __vector signed long long +vec_insert(signed long long __scalar, __vector signed long long __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector unsigned long long -vec_insert(unsigned long long __scalar, vector bool long long __vec, +static inline __ATTRS_o_ai __vector unsigned long long +vec_insert(unsigned long long __scalar, __vector __bool long long __vec, int __index) { - vector unsigned long long __newvec = (vector unsigned long long)__vec; + __vector unsigned long long __newvec = (__vector unsigned long long)__vec; __newvec[__index & 1] = __scalar; return __newvec; } -static inline __ATTRS_o_ai vector unsigned long long -vec_insert(unsigned long long __scalar, vector unsigned long long __vec, +static inline __ATTRS_o_ai __vector unsigned long long +vec_insert(unsigned long long __scalar, __vector unsigned long long __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_insert(float __scalar, vector float __vec, int __index) { +static inline __ATTRS_o_ai __vector float +vec_insert(float __scalar, __vector float __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } #endif -static inline __ATTRS_o_ai vector double -vec_insert(double __scalar, vector double __vec, int __index) { +static inline __ATTRS_o_ai __vector double +vec_insert(double __scalar, __vector double __vec, int __index) { __vec[__index & 1] = __scalar; return __vec; } /*-- vec_promote ------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char +static inline __ATTRS_o_ai __vector signed char vec_promote(signed char __scalar, int __index) { - const vector signed char __zero = (vector signed char)0; - vector signed char __vec = __builtin_shufflevector(__zero, __zero, + const __vector signed char __zero = (__vector signed char)0; + __vector signed char __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 15] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector unsigned char +static inline __ATTRS_o_ai __vector unsigned char vec_promote(unsigned char __scalar, int __index) { - const vector unsigned char __zero = (vector unsigned char)0; - vector unsigned char __vec = __builtin_shufflevector(__zero, __zero, + const __vector unsigned char __zero = (__vector unsigned char)0; + __vector unsigned char __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 15] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed short +static inline __ATTRS_o_ai __vector signed short vec_promote(signed short __scalar, int __index) { - const vector signed short __zero = (vector signed short)0; - vector signed short __vec = __builtin_shufflevector(__zero, __zero, + const __vector signed short __zero = (__vector signed short)0; + __vector signed short __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 7] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector unsigned short +static inline __ATTRS_o_ai __vector unsigned short vec_promote(unsigned short __scalar, int __index) { - const vector unsigned short __zero = (vector unsigned short)0; - vector unsigned short __vec = __builtin_shufflevector(__zero, __zero, + const __vector unsigned short __zero = (__vector unsigned short)0; + __vector unsigned short __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1, -1, -1, -1, -1); __vec[__index & 7] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed int +static inline 
__ATTRS_o_ai __vector signed int vec_promote(signed int __scalar, int __index) { - const vector signed int __zero = (vector signed int)0; - vector signed int __vec = __builtin_shufflevector(__zero, __zero, - -1, -1, -1, -1); + const __vector signed int __zero = (__vector signed int)0; + __vector signed int __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector unsigned int +static inline __ATTRS_o_ai __vector unsigned int vec_promote(unsigned int __scalar, int __index) { - const vector unsigned int __zero = (vector unsigned int)0; - vector unsigned int __vec = __builtin_shufflevector(__zero, __zero, - -1, -1, -1, -1); + const __vector unsigned int __zero = (__vector unsigned int)0; + __vector unsigned int __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector signed long long +static inline __ATTRS_o_ai __vector signed long long vec_promote(signed long long __scalar, int __index) { - const vector signed long long __zero = (vector signed long long)0; - vector signed long long __vec = __builtin_shufflevector(__zero, __zero, - -1, -1); + const __vector signed long long __zero = (__vector signed long long)0; + __vector signed long long __vec = __builtin_shufflevector(__zero, __zero, + -1, -1); __vec[__index & 1] = __scalar; return __vec; } -static inline __ATTRS_o_ai vector unsigned long long +static inline __ATTRS_o_ai __vector unsigned long long vec_promote(unsigned long long __scalar, int __index) { - const vector unsigned long long __zero = (vector unsigned long long)0; - vector unsigned long long __vec = __builtin_shufflevector(__zero, __zero, - -1, -1); + const __vector unsigned long long __zero = (__vector unsigned long long)0; + __vector unsigned long long __vec = __builtin_shufflevector(__zero, __zero, + -1, -1); __vec[__index & 1] = __scalar; return __vec; } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float +static inline __ATTRS_o_ai __vector float vec_promote(float __scalar, int __index) { - const vector float __zero = (vector float)0.0f; - vector float __vec = __builtin_shufflevector(__zero, __zero, -1, -1, -1, -1); + const __vector float __zero = (__vector float)0.0f; + __vector float __vec = __builtin_shufflevector(__zero, __zero, + -1, -1, -1, -1); __vec[__index & 3] = __scalar; return __vec; } #endif -static inline __ATTRS_o_ai vector double +static inline __ATTRS_o_ai __vector double vec_promote(double __scalar, int __index) { - const vector double __zero = (vector double)0.0; - vector double __vec = __builtin_shufflevector(__zero, __zero, -1, -1); + const __vector double __zero = (__vector double)0.0; + __vector double __vec = __builtin_shufflevector(__zero, __zero, -1, -1); __vec[__index & 1] = __scalar; return __vec; } /*-- vec_insert_and_zero ----------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char +static inline __ATTRS_o_ai __vector signed char vec_insert_and_zero(const signed char *__ptr) { - vector signed char __vec = (vector signed char)0; + __vector signed char __vec = (__vector signed char)0; __vec[7] = *__ptr; return __vec; } -static inline __ATTRS_o_ai vector unsigned char +static inline __ATTRS_o_ai __vector unsigned char vec_insert_and_zero(const unsigned char *__ptr) { - vector unsigned char __vec = (vector unsigned char)0; + __vector unsigned char __vec = (__vector unsigned char)0; __vec[7] = *__ptr; return __vec; } 
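// NOTE: the rewrite running through this header is mechanical: every "vector"
// or "bool" in a vector type becomes "__vector" or "__bool". The bare
// spellings are context-sensitive keywords that clang only accepts under the
// -fzvector language extension, while the __-prefixed forms are always
// recognized, so the header becomes usable from strict -std modes. A small
// s390x usage sketch against the new spellings (illustrative only, assuming a
// target with the z vector facility):
#include <vecintrin.h>
__vector signed int bumpLastLane(__vector signed int V) {
  int First = vec_extract(V, 0);      // index is masked to 0..3 for int lanes
  return vec_insert(First + 1, V, 3); // copy of V with lane 3 replaced
}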
-static inline __ATTRS_o_ai vector signed short
+static inline __ATTRS_o_ai __vector signed short
 vec_insert_and_zero(const signed short *__ptr) {
-  vector signed short __vec = (vector signed short)0;
+  __vector signed short __vec = (__vector signed short)0;
   __vec[3] = *__ptr;
   return __vec;
 }

-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_insert_and_zero(const unsigned short *__ptr) {
-  vector unsigned short __vec = (vector unsigned short)0;
+  __vector unsigned short __vec = (__vector unsigned short)0;
   __vec[3] = *__ptr;
   return __vec;
 }

-static inline __ATTRS_o_ai vector signed int
+static inline __ATTRS_o_ai __vector signed int
 vec_insert_and_zero(const signed int *__ptr) {
-  vector signed int __vec = (vector signed int)0;
+  __vector signed int __vec = (__vector signed int)0;
   __vec[1] = *__ptr;
   return __vec;
 }

-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_insert_and_zero(const unsigned int *__ptr) {
-  vector unsigned int __vec = (vector unsigned int)0;
+  __vector unsigned int __vec = (__vector unsigned int)0;
   __vec[1] = *__ptr;
   return __vec;
 }

-static inline __ATTRS_o_ai vector signed long long
+static inline __ATTRS_o_ai __vector signed long long
 vec_insert_and_zero(const signed long long *__ptr) {
-  vector signed long long __vec = (vector signed long long)0;
+  __vector signed long long __vec = (__vector signed long long)0;
   __vec[0] = *__ptr;
   return __vec;
 }

-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_insert_and_zero(const unsigned long long *__ptr) {
-  vector unsigned long long __vec = (vector unsigned long long)0;
+  __vector unsigned long long __vec = (__vector unsigned long long)0;
   __vec[0] = *__ptr;
   return __vec;
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
+static inline __ATTRS_o_ai __vector float
 vec_insert_and_zero(const float *__ptr) {
-  vector float __vec = (vector float)0.0f;
+  __vector float __vec = (__vector float)0.0f;
   __vec[1] = *__ptr;
   return __vec;
 }
 #endif

-static inline __ATTRS_o_ai vector double
+static inline __ATTRS_o_ai __vector double
 vec_insert_and_zero(const double *__ptr) {
-  vector double __vec = (vector double)0.0;
+  __vector double __vec = (__vector double)0.0;
   __vec[0] = *__ptr;
   return __vec;
 }

 /*-- vec_perm ---------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_perm(vector signed char __a, vector signed char __b,
-         vector unsigned char __c) {
-  return (vector signed char)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector signed char
+vec_perm(__vector signed char __a, __vector signed char __b,
+         __vector unsigned char __c) {
+  return (__vector signed char)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_perm(vector unsigned char __a, vector unsigned char __b,
-         vector unsigned char __c) {
-  return (vector unsigned char)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_perm(__vector unsigned char __a, __vector unsigned char __b,
+         __vector unsigned char __c) {
+  return (__vector unsigned char)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector bool char
-vec_perm(vector bool char __a, vector bool char __b,
-         vector unsigned char __c) {
-  return (vector bool char)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector __bool char
+vec_perm(__vector __bool char __a, __vector __bool char __b,
+         __vector unsigned char __c) {
+  return (__vector __bool char)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_perm(vector signed short __a, vector signed short __b,
-         vector unsigned char __c) {
-  return (vector signed short)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector signed short
+vec_perm(__vector signed short __a, __vector signed short __b,
+         __vector unsigned char __c) {
+  return (__vector signed short)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_perm(vector unsigned short __a, vector unsigned short __b,
-         vector unsigned char __c) {
-  return (vector unsigned short)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_perm(__vector unsigned short __a, __vector unsigned short __b,
+         __vector unsigned char __c) {
+  return (__vector unsigned short)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector bool short
-vec_perm(vector bool short __a, vector bool short __b,
-         vector unsigned char __c) {
-  return (vector bool short)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector __bool short
+vec_perm(__vector __bool short __a, __vector __bool short __b,
+         __vector unsigned char __c) {
+  return (__vector __bool short)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_perm(vector signed int __a, vector signed int __b,
-         vector unsigned char __c) {
-  return (vector signed int)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector signed int
+vec_perm(__vector signed int __a, __vector signed int __b,
+         __vector unsigned char __c) {
+  return (__vector signed int)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_perm(vector unsigned int __a, vector unsigned int __b,
-         vector unsigned char __c) {
-  return (vector unsigned int)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_perm(__vector unsigned int __a, __vector unsigned int __b,
+         __vector unsigned char __c) {
+  return (__vector unsigned int)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector bool int
-vec_perm(vector bool int __a, vector bool int __b,
-         vector unsigned char __c) {
-  return (vector bool int)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector __bool int
+vec_perm(__vector __bool int __a, __vector __bool int __b,
+         __vector unsigned char __c) {
+  return (__vector __bool int)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_perm(vector signed long long __a, vector signed long long __b,
-         vector unsigned char __c) {
-  return (vector signed long long)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector signed long long
+vec_perm(__vector signed long long __a, __vector signed long long __b,
+         __vector unsigned char __c) {
+  return (__vector signed long long)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_perm(vector unsigned long long __a, vector unsigned long long __b,
-         vector unsigned char __c) {
-  return (vector unsigned long long)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_perm(__vector unsigned long long __a, __vector unsigned long long __b,
+         __vector unsigned char __c) {
+  return (__vector unsigned long long)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

-static inline __ATTRS_o_ai vector bool long long
-vec_perm(vector bool long long __a, vector bool long long __b,
-         vector unsigned char __c) {
-  return (vector bool long long)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_perm(__vector __bool long long __a, __vector __bool long long __b,
+         __vector unsigned char __c) {
+  return (__vector __bool long long)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_perm(vector float __a, vector float __b,
-         vector unsigned char __c) {
-  return (vector float)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector float
+vec_perm(__vector float __a, __vector float __b,
+         __vector unsigned char __c) {
+  return (__vector float)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_perm(vector double __a, vector double __b,
-         vector unsigned char __c) {
-  return (vector double)__builtin_s390_vperm(
-      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+static inline __ATTRS_o_ai __vector double
+vec_perm(__vector double __a, __vector double __b,
+         __vector unsigned char __c) {
+  return (__vector double)__builtin_s390_vperm(
+      (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
 }
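Note (not part of the patch): user code that spells the types with the new __vector keyword keeps calling these overloads exactly as before; the permute pattern still selects bytes 0-31 from the concatenation of the two inputs. A minimal sketch, assuming <vecintrin.h> and -mzvector; the function name and pattern are made up for illustration:

  #include <vecintrin.h>

  /* Interleave the low halves of two byte vectors (hypothetical example). */
  __vector unsigned char interleave_low(__vector unsigned char a,
                                        __vector unsigned char b) {
    const __vector unsigned char pat = {0, 16, 1, 17, 2, 18, 3, 19,
                                        4, 20, 5, 21, 6, 22, 7, 23};
    return vec_perm(a, b, pat);  /* indices 0..15 pick from a, 16..31 from b */
  }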
 /*-- vec_permi --------------------------------------------------------------*/

 // This prototype is deprecated.
-extern __ATTRS_o vector signed long long
-vec_permi(vector signed long long __a, vector signed long long __b, int __c)
+extern __ATTRS_o __vector signed long long
+vec_permi(__vector signed long long __a, __vector signed long long __b,
+          int __c)
   __constant_range(__c, 0, 3);

 // This prototype is deprecated.
-extern __ATTRS_o vector unsigned long long
-vec_permi(vector unsigned long long __a, vector unsigned long long __b, int __c)
+extern __ATTRS_o __vector unsigned long long
+vec_permi(__vector unsigned long long __a, __vector unsigned long long __b,
+          int __c)
   __constant_range(__c, 0, 3);

 // This prototype is deprecated.
-extern __ATTRS_o vector bool long long
-vec_permi(vector bool long long __a, vector bool long long __b, int __c)
+extern __ATTRS_o __vector __bool long long
+vec_permi(__vector __bool long long __a, __vector __bool long long __b,
+          int __c)
   __constant_range(__c, 0, 3);

 // This prototype is deprecated.
-extern __ATTRS_o vector double
-vec_permi(vector double __a, vector double __b, int __c)
+extern __ATTRS_o __vector double
+vec_permi(__vector double __a, __vector double __b, int __c)
   __constant_range(__c, 0, 3);

 #define vec_permi(X, Y, Z) ((__typeof__((vec_permi)((X), (Y), (Z)))) \
-  __builtin_s390_vpdi((vector unsigned long long)(X), \
-                      (vector unsigned long long)(Y), \
+  __builtin_s390_vpdi((__vector unsigned long long)(X), \
+                      (__vector unsigned long long)(Y), \
                       (((Z) & 2) << 1) | ((Z) & 1)))

 /*-- vec_bperm_u128 ---------------------------------------------------------*/

 #if __ARCH__ >= 12
-static inline __ATTRS_ai vector unsigned long long
-vec_bperm_u128(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_ai __vector unsigned long long
+vec_bperm_u128(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vbperm(__a, __b);
 }
 #endif

 /*-- vec_revb ---------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed short
-vec_revb(vector signed short __vec) {
-  return (vector signed short)
-         __builtin_s390_vlbrh((vector unsigned short)__vec);
+static inline __ATTRS_o_ai __vector signed short
+vec_revb(__vector signed short __vec) {
+  return (__vector signed short)
+         __builtin_s390_vlbrh((__vector unsigned short)__vec);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_revb(vector unsigned short __vec) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_revb(__vector unsigned short __vec) {
   return __builtin_s390_vlbrh(__vec);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_revb(vector signed int __vec) {
-  return (vector signed int)
-         __builtin_s390_vlbrf((vector unsigned int)__vec);
+static inline __ATTRS_o_ai __vector signed int
+vec_revb(__vector signed int __vec) {
+  return (__vector signed int)
+         __builtin_s390_vlbrf((__vector unsigned int)__vec);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_revb(vector unsigned int __vec) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_revb(__vector unsigned int __vec) {
   return __builtin_s390_vlbrf(__vec);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_revb(vector signed long long __vec) {
-  return (vector signed long long)
-         __builtin_s390_vlbrg((vector unsigned long long)__vec);
+static inline __ATTRS_o_ai __vector signed long long
+vec_revb(__vector signed long long __vec) {
+  return (__vector signed long long)
+         __builtin_s390_vlbrg((__vector unsigned long long)__vec);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_revb(vector unsigned long long __vec) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_revb(__vector unsigned long long __vec) {
   return __builtin_s390_vlbrg(__vec);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_revb(vector float __vec) {
-  return (vector float)
-         __builtin_s390_vlbrf((vector unsigned int)__vec);
+static inline __ATTRS_o_ai __vector float
+vec_revb(__vector float __vec) {
+  return (__vector float)
+         __builtin_s390_vlbrf((__vector unsigned int)__vec);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_revb(vector double __vec) {
-  return (vector double)
-         __builtin_s390_vlbrg((vector unsigned long long)__vec);
+static inline __ATTRS_o_ai __vector double
+vec_revb(__vector double __vec) {
+  return (__vector double)
+         __builtin_s390_vlbrg((__vector unsigned long long)__vec);
 }

 /*-- vec_reve ---------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_reve(vector signed char __vec) {
-  return (vector signed char) { __vec[15], __vec[14], __vec[13], __vec[12],
-                                __vec[11], __vec[10], __vec[9], __vec[8],
-                                __vec[7], __vec[6], __vec[5], __vec[4],
-                                __vec[3], __vec[2], __vec[1], __vec[0] };
-}
-
-static inline __ATTRS_o_ai vector unsigned char
-vec_reve(vector unsigned char __vec) {
-  return (vector unsigned char) { __vec[15], __vec[14], __vec[13], __vec[12],
+static inline __ATTRS_o_ai __vector signed char
+vec_reve(__vector signed char __vec) {
+  return (__vector signed char) { __vec[15], __vec[14], __vec[13], __vec[12],
                                   __vec[11], __vec[10], __vec[9], __vec[8],
                                   __vec[7], __vec[6], __vec[5], __vec[4],
                                   __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector bool char
-vec_reve(vector bool char __vec) {
-  return (vector bool char) { __vec[15], __vec[14], __vec[13], __vec[12],
-                              __vec[11], __vec[10], __vec[9], __vec[8],
-                              __vec[7], __vec[6], __vec[5], __vec[4],
-                              __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector unsigned char
+vec_reve(__vector unsigned char __vec) {
+  return (__vector unsigned char) { __vec[15], __vec[14], __vec[13], __vec[12],
+                                    __vec[11], __vec[10], __vec[9], __vec[8],
+                                    __vec[7], __vec[6], __vec[5], __vec[4],
+                                    __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector signed short
-vec_reve(vector signed short __vec) {
-  return (vector signed short) { __vec[7], __vec[6], __vec[5], __vec[4],
-                                 __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector __bool char
+vec_reve(__vector __bool char __vec) {
+  return (__vector __bool char) { __vec[15], __vec[14], __vec[13], __vec[12],
+                                  __vec[11], __vec[10], __vec[9], __vec[8],
+                                  __vec[7], __vec[6], __vec[5], __vec[4],
+                                  __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_reve(vector unsigned short __vec) {
-  return (vector unsigned short) { __vec[7], __vec[6], __vec[5], __vec[4],
+static inline __ATTRS_o_ai __vector signed short
+vec_reve(__vector signed short __vec) {
+  return (__vector signed short) { __vec[7], __vec[6], __vec[5], __vec[4],
                                    __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector bool short
-vec_reve(vector bool short __vec) {
-  return (vector bool short) { __vec[7], __vec[6], __vec[5], __vec[4],
-                               __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector unsigned short
+vec_reve(__vector unsigned short __vec) {
+  return (__vector unsigned short) { __vec[7], __vec[6], __vec[5], __vec[4],
+                                     __vec[3], __vec[2], __vec[1], __vec[0] };
+}
+
+static inline __ATTRS_o_ai __vector __bool short
+vec_reve(__vector __bool short __vec) {
+  return (__vector __bool short) { __vec[7], __vec[6], __vec[5], __vec[4],
+                                   __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector signed int
-vec_reve(vector signed int __vec) {
-  return (vector signed int) { __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector signed int
+vec_reve(__vector signed int __vec) {
+  return (__vector signed int) { __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_reve(vector unsigned int __vec) {
-  return (vector unsigned int) { __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector unsigned int
+vec_reve(__vector unsigned int __vec) {
+  return (__vector unsigned int) { __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector bool int
-vec_reve(vector bool int __vec) {
-  return (vector bool int) { __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector __bool int
+vec_reve(__vector __bool int __vec) {
+  return (__vector __bool int) { __vec[3], __vec[2], __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_reve(vector signed long long __vec) {
-  return (vector signed long long) { __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector signed long long
+vec_reve(__vector signed long long __vec) {
+  return (__vector signed long long) { __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_reve(vector unsigned long long __vec) {
-  return (vector unsigned long long) { __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_reve(__vector unsigned long long __vec) {
+  return (__vector unsigned long long) { __vec[1], __vec[0] };
 }

-static inline __ATTRS_o_ai vector bool long long
-vec_reve(vector bool long long __vec) {
-  return (vector bool long long) { __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector __bool long long
+vec_reve(__vector __bool long long __vec) {
+  return (__vector __bool long long) { __vec[1], __vec[0] };
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_reve(vector float __vec) {
-  return (vector float) { __vec[3], __vec[2], __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector float
+vec_reve(__vector float __vec) {
+  return (__vector float) { __vec[3], __vec[2], __vec[1], __vec[0] };
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_reve(vector double __vec) {
-  return (vector double) { __vec[1], __vec[0] };
+static inline __ATTRS_o_ai __vector double
+vec_reve(__vector double __vec) {
+  return (__vector double) { __vec[1], __vec[0] };
 }

 /*-- vec_sel ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_sel(vector signed char __a, vector signed char __b,
-        vector unsigned char __c) {
-  return ((vector signed char)__c & __b) | (~(vector signed char)__c & __a);
+static inline __ATTRS_o_ai __vector signed char
+vec_sel(__vector signed char __a, __vector signed char __b,
+        __vector unsigned char __c) {
+  return (((__vector signed char)__c & __b) |
+          (~(__vector signed char)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed char
-vec_sel(vector signed char __a, vector signed char __b, vector bool char __c) {
-  return ((vector signed char)__c & __b) | (~(vector signed char)__c & __a);
+static inline __ATTRS_o_ai __vector signed char
+vec_sel(__vector signed char __a, __vector signed char __b,
+        __vector __bool char __c) {
+  return (((__vector signed char)__c & __b) |
+          (~(__vector signed char)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool char
-vec_sel(vector bool char __a, vector bool char __b, vector unsigned char __c) {
-  return ((vector bool char)__c & __b) | (~(vector bool char)__c & __a);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sel(__vector __bool char __a, __vector __bool char __b,
+        __vector unsigned char __c) {
+  return (((__vector __bool char)__c & __b) |
+          (~(__vector __bool char)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool char
-vec_sel(vector bool char __a, vector bool char __b, vector bool char __c) {
+static inline __ATTRS_o_ai __vector __bool char
+vec_sel(__vector __bool char __a, __vector __bool char __b,
+        __vector __bool char __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_sel(vector unsigned char __a, vector unsigned char __b,
-        vector unsigned char __c) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sel(__vector unsigned char __a, __vector unsigned char __b,
+        __vector unsigned char __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_sel(vector unsigned char __a, vector unsigned char __b,
-        vector bool char __c) {
-  return ((vector unsigned char)__c & __b) | (~(vector unsigned char)__c & __a);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sel(__vector unsigned char __a, __vector unsigned char __b,
+        __vector __bool char __c) {
+  return (((__vector unsigned char)__c & __b) |
+          (~(__vector unsigned char)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed short
-vec_sel(vector signed short __a, vector signed short __b,
-        vector unsigned short __c) {
-  return ((vector signed short)__c & __b) | (~(vector signed short)__c & __a);
+static inline __ATTRS_o_ai __vector signed short
+vec_sel(__vector signed short __a, __vector signed short __b,
+        __vector unsigned short __c) {
+  return (((__vector signed short)__c & __b) |
+          (~(__vector signed short)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed short
-vec_sel(vector signed short __a, vector signed short __b,
-        vector bool short __c) {
-  return ((vector signed short)__c & __b) | (~(vector signed short)__c & __a);
+static inline __ATTRS_o_ai __vector signed short
+vec_sel(__vector signed short __a, __vector signed short __b,
+        __vector __bool short __c) {
+  return (((__vector signed short)__c & __b) |
+          (~(__vector signed short)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool short
-vec_sel(vector bool short __a, vector bool short __b,
-        vector unsigned short __c) {
-  return ((vector bool short)__c & __b) | (~(vector bool short)__c & __a);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sel(__vector __bool short __a, __vector __bool short __b,
+        __vector unsigned short __c) {
+  return (((__vector __bool short)__c & __b) |
+          (~(__vector __bool short)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool short
-vec_sel(vector bool short __a, vector bool short __b, vector bool short __c) {
+static inline __ATTRS_o_ai __vector __bool short
+vec_sel(__vector __bool short __a, __vector __bool short __b,
+        __vector __bool short __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_sel(vector unsigned short __a, vector unsigned short __b,
-        vector unsigned short __c) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sel(__vector unsigned short __a, __vector unsigned short __b,
+        __vector unsigned short __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_sel(vector unsigned short __a, vector unsigned short __b,
-        vector bool short __c) {
-  return (((vector unsigned short)__c & __b) |
-          (~(vector unsigned short)__c & __a));
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sel(__vector unsigned short __a, __vector unsigned short __b,
+        __vector __bool short __c) {
+  return (((__vector unsigned short)__c & __b) |
+          (~(__vector unsigned short)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed int
-vec_sel(vector signed int __a, vector signed int __b,
-        vector unsigned int __c) {
-  return ((vector signed int)__c & __b) | (~(vector signed int)__c & __a);
+static inline __ATTRS_o_ai __vector signed int
+vec_sel(__vector signed int __a, __vector signed int __b,
+        __vector unsigned int __c) {
+  return (((__vector signed int)__c & __b) |
+          (~(__vector signed int)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed int
-vec_sel(vector signed int __a, vector signed int __b, vector bool int __c) {
-  return ((vector signed int)__c & __b) | (~(vector signed int)__c & __a);
+static inline __ATTRS_o_ai __vector signed int
+vec_sel(__vector signed int __a, __vector signed int __b,
+        __vector __bool int __c) {
+  return (((__vector signed int)__c & __b) |
+          (~(__vector signed int)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool int
-vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c) {
-  return ((vector bool int)__c & __b) | (~(vector bool int)__c & __a);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sel(__vector __bool int __a, __vector __bool int __b,
+        __vector unsigned int __c) {
+  return (((__vector __bool int)__c & __b) |
+          (~(__vector __bool int)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool int
-vec_sel(vector bool int __a, vector bool int __b, vector bool int __c) {
+static inline __ATTRS_o_ai __vector __bool int
+vec_sel(__vector __bool int __a, __vector __bool int __b,
+        __vector __bool int __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_sel(vector unsigned int __a, vector unsigned int __b,
-        vector unsigned int __c) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sel(__vector unsigned int __a, __vector unsigned int __b,
+        __vector unsigned int __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) {
-  return ((vector unsigned int)__c & __b) | (~(vector unsigned int)__c & __a);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sel(__vector unsigned int __a, __vector unsigned int __b,
+        __vector __bool int __c) {
+  return (((__vector unsigned int)__c & __b) |
+          (~(__vector unsigned int)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_sel(vector signed long long __a, vector signed long long __b,
-        vector unsigned long long __c) {
-  return (((vector signed long long)__c & __b) |
-          (~(vector signed long long)__c & __a));
+static inline __ATTRS_o_ai __vector signed long long
+vec_sel(__vector signed long long __a, __vector signed long long __b,
+        __vector unsigned long long __c) {
+  return (((__vector signed long long)__c & __b) |
+          (~(__vector signed long long)__c & __a));
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_sel(vector signed long long __a, vector signed long long __b,
-        vector bool long long __c) {
-  return (((vector signed long long)__c & __b) |
-          (~(vector signed long long)__c & __a));
+static inline __ATTRS_o_ai __vector signed long long
+vec_sel(__vector signed long long __a, __vector signed long long __b,
+        __vector __bool long long __c) {
+  return (((__vector signed long long)__c & __b) |
+          (~(__vector signed long long)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool long long
-vec_sel(vector bool long long __a, vector bool long long __b,
-        vector unsigned long long __c) {
-  return (((vector bool long long)__c & __b) |
-          (~(vector bool long long)__c & __a));
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sel(__vector __bool long long __a, __vector __bool long long __b,
+        __vector unsigned long long __c) {
+  return (((__vector __bool long long)__c & __b) |
+          (~(__vector __bool long long)__c & __a));
 }

-static inline __ATTRS_o_ai vector bool long long
-vec_sel(vector bool long long __a, vector bool long long __b,
-        vector bool long long __c) {
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sel(__vector __bool long long __a, __vector __bool long long __b,
+        __vector __bool long long __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_sel(vector unsigned long long __a, vector unsigned long long __b,
-        vector unsigned long long __c) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_sel(__vector unsigned long long __a, __vector unsigned long long __b,
+        __vector unsigned long long __c) {
   return (__c & __b) | (~__c & __a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_sel(vector unsigned long long __a, vector unsigned long long __b,
-        vector bool long long __c) {
-  return (((vector unsigned long long)__c & __b) |
-          (~(vector unsigned long long)__c & __a));
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_sel(__vector unsigned long long __a, __vector unsigned long long __b,
+        __vector __bool long long __c) {
+  return (((__vector unsigned long long)__c & __b) |
+          (~(__vector unsigned long long)__c & __a));
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_sel(vector float __a, vector float __b, vector unsigned int __c) {
-  return (vector float)((__c & (vector unsigned int)__b) |
-                        (~__c & (vector unsigned int)__a));
+static inline __ATTRS_o_ai __vector float
+vec_sel(__vector float __a, __vector float __b, __vector unsigned int __c) {
+  return (__vector float)((__c & (__vector unsigned int)__b) |
+                          (~__c & (__vector unsigned int)__a));
 }

-static inline __ATTRS_o_ai vector float
-vec_sel(vector float __a, vector float __b, vector bool int __c) {
-  vector unsigned int __ac = (vector unsigned int)__a;
-  vector unsigned int __bc = (vector unsigned int)__b;
-  vector unsigned int __cc = (vector unsigned int)__c;
-  return (vector float)((__cc & __bc) | (~__cc & __ac));
+static inline __ATTRS_o_ai __vector float
+vec_sel(__vector float __a, __vector float __b, __vector __bool int __c) {
+  __vector unsigned int __ac = (__vector unsigned int)__a;
+  __vector unsigned int __bc = (__vector unsigned int)__b;
+  __vector unsigned int __cc = (__vector unsigned int)__c;
+  return (__vector float)((__cc & __bc) | (~__cc & __ac));
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_sel(vector double __a, vector double __b, vector unsigned long long __c) {
-  return (vector double)((__c & (vector unsigned long long)__b) |
-                         (~__c & (vector unsigned long long)__a));
+static inline __ATTRS_o_ai __vector double
+vec_sel(__vector double __a, __vector double __b,
+        __vector unsigned long long __c) {
+  return (__vector double)((__c & (__vector unsigned long long)__b) |
+                           (~__c & (__vector unsigned long long)__a));
 }

-static inline __ATTRS_o_ai vector double
-vec_sel(vector double __a, vector double __b, vector bool long long __c) {
-  vector unsigned long long __ac = (vector unsigned long long)__a;
-  vector unsigned long long __bc = (vector unsigned long long)__b;
-  vector unsigned long long __cc = (vector unsigned long long)__c;
-  return (vector double)((__cc & __bc) | (~__cc & __ac));
+static inline __ATTRS_o_ai __vector double
+vec_sel(__vector double __a, __vector double __b,
+        __vector __bool long long __c) {
+  __vector unsigned long long __ac = (__vector unsigned long long)__a;
+  __vector unsigned long long __bc = (__vector unsigned long long)__b;
+  __vector unsigned long long __cc = (__vector unsigned long long)__c;
+  return (__vector double)((__cc & __bc) | (~__cc & __ac));
 }

 /*-- vec_gather_element -----------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed int
-vec_gather_element(vector signed int __vec, vector unsigned int __offset,
+static inline __ATTRS_o_ai __vector signed int
+vec_gather_element(__vector signed int __vec,
+                   __vector unsigned int __offset,
                    const signed int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
   __vec[__index] = *(const signed int *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

-static inline __ATTRS_o_ai vector bool int
-vec_gather_element(vector bool int __vec, vector unsigned int __offset,
+static inline __ATTRS_o_ai __vector __bool int
+vec_gather_element(__vector __bool int __vec,
+                   __vector unsigned int __offset,
                    const unsigned int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
   __vec[__index] = *(const unsigned int *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_gather_element(vector unsigned int __vec, vector unsigned int __offset,
+static inline __ATTRS_o_ai __vector unsigned int
+vec_gather_element(__vector unsigned int __vec,
+                   __vector unsigned int __offset,
                    const unsigned int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
   __vec[__index] = *(const unsigned int *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_gather_element(vector signed long long __vec,
-                   vector unsigned long long __offset,
+static inline __ATTRS_o_ai __vector signed long long
+vec_gather_element(__vector signed long long __vec,
+                   __vector unsigned long long __offset,
                    const signed long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
   __vec[__index] = *(const signed long long *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

-static inline __ATTRS_o_ai vector bool long long
-vec_gather_element(vector bool long long __vec,
-                   vector unsigned long long __offset,
+static inline __ATTRS_o_ai __vector __bool long long
+vec_gather_element(__vector __bool long long __vec,
+                   __vector unsigned long long __offset,
                    const unsigned long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
   __vec[__index] = *(const unsigned long long *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_gather_element(vector unsigned long long __vec,
-                   vector unsigned long long __offset,
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_gather_element(__vector unsigned long long __vec,
+                   __vector unsigned long long __offset,
                    const unsigned long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
   __vec[__index] = *(const unsigned long long *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_gather_element(vector float __vec, vector unsigned int __offset,
+static inline __ATTRS_o_ai __vector float
+vec_gather_element(__vector float __vec,
+                   __vector unsigned int __offset,
                    const float *__ptr, int __index)
   __constant_range(__index, 0, 3) {
   __vec[__index] = *(const float *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_gather_element(vector double __vec, vector unsigned long long __offset,
+static inline __ATTRS_o_ai __vector double
+vec_gather_element(__vector double __vec,
+                   __vector unsigned long long __offset,
                    const double *__ptr, int __index)
   __constant_range(__index, 0, 1) {
   __vec[__index] = *(const double *)(
-    (__INTPTR_TYPE__)__ptr + (__INTPTR_TYPE__)__offset[__index]);
+    (const char *)__ptr + __offset[__index]);
   return __vec;
 }

 /*-- vec_scatter_element ----------------------------------------------------*/

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector signed int __vec, vector unsigned int __offset,
+vec_scatter_element(__vector signed int __vec,
+                    __vector unsigned int __offset,
                     signed int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
-  *(signed int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(signed int *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector bool int __vec, vector unsigned int __offset,
+vec_scatter_element(__vector __bool int __vec,
+                    __vector unsigned int __offset,
                     unsigned int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
-  *(unsigned int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(unsigned int *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector unsigned int __vec, vector unsigned int __offset,
+vec_scatter_element(__vector unsigned int __vec,
+                    __vector unsigned int __offset,
                     unsigned int *__ptr, int __index)
   __constant_range(__index, 0, 3) {
-  *(unsigned int *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(unsigned int *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector signed long long __vec,
-                    vector unsigned long long __offset,
+vec_scatter_element(__vector signed long long __vec,
+                    __vector unsigned long long __offset,
                     signed long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
-  *(signed long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(signed long long *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector bool long long __vec,
-                    vector unsigned long long __offset,
+vec_scatter_element(__vector __bool long long __vec,
+                    __vector unsigned long long __offset,
                     unsigned long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
-  *(unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(unsigned long long *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector unsigned long long __vec,
-                    vector unsigned long long __offset,
+vec_scatter_element(__vector unsigned long long __vec,
+                    __vector unsigned long long __offset,
                     unsigned long long *__ptr, int __index)
   __constant_range(__index, 0, 1) {
-  *(unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(unsigned long long *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 #if __ARCH__ >= 12
 static inline __ATTRS_o_ai void
-vec_scatter_element(vector float __vec, vector unsigned int __offset,
+vec_scatter_element(__vector float __vec,
+                    __vector unsigned int __offset,
                     float *__ptr, int __index)
   __constant_range(__index, 0, 3) {
-  *(float *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(float *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }
 #endif

 static inline __ATTRS_o_ai void
-vec_scatter_element(vector double __vec, vector unsigned long long __offset,
+vec_scatter_element(__vector double __vec,
+                    __vector unsigned long long __offset,
                     double *__ptr, int __index)
   __constant_range(__index, 0, 1) {
-  *(double *)((__INTPTR_TYPE__)__ptr + __offset[__index]) =
+  *(double *)((char *)__ptr + __offset[__index]) =
     __vec[__index];
 }

 /*-- vec_xl -----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
+static inline __ATTRS_o_ai __vector signed char
 vec_xl(long __offset, const signed char *__ptr) {
-  return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed char *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_xl(long __offset, const unsigned char *__ptr) {
-  return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned char *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector signed short
+static inline __ATTRS_o_ai __vector signed short
 vec_xl(long __offset, const signed short *__ptr) {
-  return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed short *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_xl(long __offset, const unsigned short *__ptr) {
-  return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned short *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector signed int
+static inline __ATTRS_o_ai __vector signed int
 vec_xl(long __offset, const signed int *__ptr) {
-  return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed int *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_xl(long __offset, const unsigned int *__ptr) {
-  return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned int *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector signed long long
+static inline __ATTRS_o_ai __vector signed long long
 vec_xl(long __offset, const signed long long *__ptr) {
-  return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed long long *)
+          ((const char *)__ptr + __offset);
 }

-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_xl(long __offset, const unsigned long long *__ptr) {
-  return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned long long *)
+          ((const char *)__ptr + __offset);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
+static inline __ATTRS_o_ai __vector float
 vec_xl(long __offset, const float *__ptr) {
-  return *(const vector float *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector float *)
+          ((const char *)__ptr + __offset);
 }
 #endif

-static inline __ATTRS_o_ai vector double
+static inline __ATTRS_o_ai __vector double
 vec_xl(long __offset, const double *__ptr) {
-  return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector double *)
+          ((const char *)__ptr + __offset);
 }

 /*-- vec_xld2 ---------------------------------------------------------------*/

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
+static inline __ATTRS_o_ai __vector signed char
 vec_xld2(long __offset, const signed char *__ptr) {
-  return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed char *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_xld2(long __offset, const unsigned char *__ptr) {
-  return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned char *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
+static inline __ATTRS_o_ai __vector signed short
 vec_xld2(long __offset, const signed short *__ptr) {
-  return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed short *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_xld2(long __offset, const unsigned short *__ptr) {
-  return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned short *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
+static inline __ATTRS_o_ai __vector signed int
 vec_xld2(long __offset, const signed int *__ptr) {
-  return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed int *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_xld2(long __offset, const unsigned int *__ptr) {
-  return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned int *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
+static inline __ATTRS_o_ai __vector signed long long
 vec_xld2(long __offset, const signed long long *__ptr) {
-  return *(const vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed long long *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_xld2(long __offset, const unsigned long long *__ptr) {
-  return *(const vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned long long *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector double
+static inline __ATTRS_o_ai __vector double
 vec_xld2(long __offset, const double *__ptr) {
-  return *(const vector double *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector double *)
+          ((const char *)__ptr + __offset);
 }
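Note on the address computation above (my reading, not something the patch states): casting to (const char *) and adding the byte offset yields the same address as the old round-trip through __INTPTR_TYPE__, but stays in pointer arithmetic throughout. A sketch of the equivalence, with made-up helper names:

  #include <vecintrin.h>

  /* Both forms denote the same address for any byte offset off. */
  const signed char *old_style(const signed char *p, long off) {
    return (const signed char *)((__INTPTR_TYPE__)p + off);  /* integer detour */
  }

  const signed char *new_style(const signed char *p, long off) {
    return (const signed char *)((const char *)p + off);     /* stays a pointer */
  }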
 /*-- vec_xlw4 ---------------------------------------------------------------*/

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
+static inline __ATTRS_o_ai __vector signed char
 vec_xlw4(long __offset, const signed char *__ptr) {
-  return *(const vector signed char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed char *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_xlw4(long __offset, const unsigned char *__ptr) {
-  return *(const vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned char *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
+static inline __ATTRS_o_ai __vector signed short
 vec_xlw4(long __offset, const signed short *__ptr) {
-  return *(const vector signed short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed short *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_xlw4(long __offset, const unsigned short *__ptr) {
-  return *(const vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned short *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
+static inline __ATTRS_o_ai __vector signed int
 vec_xlw4(long __offset, const signed int *__ptr) {
-  return *(const vector signed int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector signed int *)
+          ((const char *)__ptr + __offset);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_xlw4(long __offset, const unsigned int *__ptr) {
-  return *(const vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset);
+  return *(const __vector unsigned int *)
+          ((const char *)__ptr + __offset);
 }
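Since vec_xld2 and vec_xlw4 are both marked deprecated, new code would presumably call vec_xl instead, which takes the same byte offset and performs the same unaligned load. A hypothetical migration, not taken from the patch:

  #include <vecintrin.h>

  __vector signed int load_ints(const signed int *p, long byte_off) {
    /* Deprecated spelling: return vec_xlw4(byte_off, p); */
    return vec_xl(byte_off, p);  /* same load, same byte offset */
  }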
 /*-- vec_xst ----------------------------------------------------------------*/

 static inline __ATTRS_o_ai void
-vec_xst(vector signed char __vec, long __offset, signed char *__ptr) {
-  *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector signed char __vec, long __offset, signed char *__ptr) {
+  *(__vector signed char *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
-  *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector unsigned char __vec, long __offset, unsigned char *__ptr) {
+  *(__vector unsigned char *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector signed short __vec, long __offset, signed short *__ptr) {
-  *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector signed short __vec, long __offset, signed short *__ptr) {
+  *(__vector signed short *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
-  *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector unsigned short __vec, long __offset, unsigned short *__ptr) {
+  *(__vector unsigned short *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector signed int __vec, long __offset, signed int *__ptr) {
-  *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector signed int __vec, long __offset, signed int *__ptr) {
+  *(__vector signed int *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
-  *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector unsigned int __vec, long __offset, unsigned int *__ptr) {
+  *(__vector unsigned int *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector signed long long __vec, long __offset,
+vec_xst(__vector signed long long __vec, long __offset,
         signed long long *__ptr) {
-  *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+  *(__vector signed long long *)((char *)__ptr + __offset) = __vec;
 }

 static inline __ATTRS_o_ai void
-vec_xst(vector unsigned long long __vec, long __offset,
+vec_xst(__vector unsigned long long __vec, long __offset,
         unsigned long long *__ptr) {
-  *(vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset) =
-    __vec;
+  *(__vector unsigned long long *)((char *)__ptr + __offset) = __vec;
 }

 #if __ARCH__ >= 12
 static inline __ATTRS_o_ai void
-vec_xst(vector float __vec, long __offset, float *__ptr) {
-  *(vector float *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector float __vec, long __offset, float *__ptr) {
+  *(__vector float *)((char *)__ptr + __offset) = __vec;
 }
 #endif

 static inline __ATTRS_o_ai void
-vec_xst(vector double __vec, long __offset, double *__ptr) {
-  *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xst(__vector double __vec, long __offset, double *__ptr) {
+  *(__vector double *)((char *)__ptr + __offset) = __vec;
 }
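For symmetry with vec_xl, a round-trip usage sketch (hypothetical helper, assuming 16 readable bytes at src and 16 writable bytes at dst):

  #include <vecintrin.h>

  void copy16(const unsigned char *src, unsigned char *dst) {
    __vector unsigned char v = vec_xl(0L, src);  /* unaligned 16-byte load */
    vec_xst(v, 0L, dst);                         /* unaligned 16-byte store */
  }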
 /*-- vec_xstd2 --------------------------------------------------------------*/

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector signed char __vec, long __offset, signed char *__ptr) {
-  *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector signed char __vec, long __offset, signed char *__ptr) {
+  *(__vector signed char *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
-  *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector unsigned char __vec, long __offset, unsigned char *__ptr) {
+  *(__vector unsigned char *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector signed short __vec, long __offset, signed short *__ptr) {
-  *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector signed short __vec, long __offset, signed short *__ptr) {
+  *(__vector signed short *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
-  *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector unsigned short __vec, long __offset, unsigned short *__ptr) {
+  *(__vector unsigned short *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector signed int __vec, long __offset, signed int *__ptr) {
-  *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector signed int __vec, long __offset, signed int *__ptr) {
+  *(__vector signed int *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
-  *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector unsigned int __vec, long __offset, unsigned int *__ptr) {
+  *(__vector unsigned int *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector signed long long __vec, long __offset,
+vec_xstd2(__vector signed long long __vec, long __offset,
           signed long long *__ptr) {
-  *(vector signed long long *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+  *(__vector signed long long *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector unsigned long long __vec, long __offset,
+vec_xstd2(__vector unsigned long long __vec, long __offset,
           unsigned long long *__ptr) {
-  *(vector unsigned long long *)((__INTPTR_TYPE__)__ptr + __offset) =
-    __vec;
+  *(__vector unsigned long long *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstd2(vector double __vec, long __offset, double *__ptr) {
-  *(vector double *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstd2(__vector double __vec, long __offset, double *__ptr) {
+  *(__vector double *)((char *)__ptr + __offset) = __vec;
 }
 /*-- vec_xstw4 --------------------------------------------------------------*/

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector signed char __vec, long __offset, signed char *__ptr) {
-  *(vector signed char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector signed char __vec, long __offset, signed char *__ptr) {
+  *(__vector signed char *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector unsigned char __vec, long __offset, unsigned char *__ptr) {
-  *(vector unsigned char *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector unsigned char __vec, long __offset, unsigned char *__ptr) {
+  *(__vector unsigned char *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector signed short __vec, long __offset, signed short *__ptr) {
-  *(vector signed short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector signed short __vec, long __offset, signed short *__ptr) {
+  *(__vector signed short *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector unsigned short __vec, long __offset, unsigned short *__ptr) {
-  *(vector unsigned short *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector unsigned short __vec, long __offset, unsigned short *__ptr) {
+  *(__vector unsigned short *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector signed int __vec, long __offset, signed int *__ptr) {
-  *(vector signed int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector signed int __vec, long __offset, signed int *__ptr) {
+  *(__vector signed int *)((char *)__ptr + __offset) = __vec;
 }

 // This prototype is deprecated.
 static inline __ATTRS_o_ai void
-vec_xstw4(vector unsigned int __vec, long __offset, unsigned int *__ptr) {
-  *(vector unsigned int *)((__INTPTR_TYPE__)__ptr + __offset) = __vec;
+vec_xstw4(__vector unsigned int __vec, long __offset, unsigned int *__ptr) {
+  *(__vector unsigned int *)((char *)__ptr + __offset) = __vec;
 }

 /*-- vec_load_bndry ---------------------------------------------------------*/

-extern __ATTRS_o vector signed char
+extern __ATTRS_o __vector signed char
 vec_load_bndry(const signed char *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector unsigned char
+extern __ATTRS_o __vector unsigned char
 vec_load_bndry(const unsigned char *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector signed short
+extern __ATTRS_o __vector signed short
 vec_load_bndry(const signed short *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector unsigned short
+extern __ATTRS_o __vector unsigned short
 vec_load_bndry(const unsigned short *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector signed int
+extern __ATTRS_o __vector signed int
 vec_load_bndry(const signed int *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector unsigned int
+extern __ATTRS_o __vector unsigned int
 vec_load_bndry(const unsigned int *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector signed long long
+extern __ATTRS_o __vector signed long long
 vec_load_bndry(const signed long long *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

-extern __ATTRS_o vector unsigned long long
+extern __ATTRS_o __vector unsigned long long
 vec_load_bndry(const unsigned long long *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);

 #if __ARCH__ >= 12
-extern __ATTRS_o vector float
+extern __ATTRS_o __vector float
 vec_load_bndry(const float *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);
 #endif

-extern __ATTRS_o vector double
+extern __ATTRS_o __vector double
 vec_load_bndry(const double *__ptr, unsigned short __len)
   __constant_pow2_range(__len, 64, 4096);
@@ -1332,159 +1392,159 @@ vec_load_bndry(const double *__ptr, unsigned short __len)

 /*-- vec_load_len -----------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
+static inline __ATTRS_o_ai __vector signed char
 vec_load_len(const signed char *__ptr, unsigned int __len) {
-  return (vector signed char)__builtin_s390_vll(__len, __ptr);
+  return (__vector signed char)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_load_len(const unsigned char *__ptr, unsigned int __len) {
-  return (vector unsigned char)__builtin_s390_vll(__len, __ptr);
+  return (__vector unsigned char)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector signed short
+static inline __ATTRS_o_ai __vector signed short
 vec_load_len(const signed short *__ptr, unsigned int __len) {
-  return (vector signed short)__builtin_s390_vll(__len, __ptr);
+  return (__vector signed short)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_load_len(const unsigned short *__ptr, unsigned int __len) {
-  return (vector unsigned short)__builtin_s390_vll(__len, __ptr);
+  return (__vector unsigned short)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector signed int
+static inline __ATTRS_o_ai __vector signed int
 vec_load_len(const signed int *__ptr, unsigned int __len) {
-  return (vector signed int)__builtin_s390_vll(__len, __ptr);
+  return (__vector signed int)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_load_len(const unsigned int *__ptr, unsigned int __len) {
-  return (vector unsigned int)__builtin_s390_vll(__len, __ptr);
+  return (__vector unsigned int)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector signed long long
+static inline __ATTRS_o_ai __vector signed long long
 vec_load_len(const signed long long *__ptr, unsigned int __len) {
-  return (vector signed long long)__builtin_s390_vll(__len, __ptr);
+  return (__vector signed long long)__builtin_s390_vll(__len, __ptr);
 }

-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_load_len(const unsigned long long *__ptr, unsigned int __len) {
-  return (vector unsigned long long)__builtin_s390_vll(__len, __ptr);
+  return (__vector unsigned long long)__builtin_s390_vll(__len, __ptr);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
+static inline __ATTRS_o_ai __vector float
 vec_load_len(const float *__ptr, unsigned int __len) {
-  return (vector float)__builtin_s390_vll(__len, __ptr);
+  return (__vector float)__builtin_s390_vll(__len, __ptr);
 }
 #endif

-static inline __ATTRS_o_ai vector double
+static inline __ATTRS_o_ai __vector double
 vec_load_len(const double *__ptr, unsigned int __len) {
-  return (vector double)__builtin_s390_vll(__len, __ptr);
+  return (__vector double)__builtin_s390_vll(__len, __ptr);
 }

 /*-- vec_load_len_r ---------------------------------------------------------*/

 #if __ARCH__ >= 12
-static inline __ATTRS_ai vector unsigned char
+static inline __ATTRS_ai __vector unsigned char
 vec_load_len_r(const unsigned char *__ptr, unsigned int __len) {
-  return (vector unsigned char)__builtin_s390_vlrl(__len, __ptr);
+  return (__vector unsigned char)__builtin_s390_vlrl(__len, __ptr);
 }
 #endif
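A usage sketch for the length-limited loads above, paired with vec_store_len from the next hunk (hypothetical helper, not part of the patch; if I read the underlying VLL/VSTL semantics right, the length operand is the highest byte index, hence n - 1 for n bytes):

  #include <vecintrin.h>

  /* Copy the first n bytes (1 <= n <= 16) of src to dst via one vector. */
  void copy_prefix(const unsigned char *src, unsigned char *dst,
                   unsigned int n) {
    __vector unsigned char v = vec_load_len(src, n - 1U);
    vec_store_len(v, dst, n - 1U);
  }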
 /*-- vec_store_len ----------------------------------------------------------*/

 static inline __ATTRS_o_ai void
-vec_store_len(vector signed char __vec, signed char *__ptr,
+vec_store_len(__vector signed char __vec, signed char *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector unsigned char __vec, unsigned char *__ptr,
+vec_store_len(__vector unsigned char __vec, unsigned char *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector signed short __vec, signed short *__ptr,
+vec_store_len(__vector signed short __vec, signed short *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector unsigned short __vec, unsigned short *__ptr,
+vec_store_len(__vector unsigned short __vec, unsigned short *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector signed int __vec, signed int *__ptr,
+vec_store_len(__vector signed int __vec, signed int *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector unsigned int __vec, unsigned int *__ptr,
+vec_store_len(__vector unsigned int __vec, unsigned int *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector signed long long __vec, signed long long *__ptr,
+vec_store_len(__vector signed long long __vec, signed long long *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 static inline __ATTRS_o_ai void
-vec_store_len(vector unsigned long long __vec, unsigned long long *__ptr,
+vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 #if __ARCH__ >= 12
 static inline __ATTRS_o_ai void
-vec_store_len(vector float __vec, float *__ptr,
+vec_store_len(__vector float __vec, float *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }
 #endif

 static inline __ATTRS_o_ai void
-vec_store_len(vector double __vec, double *__ptr,
+vec_store_len(__vector double __vec, double *__ptr,
               unsigned int __len) {
-  __builtin_s390_vstl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
 }

 /*-- vec_store_len_r --------------------------------------------------------*/

 #if __ARCH__ >= 12
 static inline __ATTRS_ai void
-vec_store_len_r(vector unsigned char __vec, unsigned char *__ptr,
+vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr,
                 unsigned int __len) {
-  __builtin_s390_vstrl((vector signed char)__vec, __len, __ptr);
+  __builtin_s390_vstrl((__vector signed char)__vec, __len, __ptr);
 }
 #endif

 /*-- vec_load_pair ----------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed long long
+static inline __ATTRS_o_ai __vector signed long long
 vec_load_pair(signed long long __a, signed long long __b) {
-  return (vector signed long long)(__a, __b);
+  return (__vector signed long long)(__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_load_pair(unsigned long long __a, unsigned long long __b) {
-  return (vector unsigned long long)(__a, __b);
+  return (__vector unsigned long long)(__a, __b);
 }

 /*-- vec_genmask ------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_genmask(unsigned short __mask)
   __constant(__mask) {
-  return (vector unsigned char)(
+  return (__vector unsigned char)(
     __mask & 0x8000 ? 0xff : 0,
     __mask & 0x4000 ? 0xff : 0,
     __mask & 0x2000 ? 0xff : 0,
@@ -1505,7 +1565,7 @@ vec_genmask(unsigned short __mask)

 /*-- vec_genmasks_* ---------------------------------------------------------*/

-static inline __ATTRS_o_ai vector unsigned char
+static inline __ATTRS_o_ai __vector unsigned char
 vec_genmasks_8(unsigned char __first, unsigned char __last)
   __constant(__first) __constant(__last) {
   unsigned char __bit1 = __first & 7;
@@ -1515,10 +1575,10 @@ vec_genmasks_8(unsigned char __first, unsigned char __last)
   unsigned char __value = (__bit1 <= __bit2 ?
                            __mask1 & ~__mask2 :
                            __mask1 | ~__mask2);
-  return (vector unsigned char)__value;
+  return (__vector unsigned char)__value;
 }

-static inline __ATTRS_o_ai vector unsigned short
+static inline __ATTRS_o_ai __vector unsigned short
 vec_genmasks_16(unsigned char __first, unsigned char __last)
   __constant(__first) __constant(__last) {
   unsigned char __bit1 = __first & 15;
@@ -1528,10 +1588,10 @@ vec_genmasks_16(unsigned char __first, unsigned char __last)
   unsigned short __value = (__bit1 <= __bit2 ?
                             __mask1 & ~__mask2 :
                             __mask1 | ~__mask2);
-  return (vector unsigned short)__value;
+  return (__vector unsigned short)__value;
 }

-static inline __ATTRS_o_ai vector unsigned int
+static inline __ATTRS_o_ai __vector unsigned int
 vec_genmasks_32(unsigned char __first, unsigned char __last)
   __constant(__first) __constant(__last) {
   unsigned char __bit1 = __first & 31;
@@ -1541,10 +1601,10 @@ vec_genmasks_32(unsigned char __first, unsigned char __last)
   unsigned int __value = (__bit1 <= __bit2 ?
                           __mask1 & ~__mask2 :
                           __mask1 | ~__mask2);
-  return (vector unsigned int)__value;
+  return (__vector unsigned int)__value;
 }

-static inline __ATTRS_o_ai vector unsigned long long
+static inline __ATTRS_o_ai __vector unsigned long long
 vec_genmasks_64(unsigned char __first, unsigned char __last)
   __constant(__first) __constant(__last) {
   unsigned char __bit1 = __first & 63;
@@ -1554,978 +1614,986 @@ vec_genmasks_64(unsigned char __first, unsigned char __last)
   unsigned long long __value = (__bit1 <= __bit2 ?
                                 __mask1 & ~__mask2 :
                                 __mask1 | ~__mask2);
-  return (vector unsigned long long)__value;
+  return (__vector unsigned long long)__value;
 }

 /*-- vec_splat --------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_splat(vector signed char __vec, int __index)
+static inline __ATTRS_o_ai __vector signed char
+vec_splat(__vector signed char __vec, int __index)
   __constant_range(__index, 0, 15) {
-  return (vector signed char)__vec[__index];
+  return (__vector signed char)__vec[__index];
 }

-static inline __ATTRS_o_ai vector bool char
-vec_splat(vector bool char __vec, int __index)
+static inline __ATTRS_o_ai __vector __bool char
+vec_splat(__vector __bool char __vec, int __index)
   __constant_range(__index, 0, 15) {
-  return (vector bool char)(vector unsigned char)__vec[__index];
+  return (__vector __bool char)(__vector unsigned char)__vec[__index];
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_splat(vector unsigned char __vec, int __index)
+static inline __ATTRS_o_ai __vector unsigned char
+vec_splat(__vector unsigned char __vec, int __index)
   __constant_range(__index, 0, 15) {
-  return (vector unsigned char)__vec[__index];
+  return (__vector unsigned char)__vec[__index];
 }

-static inline __ATTRS_o_ai vector signed short
-vec_splat(vector signed short __vec, int __index)
+static inline __ATTRS_o_ai __vector signed short
+vec_splat(__vector signed short __vec, int __index)
   __constant_range(__index, 0, 7) {
-  return (vector signed short)__vec[__index];
+  return (__vector signed short)__vec[__index];
 }

-static inline __ATTRS_o_ai vector bool short
-vec_splat(vector bool short __vec, int __index)
+static inline __ATTRS_o_ai __vector __bool short
+vec_splat(__vector __bool short __vec, int __index)
   __constant_range(__index, 0, 7) {
-  return (vector bool short)(vector unsigned short)__vec[__index];
+  return (__vector __bool short)(__vector unsigned short)__vec[__index];
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_splat(vector unsigned short __vec, int __index)
+static inline __ATTRS_o_ai __vector unsigned short
+vec_splat(__vector unsigned short __vec, int __index)
   __constant_range(__index, 0, 7) {
-  return (vector unsigned short)__vec[__index];
+  return (__vector unsigned short)__vec[__index];
 }

-static inline __ATTRS_o_ai vector signed int
-vec_splat(vector signed int __vec, int __index)
+static inline __ATTRS_o_ai __vector signed int
+vec_splat(__vector signed int __vec, int __index)
   __constant_range(__index, 0, 3) {
-  return (vector signed int)__vec[__index];
+  return (__vector signed int)__vec[__index];
 }

-static inline __ATTRS_o_ai vector bool int
-vec_splat(vector bool int __vec, int __index)
+static inline __ATTRS_o_ai __vector __bool int
+vec_splat(__vector __bool int __vec, int __index)
   __constant_range(__index, 0, 3) {
-  return (vector bool int)(vector unsigned int)__vec[__index];
+  return (__vector __bool int)(__vector unsigned int)__vec[__index];
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_splat(vector unsigned int __vec, int __index)
+static inline __ATTRS_o_ai __vector unsigned int
+vec_splat(__vector unsigned int __vec, int __index)
   __constant_range(__index, 0, 3) {
-  return (vector unsigned int)__vec[__index];
+  return (__vector unsigned int)__vec[__index];
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_splat(vector signed long long __vec, int __index)
+static inline __ATTRS_o_ai __vector signed long long
+vec_splat(__vector signed long long __vec, int __index)
   __constant_range(__index,
0, 1) { - return (vector signed long long)__vec[__index]; + return (__vector signed long long)__vec[__index]; } -static inline __ATTRS_o_ai vector bool long long -vec_splat(vector bool long long __vec, int __index) +static inline __ATTRS_o_ai __vector __bool long long +vec_splat(__vector __bool long long __vec, int __index) __constant_range(__index, 0, 1) { - return (vector bool long long)(vector unsigned long long)__vec[__index]; + return ((__vector __bool long long) + (__vector unsigned long long)__vec[__index]); } -static inline __ATTRS_o_ai vector unsigned long long -vec_splat(vector unsigned long long __vec, int __index) +static inline __ATTRS_o_ai __vector unsigned long long +vec_splat(__vector unsigned long long __vec, int __index) __constant_range(__index, 0, 1) { - return (vector unsigned long long)__vec[__index]; + return (__vector unsigned long long)__vec[__index]; } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_splat(vector float __vec, int __index) +static inline __ATTRS_o_ai __vector float +vec_splat(__vector float __vec, int __index) __constant_range(__index, 0, 3) { - return (vector float)__vec[__index]; + return (__vector float)__vec[__index]; } #endif -static inline __ATTRS_o_ai vector double -vec_splat(vector double __vec, int __index) +static inline __ATTRS_o_ai __vector double +vec_splat(__vector double __vec, int __index) __constant_range(__index, 0, 1) { - return (vector double)__vec[__index]; + return (__vector double)__vec[__index]; } /*-- vec_splat_s* -----------------------------------------------------------*/ -static inline __ATTRS_ai vector signed char +static inline __ATTRS_ai __vector signed char vec_splat_s8(signed char __scalar) __constant(__scalar) { - return (vector signed char)__scalar; + return (__vector signed char)__scalar; } -static inline __ATTRS_ai vector signed short +static inline __ATTRS_ai __vector signed short vec_splat_s16(signed short __scalar) __constant(__scalar) { - return (vector signed short)__scalar; + return (__vector signed short)__scalar; } -static inline __ATTRS_ai vector signed int +static inline __ATTRS_ai __vector signed int vec_splat_s32(signed short __scalar) __constant(__scalar) { - return (vector signed int)(signed int)__scalar; + return (__vector signed int)(signed int)__scalar; } -static inline __ATTRS_ai vector signed long long +static inline __ATTRS_ai __vector signed long long vec_splat_s64(signed short __scalar) __constant(__scalar) { - return (vector signed long long)(signed long)__scalar; + return (__vector signed long long)(signed long)__scalar; } /*-- vec_splat_u* -----------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned char +static inline __ATTRS_ai __vector unsigned char vec_splat_u8(unsigned char __scalar) __constant(__scalar) { - return (vector unsigned char)__scalar; + return (__vector unsigned char)__scalar; } -static inline __ATTRS_ai vector unsigned short +static inline __ATTRS_ai __vector unsigned short vec_splat_u16(unsigned short __scalar) __constant(__scalar) { - return (vector unsigned short)__scalar; + return (__vector unsigned short)__scalar; } -static inline __ATTRS_ai vector unsigned int +static inline __ATTRS_ai __vector unsigned int vec_splat_u32(signed short __scalar) __constant(__scalar) { - return (vector unsigned int)(signed int)__scalar; + return (__vector unsigned int)(signed int)__scalar; } -static inline __ATTRS_ai vector unsigned long long +static inline __ATTRS_ai __vector unsigned long long vec_splat_u64(signed short 
__scalar) __constant(__scalar) { - return (vector unsigned long long)(signed long long)__scalar; + return (__vector unsigned long long)(signed long long)__scalar; } /*-- vec_splats -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char +static inline __ATTRS_o_ai __vector signed char vec_splats(signed char __scalar) { - return (vector signed char)__scalar; + return (__vector signed char)__scalar; } -static inline __ATTRS_o_ai vector unsigned char +static inline __ATTRS_o_ai __vector unsigned char vec_splats(unsigned char __scalar) { - return (vector unsigned char)__scalar; + return (__vector unsigned char)__scalar; } -static inline __ATTRS_o_ai vector signed short +static inline __ATTRS_o_ai __vector signed short vec_splats(signed short __scalar) { - return (vector signed short)__scalar; + return (__vector signed short)__scalar; } -static inline __ATTRS_o_ai vector unsigned short +static inline __ATTRS_o_ai __vector unsigned short vec_splats(unsigned short __scalar) { - return (vector unsigned short)__scalar; + return (__vector unsigned short)__scalar; } -static inline __ATTRS_o_ai vector signed int +static inline __ATTRS_o_ai __vector signed int vec_splats(signed int __scalar) { - return (vector signed int)__scalar; + return (__vector signed int)__scalar; } -static inline __ATTRS_o_ai vector unsigned int +static inline __ATTRS_o_ai __vector unsigned int vec_splats(unsigned int __scalar) { - return (vector unsigned int)__scalar; + return (__vector unsigned int)__scalar; } -static inline __ATTRS_o_ai vector signed long long +static inline __ATTRS_o_ai __vector signed long long vec_splats(signed long long __scalar) { - return (vector signed long long)__scalar; + return (__vector signed long long)__scalar; } -static inline __ATTRS_o_ai vector unsigned long long +static inline __ATTRS_o_ai __vector unsigned long long vec_splats(unsigned long long __scalar) { - return (vector unsigned long long)__scalar; + return (__vector unsigned long long)__scalar; } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float +static inline __ATTRS_o_ai __vector float vec_splats(float __scalar) { - return (vector float)__scalar; + return (__vector float)__scalar; } #endif -static inline __ATTRS_o_ai vector double +static inline __ATTRS_o_ai __vector double vec_splats(double __scalar) { - return (vector double)__scalar; + return (__vector double)__scalar; } /*-- vec_extend_s64 ---------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed long long -vec_extend_s64(vector signed char __a) { - return (vector signed long long)(__a[7], __a[15]); +static inline __ATTRS_o_ai __vector signed long long +vec_extend_s64(__vector signed char __a) { + return (__vector signed long long)(__a[7], __a[15]); } -static inline __ATTRS_o_ai vector signed long long -vec_extend_s64(vector signed short __a) { - return (vector signed long long)(__a[3], __a[7]); +static inline __ATTRS_o_ai __vector signed long long +vec_extend_s64(__vector signed short __a) { + return (__vector signed long long)(__a[3], __a[7]); } -static inline __ATTRS_o_ai vector signed long long -vec_extend_s64(vector signed int __a) { - return (vector signed long long)(__a[1], __a[3]); +static inline __ATTRS_o_ai __vector signed long long +vec_extend_s64(__vector signed int __a) { + return (__vector signed long long)(__a[1], __a[3]); } /*-- vec_mergeh -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char 
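// Illustrative usage sketch, not part of the patch: assuming a SystemZ
// target built with -mzvector and this header in scope, vec_splats
// broadcasts a scalar into every element and vec_extend_s64 sign-extends
// the low-order word of each doubleword. The helper name is hypothetical.
static inline __vector signed long long
example_splat_then_extend(signed int __x) {
  __vector signed int __v = vec_splats(__x);  // {__x, __x, __x, __x}
  return vec_extend_s64(__v);                 // {__x, __x} widened to 64 bits
}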
-vec_mergeh(vector signed char __a, vector signed char __b) { - return (vector signed char)( +static inline __ATTRS_o_ai __vector signed char +vec_mergeh(__vector signed char __a, __vector signed char __b) { + return (__vector signed char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector bool char -vec_mergeh(vector bool char __a, vector bool char __b) { - return (vector bool char)( +static inline __ATTRS_o_ai __vector __bool char +vec_mergeh(__vector __bool char __a, __vector __bool char __b) { + return (__vector __bool char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector unsigned char -vec_mergeh(vector unsigned char __a, vector unsigned char __b) { - return (vector unsigned char)( +static inline __ATTRS_o_ai __vector unsigned char +vec_mergeh(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector unsigned char)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3], __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector signed short -vec_mergeh(vector signed short __a, vector signed short __b) { - return (vector signed short)( +static inline __ATTRS_o_ai __vector signed short +vec_mergeh(__vector signed short __a, __vector signed short __b) { + return (__vector signed short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector bool short -vec_mergeh(vector bool short __a, vector bool short __b) { - return (vector bool short)( +static inline __ATTRS_o_ai __vector __bool short +vec_mergeh(__vector __bool short __a, __vector __bool short __b) { + return (__vector __bool short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector unsigned short -vec_mergeh(vector unsigned short __a, vector unsigned short __b) { - return (vector unsigned short)( +static inline __ATTRS_o_ai __vector unsigned short +vec_mergeh(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector unsigned short)( __a[0], __b[0], __a[1], __b[1], __a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector signed int -vec_mergeh(vector signed int __a, vector signed int __b) { - return (vector signed int)(__a[0], __b[0], __a[1], __b[1]); +static inline __ATTRS_o_ai __vector signed int +vec_mergeh(__vector signed int __a, __vector signed int __b) { + return (__vector signed int)(__a[0], __b[0], __a[1], __b[1]); } -static inline __ATTRS_o_ai vector bool int -vec_mergeh(vector bool int __a, vector bool int __b) { - return (vector bool int)(__a[0], __b[0], __a[1], __b[1]); +static inline __ATTRS_o_ai __vector __bool int +vec_mergeh(__vector __bool int __a, __vector __bool int __b) { + return (__vector __bool int)(__a[0], __b[0], __a[1], __b[1]); } -static inline __ATTRS_o_ai vector unsigned int -vec_mergeh(vector unsigned int __a, vector unsigned int __b) { - return (vector unsigned int)(__a[0], __b[0], __a[1], __b[1]); +static inline __ATTRS_o_ai __vector unsigned int +vec_mergeh(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector unsigned int)(__a[0], __b[0], __a[1], __b[1]); } -static inline __ATTRS_o_ai vector signed long long -vec_mergeh(vector signed long long __a, vector signed long long __b) { - return (vector signed long long)(__a[0], __b[0]); +static inline 
__ATTRS_o_ai __vector signed long long +vec_mergeh(__vector signed long long __a, __vector signed long long __b) { + return (__vector signed long long)(__a[0], __b[0]); } -static inline __ATTRS_o_ai vector bool long long -vec_mergeh(vector bool long long __a, vector bool long long __b) { - return (vector bool long long)(__a[0], __b[0]); +static inline __ATTRS_o_ai __vector __bool long long +vec_mergeh(__vector __bool long long __a, __vector __bool long long __b) { + return (__vector __bool long long)(__a[0], __b[0]); } -static inline __ATTRS_o_ai vector unsigned long long -vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) { - return (vector unsigned long long)(__a[0], __b[0]); +static inline __ATTRS_o_ai __vector unsigned long long +vec_mergeh(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned long long)(__a[0], __b[0]); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_mergeh(vector float __a, vector float __b) { - return (vector float)(__a[0], __b[0], __a[1], __b[1]); +static inline __ATTRS_o_ai __vector float +vec_mergeh(__vector float __a, __vector float __b) { + return (__vector float)(__a[0], __b[0], __a[1], __b[1]); } #endif -static inline __ATTRS_o_ai vector double -vec_mergeh(vector double __a, vector double __b) { - return (vector double)(__a[0], __b[0]); +static inline __ATTRS_o_ai __vector double +vec_mergeh(__vector double __a, __vector double __b) { + return (__vector double)(__a[0], __b[0]); } /*-- vec_mergel -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_mergel(vector signed char __a, vector signed char __b) { - return (vector signed char)( +static inline __ATTRS_o_ai __vector signed char +vec_mergel(__vector signed char __a, __vector signed char __b) { + return (__vector signed char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } -static inline __ATTRS_o_ai vector bool char -vec_mergel(vector bool char __a, vector bool char __b) { - return (vector bool char)( +static inline __ATTRS_o_ai __vector __bool char +vec_mergel(__vector __bool char __a, __vector __bool char __b) { + return (__vector __bool char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } -static inline __ATTRS_o_ai vector unsigned char -vec_mergel(vector unsigned char __a, vector unsigned char __b) { - return (vector unsigned char)( +static inline __ATTRS_o_ai __vector unsigned char +vec_mergel(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector unsigned char)( __a[8], __b[8], __a[9], __b[9], __a[10], __b[10], __a[11], __b[11], __a[12], __b[12], __a[13], __b[13], __a[14], __b[14], __a[15], __b[15]); } -static inline __ATTRS_o_ai vector signed short -vec_mergel(vector signed short __a, vector signed short __b) { - return (vector signed short)( +static inline __ATTRS_o_ai __vector signed short +vec_mergel(__vector signed short __a, __vector signed short __b) { + return (__vector signed short)( __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector bool short -vec_mergel(vector bool short __a, vector bool short __b) { - return (vector bool short)( +static inline __ATTRS_o_ai __vector __bool short +vec_mergel(__vector __bool short __a, __vector __bool short __b) { + return (__vector __bool short)( 
__a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector unsigned short -vec_mergel(vector unsigned short __a, vector unsigned short __b) { - return (vector unsigned short)( +static inline __ATTRS_o_ai __vector unsigned short +vec_mergel(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector unsigned short)( __a[4], __b[4], __a[5], __b[5], __a[6], __b[6], __a[7], __b[7]); } -static inline __ATTRS_o_ai vector signed int -vec_mergel(vector signed int __a, vector signed int __b) { - return (vector signed int)(__a[2], __b[2], __a[3], __b[3]); +static inline __ATTRS_o_ai __vector signed int +vec_mergel(__vector signed int __a, __vector signed int __b) { + return (__vector signed int)(__a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector bool int -vec_mergel(vector bool int __a, vector bool int __b) { - return (vector bool int)(__a[2], __b[2], __a[3], __b[3]); +static inline __ATTRS_o_ai __vector __bool int +vec_mergel(__vector __bool int __a, __vector __bool int __b) { + return (__vector __bool int)(__a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector unsigned int -vec_mergel(vector unsigned int __a, vector unsigned int __b) { - return (vector unsigned int)(__a[2], __b[2], __a[3], __b[3]); +static inline __ATTRS_o_ai __vector unsigned int +vec_mergel(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector unsigned int)(__a[2], __b[2], __a[3], __b[3]); } -static inline __ATTRS_o_ai vector signed long long -vec_mergel(vector signed long long __a, vector signed long long __b) { - return (vector signed long long)(__a[1], __b[1]); +static inline __ATTRS_o_ai __vector signed long long +vec_mergel(__vector signed long long __a, __vector signed long long __b) { + return (__vector signed long long)(__a[1], __b[1]); } -static inline __ATTRS_o_ai vector bool long long -vec_mergel(vector bool long long __a, vector bool long long __b) { - return (vector bool long long)(__a[1], __b[1]); +static inline __ATTRS_o_ai __vector __bool long long +vec_mergel(__vector __bool long long __a, __vector __bool long long __b) { + return (__vector __bool long long)(__a[1], __b[1]); } -static inline __ATTRS_o_ai vector unsigned long long -vec_mergel(vector unsigned long long __a, vector unsigned long long __b) { - return (vector unsigned long long)(__a[1], __b[1]); +static inline __ATTRS_o_ai __vector unsigned long long +vec_mergel(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned long long)(__a[1], __b[1]); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_mergel(vector float __a, vector float __b) { - return (vector float)(__a[2], __b[2], __a[3], __b[3]); +static inline __ATTRS_o_ai __vector float +vec_mergel(__vector float __a, __vector float __b) { + return (__vector float)(__a[2], __b[2], __a[3], __b[3]); } #endif -static inline __ATTRS_o_ai vector double -vec_mergel(vector double __a, vector double __b) { - return (vector double)(__a[1], __b[1]); +static inline __ATTRS_o_ai __vector double +vec_mergel(__vector double __a, __vector double __b) { + return (__vector double)(__a[1], __b[1]); } /*-- vec_pack ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_pack(vector signed short __a, vector signed short __b) { - vector signed char __ac = (vector signed char)__a; - vector signed char __bc = (vector signed char)__b; - return (vector signed char)( +static inline __ATTRS_o_ai 
__vector signed char +vec_pack(__vector signed short __a, __vector signed short __b) { + __vector signed char __ac = (__vector signed char)__a; + __vector signed char __bc = (__vector signed char)__b; + return (__vector signed char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } -static inline __ATTRS_o_ai vector bool char -vec_pack(vector bool short __a, vector bool short __b) { - vector bool char __ac = (vector bool char)__a; - vector bool char __bc = (vector bool char)__b; - return (vector bool char)( +static inline __ATTRS_o_ai __vector __bool char +vec_pack(__vector __bool short __a, __vector __bool short __b) { + __vector __bool char __ac = (__vector __bool char)__a; + __vector __bool char __bc = (__vector __bool char)__b; + return (__vector __bool char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } -static inline __ATTRS_o_ai vector unsigned char -vec_pack(vector unsigned short __a, vector unsigned short __b) { - vector unsigned char __ac = (vector unsigned char)__a; - vector unsigned char __bc = (vector unsigned char)__b; - return (vector unsigned char)( +static inline __ATTRS_o_ai __vector unsigned char +vec_pack(__vector unsigned short __a, __vector unsigned short __b) { + __vector unsigned char __ac = (__vector unsigned char)__a; + __vector unsigned char __bc = (__vector unsigned char)__b; + return (__vector unsigned char)( __ac[1], __ac[3], __ac[5], __ac[7], __ac[9], __ac[11], __ac[13], __ac[15], __bc[1], __bc[3], __bc[5], __bc[7], __bc[9], __bc[11], __bc[13], __bc[15]); } -static inline __ATTRS_o_ai vector signed short -vec_pack(vector signed int __a, vector signed int __b) { - vector signed short __ac = (vector signed short)__a; - vector signed short __bc = (vector signed short)__b; - return (vector signed short)( +static inline __ATTRS_o_ai __vector signed short +vec_pack(__vector signed int __a, __vector signed int __b) { + __vector signed short __ac = (__vector signed short)__a; + __vector signed short __bc = (__vector signed short)__b; + return (__vector signed short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } -static inline __ATTRS_o_ai vector bool short -vec_pack(vector bool int __a, vector bool int __b) { - vector bool short __ac = (vector bool short)__a; - vector bool short __bc = (vector bool short)__b; - return (vector bool short)( +static inline __ATTRS_o_ai __vector __bool short +vec_pack(__vector __bool int __a, __vector __bool int __b) { + __vector __bool short __ac = (__vector __bool short)__a; + __vector __bool short __bc = (__vector __bool short)__b; + return (__vector __bool short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } -static inline __ATTRS_o_ai vector unsigned short -vec_pack(vector unsigned int __a, vector unsigned int __b) { - vector unsigned short __ac = (vector unsigned short)__a; - vector unsigned short __bc = (vector unsigned short)__b; - return (vector unsigned short)( +static inline __ATTRS_o_ai __vector unsigned short +vec_pack(__vector unsigned int __a, __vector unsigned int __b) { + __vector unsigned short __ac = (__vector unsigned short)__a; + __vector unsigned short __bc = (__vector unsigned short)__b; + return (__vector unsigned short)( __ac[1], __ac[3], __ac[5], __ac[7], __bc[1], __bc[3], __bc[5], __bc[7]); } -static inline __ATTRS_o_ai vector signed int 
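// Illustrative usage sketch, not part of the patch (helper name is
// hypothetical): vec_pack performs modular truncation, keeping the
// low-order half of every element and concatenating both operands;
// the vec_packs/vec_packsu variants below saturate instead.
static inline __vector signed char
example_pack(__vector signed short __a, __vector signed short __b) {
  return vec_pack(__a, __b);  // 8 + 8 shorts -> 16 chars
}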
-vec_pack(vector signed long long __a, vector signed long long __b) { - vector signed int __ac = (vector signed int)__a; - vector signed int __bc = (vector signed int)__b; - return (vector signed int)(__ac[1], __ac[3], __bc[1], __bc[3]); +static inline __ATTRS_o_ai __vector signed int +vec_pack(__vector signed long long __a, __vector signed long long __b) { + __vector signed int __ac = (__vector signed int)__a; + __vector signed int __bc = (__vector signed int)__b; + return (__vector signed int)(__ac[1], __ac[3], __bc[1], __bc[3]); } -static inline __ATTRS_o_ai vector bool int -vec_pack(vector bool long long __a, vector bool long long __b) { - vector bool int __ac = (vector bool int)__a; - vector bool int __bc = (vector bool int)__b; - return (vector bool int)(__ac[1], __ac[3], __bc[1], __bc[3]); +static inline __ATTRS_o_ai __vector __bool int +vec_pack(__vector __bool long long __a, __vector __bool long long __b) { + __vector __bool int __ac = (__vector __bool int)__a; + __vector __bool int __bc = (__vector __bool int)__b; + return (__vector __bool int)(__ac[1], __ac[3], __bc[1], __bc[3]); } -static inline __ATTRS_o_ai vector unsigned int -vec_pack(vector unsigned long long __a, vector unsigned long long __b) { - vector unsigned int __ac = (vector unsigned int)__a; - vector unsigned int __bc = (vector unsigned int)__b; - return (vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]); +static inline __ATTRS_o_ai __vector unsigned int +vec_pack(__vector unsigned long long __a, __vector unsigned long long __b) { + __vector unsigned int __ac = (__vector unsigned int)__a; + __vector unsigned int __bc = (__vector unsigned int)__b; + return (__vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]); } /*-- vec_packs --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_packs(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed char +vec_packs(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vpksh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_packs(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_packs(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vpklsh(__a, __b); } -static inline __ATTRS_o_ai vector signed short -vec_packs(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed short +vec_packs(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vpksf(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_packs(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_packs(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vpklsf(__a, __b); } -static inline __ATTRS_o_ai vector signed int -vec_packs(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed int +vec_packs(__vector signed long long __a, __vector signed long long __b) { return __builtin_s390_vpksg(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_packs(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_packs(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vpklsg(__a, __b); } /*-- vec_packs_cc -----------------------------------------------------------*/ -static 
inline __ATTRS_o_ai vector signed char -vec_packs_cc(vector signed short __a, vector signed short __b, int *__cc) { +static inline __ATTRS_o_ai __vector signed char +vec_packs_cc(__vector signed short __a, __vector signed short __b, int *__cc) { return __builtin_s390_vpkshs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_packs_cc(vector unsigned short __a, vector unsigned short __b, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_packs_cc(__vector unsigned short __a, __vector unsigned short __b, + int *__cc) { return __builtin_s390_vpklshs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector signed short -vec_packs_cc(vector signed int __a, vector signed int __b, int *__cc) { +static inline __ATTRS_o_ai __vector signed short +vec_packs_cc(__vector signed int __a, __vector signed int __b, int *__cc) { return __builtin_s390_vpksfs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector unsigned short -vec_packs_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned short +vec_packs_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vpklsfs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector signed int -vec_packs_cc(vector signed long long __a, vector signed long long __b, +static inline __ATTRS_o_ai __vector signed int +vec_packs_cc(__vector signed long long __a, __vector signed long long __b, int *__cc) { return __builtin_s390_vpksgs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_packs_cc(vector unsigned long long __a, vector unsigned long long __b, +static inline __ATTRS_o_ai __vector unsigned int +vec_packs_cc(__vector unsigned long long __a, __vector unsigned long long __b, int *__cc) { return __builtin_s390_vpklsgs(__a, __b, __cc); } /*-- vec_packsu -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_packsu(vector signed short __a, vector signed short __b) { - const vector signed short __zero = (vector signed short)0; +static inline __ATTRS_o_ai __vector unsigned char +vec_packsu(__vector signed short __a, __vector signed short __b) { + const __vector signed short __zero = (__vector signed short)0; return __builtin_s390_vpklsh( - (vector unsigned short)(__a >= __zero) & (vector unsigned short)__a, - (vector unsigned short)(__b >= __zero) & (vector unsigned short)__b); + (__vector unsigned short)(__a >= __zero) & (__vector unsigned short)__a, + (__vector unsigned short)(__b >= __zero) & (__vector unsigned short)__b); } -static inline __ATTRS_o_ai vector unsigned char -vec_packsu(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_packsu(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vpklsh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_packsu(vector signed int __a, vector signed int __b) { - const vector signed int __zero = (vector signed int)0; +static inline __ATTRS_o_ai __vector unsigned short +vec_packsu(__vector signed int __a, __vector signed int __b) { + const __vector signed int __zero = (__vector signed int)0; return __builtin_s390_vpklsf( - (vector unsigned int)(__a >= __zero) & (vector unsigned int)__a, - (vector unsigned int)(__b >= __zero) & (vector unsigned int)__b); + (__vector unsigned int)(__a >= __zero) & (__vector unsigned int)__a, + (__vector unsigned int)(__b >= __zero) & (__vector unsigned int)__b); } -static inline 
__ATTRS_o_ai vector unsigned short -vec_packsu(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_packsu(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vpklsf(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_packsu(vector signed long long __a, vector signed long long __b) { - const vector signed long long __zero = (vector signed long long)0; +static inline __ATTRS_o_ai __vector unsigned int +vec_packsu(__vector signed long long __a, __vector signed long long __b) { + const __vector signed long long __zero = (__vector signed long long)0; return __builtin_s390_vpklsg( - (vector unsigned long long)(__a >= __zero) & - (vector unsigned long long)__a, - (vector unsigned long long)(__b >= __zero) & - (vector unsigned long long)__b); + (__vector unsigned long long)(__a >= __zero) & + (__vector unsigned long long)__a, + (__vector unsigned long long)(__b >= __zero) & + (__vector unsigned long long)__b); } -static inline __ATTRS_o_ai vector unsigned int -vec_packsu(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_packsu(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vpklsg(__a, __b); } /*-- vec_packsu_cc ----------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_packsu_cc(vector unsigned short __a, vector unsigned short __b, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_packsu_cc(__vector unsigned short __a, __vector unsigned short __b, + int *__cc) { return __builtin_s390_vpklshs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector unsigned short -vec_packsu_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned short +vec_packsu_cc(__vector unsigned int __a, __vector unsigned int __b, int *__cc) { return __builtin_s390_vpklsfs(__a, __b, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_packsu_cc(vector unsigned long long __a, vector unsigned long long __b, +static inline __ATTRS_o_ai __vector unsigned int +vec_packsu_cc(__vector unsigned long long __a, __vector unsigned long long __b, int *__cc) { return __builtin_s390_vpklsgs(__a, __b, __cc); } /*-- vec_unpackh ------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_unpackh(vector signed char __a) { +static inline __ATTRS_o_ai __vector signed short +vec_unpackh(__vector signed char __a) { return __builtin_s390_vuphb(__a); } -static inline __ATTRS_o_ai vector bool short -vec_unpackh(vector bool char __a) { - return (vector bool short)__builtin_s390_vuphb((vector signed char)__a); +static inline __ATTRS_o_ai __vector __bool short +vec_unpackh(__vector __bool char __a) { + return ((__vector __bool short) + __builtin_s390_vuphb((__vector signed char)__a)); } -static inline __ATTRS_o_ai vector unsigned short -vec_unpackh(vector unsigned char __a) { +static inline __ATTRS_o_ai __vector unsigned short +vec_unpackh(__vector unsigned char __a) { return __builtin_s390_vuplhb(__a); } -static inline __ATTRS_o_ai vector signed int -vec_unpackh(vector signed short __a) { +static inline __ATTRS_o_ai __vector signed int +vec_unpackh(__vector signed short __a) { return __builtin_s390_vuphh(__a); } -static inline __ATTRS_o_ai vector bool int -vec_unpackh(vector bool short __a) { - return (vector bool int)__builtin_s390_vuphh((vector signed 
short)__a); +static inline __ATTRS_o_ai __vector __bool int +vec_unpackh(__vector __bool short __a) { + return (__vector __bool int)__builtin_s390_vuphh((__vector signed short)__a); } -static inline __ATTRS_o_ai vector unsigned int -vec_unpackh(vector unsigned short __a) { +static inline __ATTRS_o_ai __vector unsigned int +vec_unpackh(__vector unsigned short __a) { return __builtin_s390_vuplhh(__a); } -static inline __ATTRS_o_ai vector signed long long -vec_unpackh(vector signed int __a) { +static inline __ATTRS_o_ai __vector signed long long +vec_unpackh(__vector signed int __a) { return __builtin_s390_vuphf(__a); } -static inline __ATTRS_o_ai vector bool long long -vec_unpackh(vector bool int __a) { - return (vector bool long long)__builtin_s390_vuphf((vector signed int)__a); +static inline __ATTRS_o_ai __vector __bool long long +vec_unpackh(__vector __bool int __a) { + return ((__vector __bool long long) + __builtin_s390_vuphf((__vector signed int)__a)); } -static inline __ATTRS_o_ai vector unsigned long long -vec_unpackh(vector unsigned int __a) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_unpackh(__vector unsigned int __a) { return __builtin_s390_vuplhf(__a); } /*-- vec_unpackl ------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_unpackl(vector signed char __a) { +static inline __ATTRS_o_ai __vector signed short +vec_unpackl(__vector signed char __a) { return __builtin_s390_vuplb(__a); } -static inline __ATTRS_o_ai vector bool short -vec_unpackl(vector bool char __a) { - return (vector bool short)__builtin_s390_vuplb((vector signed char)__a); +static inline __ATTRS_o_ai __vector __bool short +vec_unpackl(__vector __bool char __a) { + return ((__vector __bool short) + __builtin_s390_vuplb((__vector signed char)__a)); } -static inline __ATTRS_o_ai vector unsigned short -vec_unpackl(vector unsigned char __a) { +static inline __ATTRS_o_ai __vector unsigned short +vec_unpackl(__vector unsigned char __a) { return __builtin_s390_vupllb(__a); } -static inline __ATTRS_o_ai vector signed int -vec_unpackl(vector signed short __a) { +static inline __ATTRS_o_ai __vector signed int +vec_unpackl(__vector signed short __a) { return __builtin_s390_vuplhw(__a); } -static inline __ATTRS_o_ai vector bool int -vec_unpackl(vector bool short __a) { - return (vector bool int)__builtin_s390_vuplhw((vector signed short)__a); +static inline __ATTRS_o_ai __vector __bool int +vec_unpackl(__vector __bool short __a) { + return ((__vector __bool int) + __builtin_s390_vuplhw((__vector signed short)__a)); } -static inline __ATTRS_o_ai vector unsigned int -vec_unpackl(vector unsigned short __a) { +static inline __ATTRS_o_ai __vector unsigned int +vec_unpackl(__vector unsigned short __a) { return __builtin_s390_vupllh(__a); } -static inline __ATTRS_o_ai vector signed long long -vec_unpackl(vector signed int __a) { +static inline __ATTRS_o_ai __vector signed long long +vec_unpackl(__vector signed int __a) { return __builtin_s390_vuplf(__a); } -static inline __ATTRS_o_ai vector bool long long -vec_unpackl(vector bool int __a) { - return (vector bool long long)__builtin_s390_vuplf((vector signed int)__a); +static inline __ATTRS_o_ai __vector __bool long long +vec_unpackl(__vector __bool int __a) { + return ((__vector __bool long long) + __builtin_s390_vuplf((__vector signed int)__a)); } -static inline __ATTRS_o_ai vector unsigned long long -vec_unpackl(vector unsigned int __a) { +static inline __ATTRS_o_ai __vector unsigned long long 
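// Illustrative usage sketch, not part of the patch (helper name is
// hypothetical): vec_unpackh widens the high half of the input and
// vec_unpackl the low half, so the two calls together visit each of
// the eight halfword elements exactly once.
static inline void
example_unpack(__vector signed short __v,
               __vector signed int *__hi, __vector signed int *__lo) {
  *__hi = vec_unpackh(__v);  // elements 0..3, sign-extended to int
  *__lo = vec_unpackl(__v);  // elements 4..7, sign-extended to int
}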
+vec_unpackl(__vector unsigned int __a) { return __builtin_s390_vupllf(__a); } /*-- vec_cmpeq --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_cmpeq(vector bool char __a, vector bool char __b) { - return (vector bool char)(__a == __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpeq(__vector __bool char __a, __vector __bool char __b) { + return (__vector __bool char)(__a == __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmpeq(vector signed char __a, vector signed char __b) { - return (vector bool char)(__a == __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpeq(__vector signed char __a, __vector signed char __b) { + return (__vector __bool char)(__a == __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmpeq(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)(__a == __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpeq(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector __bool char)(__a == __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpeq(vector bool short __a, vector bool short __b) { - return (vector bool short)(__a == __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpeq(__vector __bool short __a, __vector __bool short __b) { + return (__vector __bool short)(__a == __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpeq(vector signed short __a, vector signed short __b) { - return (vector bool short)(__a == __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpeq(__vector signed short __a, __vector signed short __b) { + return (__vector __bool short)(__a == __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpeq(vector unsigned short __a, vector unsigned short __b) { - return (vector bool short)(__a == __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpeq(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector __bool short)(__a == __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpeq(vector bool int __a, vector bool int __b) { - return (vector bool int)(__a == __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpeq(__vector __bool int __a, __vector __bool int __b) { + return (__vector __bool int)(__a == __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpeq(vector signed int __a, vector signed int __b) { - return (vector bool int)(__a == __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpeq(__vector signed int __a, __vector signed int __b) { + return (__vector __bool int)(__a == __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpeq(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)(__a == __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpeq(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)(__a == __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpeq(vector bool long long __a, vector bool long long __b) { - return (vector bool long long)(__a == __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpeq(__vector __bool long long __a, __vector __bool long long __b) { + return (__vector __bool long long)(__a == __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpeq(vector signed long long __a, vector signed long long __b) { - return (vector bool long long)(__a == __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpeq(__vector signed long long __a, 
__vector signed long long __b) { + return (__vector __bool long long)(__a == __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long)(__a == __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpeq(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector __bool long long)(__a == __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool int -vec_cmpeq(vector float __a, vector float __b) { - return (vector bool int)(__a == __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpeq(__vector float __a, __vector float __b) { + return (__vector __bool int)(__a == __b); } #endif -static inline __ATTRS_o_ai vector bool long long -vec_cmpeq(vector double __a, vector double __b) { - return (vector bool long long)(__a == __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpeq(__vector double __a, __vector double __b) { + return (__vector __bool long long)(__a == __b); } /*-- vec_cmpge --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_cmpge(vector signed char __a, vector signed char __b) { - return (vector bool char)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpge(__vector signed char __a, __vector signed char __b) { + return (__vector __bool char)(__a >= __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmpge(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpge(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector __bool char)(__a >= __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpge(vector signed short __a, vector signed short __b) { - return (vector bool short)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpge(__vector signed short __a, __vector signed short __b) { + return (__vector __bool short)(__a >= __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpge(vector unsigned short __a, vector unsigned short __b) { - return (vector bool short)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpge(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector __bool short)(__a >= __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpge(vector signed int __a, vector signed int __b) { - return (vector bool int)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpge(__vector signed int __a, __vector signed int __b) { + return (__vector __bool int)(__a >= __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpge(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpge(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)(__a >= __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpge(vector signed long long __a, vector signed long long __b) { - return (vector bool long long)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpge(__vector signed long long __a, __vector signed long long __b) { + return (__vector __bool long long)(__a >= __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long)(__a >= __b); +static inline __ATTRS_o_ai 
__vector __bool long long +vec_cmpge(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector __bool long long)(__a >= __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool int -vec_cmpge(vector float __a, vector float __b) { - return (vector bool int)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpge(__vector float __a, __vector float __b) { + return (__vector __bool int)(__a >= __b); } #endif -static inline __ATTRS_o_ai vector bool long long -vec_cmpge(vector double __a, vector double __b) { - return (vector bool long long)(__a >= __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpge(__vector double __a, __vector double __b) { + return (__vector __bool long long)(__a >= __b); } /*-- vec_cmpgt --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_cmpgt(vector signed char __a, vector signed char __b) { - return (vector bool char)(__a > __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpgt(__vector signed char __a, __vector signed char __b) { + return (__vector __bool char)(__a > __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmpgt(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)(__a > __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmpgt(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector __bool char)(__a > __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpgt(vector signed short __a, vector signed short __b) { - return (vector bool short)(__a > __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpgt(__vector signed short __a, __vector signed short __b) { + return (__vector __bool short)(__a > __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmpgt(vector unsigned short __a, vector unsigned short __b) { - return (vector bool short)(__a > __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmpgt(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector __bool short)(__a > __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpgt(vector signed int __a, vector signed int __b) { - return (vector bool int)(__a > __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpgt(__vector signed int __a, __vector signed int __b) { + return (__vector __bool int)(__a > __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmpgt(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)(__a > __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpgt(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)(__a > __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpgt(vector signed long long __a, vector signed long long __b) { - return (vector bool long long)(__a > __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpgt(__vector signed long long __a, __vector signed long long __b) { + return (__vector __bool long long)(__a > __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long)(__a > __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpgt(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector __bool long long)(__a > __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool int -vec_cmpgt(vector float __a, vector float __b) { - return (vector bool int)(__a 
> __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmpgt(__vector float __a, __vector float __b) { + return (__vector __bool int)(__a > __b); } #endif -static inline __ATTRS_o_ai vector bool long long -vec_cmpgt(vector double __a, vector double __b) { - return (vector bool long long)(__a > __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmpgt(__vector double __a, __vector double __b) { + return (__vector __bool long long)(__a > __b); } /*-- vec_cmple --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_cmple(vector signed char __a, vector signed char __b) { - return (vector bool char)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmple(__vector signed char __a, __vector signed char __b) { + return (__vector __bool char)(__a <= __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmple(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmple(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector __bool char)(__a <= __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmple(vector signed short __a, vector signed short __b) { - return (vector bool short)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmple(__vector signed short __a, __vector signed short __b) { + return (__vector __bool short)(__a <= __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmple(vector unsigned short __a, vector unsigned short __b) { - return (vector bool short)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmple(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector __bool short)(__a <= __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmple(vector signed int __a, vector signed int __b) { - return (vector bool int)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmple(__vector signed int __a, __vector signed int __b) { + return (__vector __bool int)(__a <= __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmple(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmple(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)(__a <= __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmple(vector signed long long __a, vector signed long long __b) { - return (vector bool long long)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmple(__vector signed long long __a, __vector signed long long __b) { + return (__vector __bool long long)(__a <= __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmple(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector __bool long long)(__a <= __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool int -vec_cmple(vector float __a, vector float __b) { - return (vector bool int)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmple(__vector float __a, __vector float __b) { + return (__vector __bool int)(__a <= __b); } #endif -static inline __ATTRS_o_ai vector bool long long -vec_cmple(vector double __a, vector double __b) { - return (vector bool 
long long)(__a <= __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmple(__vector double __a, __vector double __b) { + return (__vector __bool long long)(__a <= __b); } /*-- vec_cmplt --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_cmplt(vector signed char __a, vector signed char __b) { - return (vector bool char)(__a < __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmplt(__vector signed char __a, __vector signed char __b) { + return (__vector __bool char)(__a < __b); } -static inline __ATTRS_o_ai vector bool char -vec_cmplt(vector unsigned char __a, vector unsigned char __b) { - return (vector bool char)(__a < __b); +static inline __ATTRS_o_ai __vector __bool char +vec_cmplt(__vector unsigned char __a, __vector unsigned char __b) { + return (__vector __bool char)(__a < __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmplt(vector signed short __a, vector signed short __b) { - return (vector bool short)(__a < __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmplt(__vector signed short __a, __vector signed short __b) { + return (__vector __bool short)(__a < __b); } -static inline __ATTRS_o_ai vector bool short -vec_cmplt(vector unsigned short __a, vector unsigned short __b) { - return (vector bool short)(__a < __b); +static inline __ATTRS_o_ai __vector __bool short +vec_cmplt(__vector unsigned short __a, __vector unsigned short __b) { + return (__vector __bool short)(__a < __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmplt(vector signed int __a, vector signed int __b) { - return (vector bool int)(__a < __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmplt(__vector signed int __a, __vector signed int __b) { + return (__vector __bool int)(__a < __b); } -static inline __ATTRS_o_ai vector bool int -vec_cmplt(vector unsigned int __a, vector unsigned int __b) { - return (vector bool int)(__a < __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmplt(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)(__a < __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmplt(vector signed long long __a, vector signed long long __b) { - return (vector bool long long)(__a < __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmplt(__vector signed long long __a, __vector signed long long __b) { + return (__vector __bool long long)(__a < __b); } -static inline __ATTRS_o_ai vector bool long long -vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { - return (vector bool long long)(__a < __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmplt(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector __bool long long)(__a < __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool int -vec_cmplt(vector float __a, vector float __b) { - return (vector bool int)(__a < __b); +static inline __ATTRS_o_ai __vector __bool int +vec_cmplt(__vector float __a, __vector float __b) { + return (__vector __bool int)(__a < __b); } #endif -static inline __ATTRS_o_ai vector bool long long -vec_cmplt(vector double __a, vector double __b) { - return (vector bool long long)(__a < __b); +static inline __ATTRS_o_ai __vector __bool long long +vec_cmplt(__vector double __a, __vector double __b) { + return (__vector __bool long long)(__a < __b); } /*-- vec_all_eq -------------------------------------------------------------*/ static inline __ATTRS_o_ai int 
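// Illustrative usage sketch, not part of the patch (helper name is
// hypothetical): each vec_cmp* above yields a __vector __bool mask with
// all bits set in every element where the relation holds, which plugs
// straight into vec_sel.
static inline __vector signed int
example_max(__vector signed int __a, __vector signed int __b) {
  __vector __bool int __gt = vec_cmpgt(__a, __b);
  return vec_sel(__b, __a, __gt);  // picks __a where __a > __b
}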
-vec_all_eq(vector signed char __a, vector signed char __b) { +vec_all_eq(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc == 0; @@ -2533,56 +2601,56 @@ vec_all_eq(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector signed char __a, vector bool char __b) { +vec_all_eq(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool char __a, vector signed char __b) { +vec_all_eq(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned char __a, vector unsigned char __b) { +vec_all_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned char __a, vector bool char __b) { +vec_all_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool char __a, vector unsigned char __b) { +vec_all_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector bool char __a, vector bool char __b) { +vec_all_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector signed short __a, vector signed short __b) { +vec_all_eq(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 0; @@ -2590,56 +2658,56 @@ vec_all_eq(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector signed short __a, vector bool short __b) { +vec_all_eq(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
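/*-- reviewer note: usage sketch (not part of this patch) --------------------*/
/* The rename from the context-sensitive vector/bool keywords to the reserved
   __vector/__bool spellings is purely mechanical; every prototype keeps its
   semantics. The vec_cmp* family above returns a per-lane mask, while the
   vec_all_* predicates collapse the compare into a scalar via the
   instruction's condition code (CC 0 = relation holds in all lanes).
   A minimal sketch, assuming a z13+ target compiled with -mzvector: */
#include <vecintrin.h>
static inline int all_lanes_equal(__vector signed int a,
                                  __vector signed int b) {
  return vec_all_eq(a, b);  /* 1 iff every element of a equals b */
}
/*---------------------------------------------------------------------------*/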
static inline __ATTRS_o_ai int -vec_all_eq(vector bool short __a, vector signed short __b) { +vec_all_eq(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned short __a, vector unsigned short __b) { +vec_all_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned short __a, vector bool short __b) { +vec_all_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool short __a, vector unsigned short __b) { +vec_all_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector bool short __a, vector bool short __b) { +vec_all_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector signed int __a, vector signed int __b) { +vec_all_eq(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 0; @@ -2647,56 +2715,56 @@ vec_all_eq(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector signed int __a, vector bool int __b) { +vec_all_eq(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool int __a, vector signed int __b) { +vec_all_eq(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned int __a, vector unsigned int __b) { +vec_all_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned int __a, vector bool int __b) { +vec_all_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool int __a, vector unsigned int __b) { +vec_all_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector bool int __a, vector bool int __b) { +vec_all_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector signed long long __a, vector signed long long __b) { +vec_all_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 0; @@ -2704,57 +2772,57 @@ vec_all_eq(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector signed long long __a, vector bool long long __b) { +vec_all_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector bool long long __a, vector signed long long __b) { +vec_all_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_eq(vector unsigned long long __a, vector bool long long __b) { +vec_all_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
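/*-- reviewer note: deprecated overloads (not part of this patch) ------------*/
/* The overloads marked "This prototype is deprecated." are the ones mixing a
   __bool vector with a signed or unsigned vector; they are retained for
   source compatibility only. New code can cast the mask explicitly and hit
   the non-deprecated same-type overload, e.g.: */
#include <vecintrin.h>
static inline int mask_equals_value(__vector __bool int m,
                                    __vector signed int v) {
  /* explicit cast selects the non-deprecated overload */
  return vec_all_eq((__vector signed int)m, v);
}
/*---------------------------------------------------------------------------*/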
static inline __ATTRS_o_ai int -vec_all_eq(vector bool long long __a, vector unsigned long long __b) { +vec_all_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_eq(vector bool long long __a, vector bool long long __b) { +vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_eq(vector float __a, vector float __b) { +vec_all_eq(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc == 0; @@ -2762,7 +2830,7 @@ vec_all_eq(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_eq(vector double __a, vector double __b) { +vec_all_eq(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc == 0; @@ -2771,7 +2839,7 @@ vec_all_eq(vector double __a, vector double __b) { /*-- vec_all_ne -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_all_ne(vector signed char __a, vector signed char __b) { +vec_all_ne(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc == 3; @@ -2779,56 +2847,56 @@ vec_all_ne(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector signed char __a, vector bool char __b) { +vec_all_ne(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector bool char __a, vector signed char __b) { +vec_all_ne(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned char __a, vector unsigned char __b) { +vec_all_ne(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned char __a, vector bool char __b) { +vec_all_ne(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
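/*-- reviewer note: architecture guards (not part of this patch) -------------*/
/* The __vector float overloads stay behind __ARCH__ >= 12: single-precision
   vector support arrived with arch12 (z14), whereas the __vector double
   overloads are available on all vector-enabled targets from z13 onward.
   Callers targeting older machines can mirror the header's own guard: */
#include <vecintrin.h>
#if __ARCH__ >= 12
static inline int floats_all_equal(__vector float a, __vector float b) {
  return vec_all_eq(a, b);
}
#endif
/*---------------------------------------------------------------------------*/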
static inline __ATTRS_o_ai int -vec_all_ne(vector bool char __a, vector unsigned char __b) { +vec_all_ne(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector bool char __a, vector bool char __b) { +vec_all_ne(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector signed short __a, vector signed short __b) { +vec_all_ne(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc == 3; @@ -2836,56 +2904,56 @@ vec_all_ne(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector signed short __a, vector bool short __b) { +vec_all_ne(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector bool short __a, vector signed short __b) { +vec_all_ne(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned short __a, vector unsigned short __b) { +vec_all_ne(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned short __a, vector bool short __b) { +vec_all_ne(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
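/*-- reviewer note: usage sketch (not part of this patch) --------------------*/
/* vec_all_ne reuses the same vceq* builtins as vec_all_eq and simply tests
   CC 3 ("equal in no lane") instead of CC 0 ("equal in every lane"): */
#include <vecintrin.h>
static inline int lanes_all_differ(__vector unsigned char a,
                                   __vector unsigned char b) {
  return vec_all_ne(a, b);  /* 1 iff a and b differ in every lane */
}
/*---------------------------------------------------------------------------*/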
static inline __ATTRS_o_ai int -vec_all_ne(vector bool short __a, vector unsigned short __b) { +vec_all_ne(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector bool short __a, vector bool short __b) { +vec_all_ne(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector signed int __a, vector signed int __b) { +vec_all_ne(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc == 3; @@ -2893,56 +2961,56 @@ vec_all_ne(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector signed int __a, vector bool int __b) { +vec_all_ne(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector bool int __a, vector signed int __b) { +vec_all_ne(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned int __a, vector unsigned int __b) { +vec_all_ne(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned int __a, vector bool int __b) { +vec_all_ne(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_ne(vector bool int __a, vector unsigned int __b) { +vec_all_ne(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector bool int __a, vector bool int __b) { +vec_all_ne(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector signed long long __a, vector signed long long __b) { +vec_all_ne(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc == 3; @@ -2950,57 +3018,57 @@ vec_all_ne(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector signed long long __a, vector bool long long __b) { +vec_all_ne(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector bool long long __a, vector signed long long __b) { +vec_all_ne(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_ne(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ne(vector unsigned long long __a, vector bool long long __b) { +vec_all_ne(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_ne(vector bool long long __a, vector unsigned long long __b) { +vec_all_ne(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ne(vector bool long long __a, vector bool long long __b) { +vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_ne(vector float __a, vector float __b) { +vec_all_ne(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc == 3; @@ -3008,7 +3076,7 @@ vec_all_ne(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_ne(vector double __a, vector double __b) { +vec_all_ne(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc == 3; @@ -3017,7 +3085,7 @@ vec_all_ne(vector double __a, vector double __b) { /*-- vec_all_ge -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_all_ge(vector signed char __a, vector signed char __b) { +vec_all_ge(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc == 3; @@ -3025,22 +3093,22 @@ vec_all_ge(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector signed char __a, vector bool char __b) { +vec_all_ge(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool char __a, vector signed char __b) { +vec_all_ge(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned char __a, vector unsigned char __b) { +vec_all_ge(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc == 3; @@ -3048,31 +3116,31 @@ vec_all_ge(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned char __a, vector bool char __b) { +vec_all_ge(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool char __a, vector unsigned char __b) { +vec_all_ge(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_ge(vector bool char __a, vector bool char __b) { +vec_all_ge(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, - (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, + (__vector unsigned char)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector signed short __a, vector signed short __b) { +vec_all_ge(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc == 3; @@ -3080,22 +3148,22 @@ vec_all_ge(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector signed short __a, vector bool short __b) { +vec_all_ge(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool short __a, vector signed short __b) { +vec_all_ge(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned short __a, vector unsigned short __b) { +vec_all_ge(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc == 3; @@ -3103,31 +3171,31 @@ vec_all_ge(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned short __a, vector bool short __b) { +vec_all_ge(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool short __a, vector unsigned short __b) { +vec_all_ge(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool short __a, vector bool short __b) { +vec_all_ge(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, - (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, + (__vector unsigned short)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector signed int __a, vector signed int __b) { +vec_all_ge(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc == 3; @@ -3135,22 +3203,22 @@ vec_all_ge(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector signed int __a, vector bool int __b) { +vec_all_ge(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. 
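/*-- reviewer note: operand swap in vec_all_ge (not part of this patch) ------*/
/* For the integer overloads there is no compare-high-or-equal builtin, so
   vec_all_ge(a, b) issues vch*(b, a) and requires CC 3: a >= b in every lane
   exactly when b > a holds in no lane. (The float overloads use vfche*
   directly.) Sketch under the same -mzvector assumptions: */
#include <vecintrin.h>
static inline int all_lanes_ge(__vector signed short a,
                               __vector signed short b) {
  return vec_all_ge(a, b);
}
/*---------------------------------------------------------------------------*/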
static inline __ATTRS_o_ai int -vec_all_ge(vector bool int __a, vector signed int __b) { +vec_all_ge(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned int __a, vector unsigned int __b) { +vec_all_ge(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc == 3; @@ -3158,31 +3226,31 @@ vec_all_ge(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned int __a, vector bool int __b) { +vec_all_ge(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool int __a, vector unsigned int __b) { +vec_all_ge(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool int __a, vector bool int __b) { +vec_all_ge(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, - (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, + (__vector unsigned int)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector signed long long __a, vector signed long long __b) { +vec_all_ge(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc == 3; @@ -3190,22 +3258,22 @@ vec_all_ge(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector signed long long __a, vector bool long long __b) { +vec_all_ge(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool long long __a, vector signed long long __b) { +vec_all_ge(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_ge(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc == 3; @@ -3213,32 +3281,32 @@ vec_all_ge(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_ge(vector unsigned long long __a, vector bool long long __b) { +vec_all_ge(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool long long __a, vector unsigned long long __b) { +vec_all_ge(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_ge(vector bool long long __a, vector bool long long __b) { +vec_all_ge(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, - (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, + (__vector unsigned long long)__a, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_ge(vector float __a, vector float __b) { +vec_all_ge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc == 0; @@ -3246,7 +3314,7 @@ vec_all_ge(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_ge(vector double __a, vector double __b) { +vec_all_ge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc == 0; @@ -3255,7 +3323,7 @@ vec_all_ge(vector double __a, vector double __b) { /*-- vec_all_gt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_all_gt(vector signed char __a, vector signed char __b) { +vec_all_gt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc == 0; @@ -3263,22 +3331,22 @@ vec_all_gt(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector signed char __a, vector bool char __b) { +vec_all_gt(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool char __a, vector signed char __b) { +vec_all_gt(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned char __a, vector unsigned char __b) { +vec_all_gt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc == 0; @@ -3286,31 +3354,31 @@ vec_all_gt(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned char __a, vector bool char __b) { +vec_all_gt(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_gt(vector bool char __a, vector unsigned char __b) { +vec_all_gt(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool char __a, vector bool char __b) { +vec_all_gt(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, - (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector signed short __a, vector signed short __b) { +vec_all_gt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc == 0; @@ -3318,22 +3386,22 @@ vec_all_gt(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector signed short __a, vector bool short __b) { +vec_all_gt(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool short __a, vector signed short __b) { +vec_all_gt(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned short __a, vector unsigned short __b) { +vec_all_gt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc == 0; @@ -3341,31 +3409,31 @@ vec_all_gt(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned short __a, vector bool short __b) { +vec_all_gt(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool short __a, vector unsigned short __b) { +vec_all_gt(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool short __a, vector bool short __b) { +vec_all_gt(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, - (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector signed int __a, vector signed int __b) { +vec_all_gt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc == 0; @@ -3373,22 +3441,22 @@ vec_all_gt(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
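/*-- reviewer note: usage sketch (not part of this patch) --------------------*/
/* vec_all_gt maps directly onto the compare-high builtins (vch* for signed,
   vchl* for unsigned operands) with CC 0, i.e. the relation holds in every
   lane; no operand swap is needed here: */
#include <vecintrin.h>
static inline int all_lanes_gt(__vector unsigned int a,
                               __vector unsigned int b) {
  return vec_all_gt(a, b);
}
/*---------------------------------------------------------------------------*/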
static inline __ATTRS_o_ai int -vec_all_gt(vector signed int __a, vector bool int __b) { +vec_all_gt(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool int __a, vector signed int __b) { +vec_all_gt(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned int __a, vector unsigned int __b) { +vec_all_gt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc == 0; @@ -3396,31 +3464,31 @@ vec_all_gt(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned int __a, vector bool int __b) { +vec_all_gt(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool int __a, vector unsigned int __b) { +vec_all_gt(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool int __a, vector bool int __b) { +vec_all_gt(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, - (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector signed long long __a, vector signed long long __b) { +vec_all_gt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc == 0; @@ -3428,22 +3496,22 @@ vec_all_gt(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector signed long long __a, vector bool long long __b) { +vec_all_gt(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool long long __a, vector signed long long __b) { +vec_all_gt(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_gt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc == 0; @@ -3451,32 +3519,32 @@ vec_all_gt(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_gt(vector unsigned long long __a, vector bool long long __b) { +vec_all_gt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool long long __a, vector unsigned long long __b) { +vec_all_gt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_gt(vector bool long long __a, vector bool long long __b) { +vec_all_gt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, - (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_gt(vector float __a, vector float __b) { +vec_all_gt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc == 0; @@ -3484,7 +3552,7 @@ vec_all_gt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_gt(vector double __a, vector double __b) { +vec_all_gt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc == 0; @@ -3493,7 +3561,7 @@ vec_all_gt(vector double __a, vector double __b) { /*-- vec_all_le -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_all_le(vector signed char __a, vector signed char __b) { +vec_all_le(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc == 3; @@ -3501,22 +3569,22 @@ vec_all_le(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector signed char __a, vector bool char __b) { +vec_all_le(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool char __a, vector signed char __b) { +vec_all_le(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector unsigned char __a, vector unsigned char __b) { +vec_all_le(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc == 3; @@ -3524,31 +3592,31 @@ vec_all_le(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector unsigned char __a, vector bool char __b) { +vec_all_le(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc == 3; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_le(vector bool char __a, vector unsigned char __b) { +vec_all_le(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool char __a, vector bool char __b) { +vec_all_le(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, - (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector signed short __a, vector signed short __b) { +vec_all_le(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc == 3; @@ -3556,22 +3624,22 @@ vec_all_le(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector signed short __a, vector bool short __b) { +vec_all_le(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool short __a, vector signed short __b) { +vec_all_le(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector unsigned short __a, vector unsigned short __b) { +vec_all_le(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc == 3; @@ -3579,31 +3647,31 @@ vec_all_le(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector unsigned short __a, vector bool short __b) { +vec_all_le(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool short __a, vector unsigned short __b) { +vec_all_le(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool short __a, vector bool short __b) { +vec_all_le(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, - (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector signed int __a, vector signed int __b) { +vec_all_le(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc == 3; @@ -3611,22 +3679,22 @@ vec_all_le(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
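/*-- reviewer note: usage sketch (not part of this patch) --------------------*/
/* vec_all_le keeps the vec_all_gt operand order but tests CC 3 instead:
   a <= b in every lane exactly when a > b holds in no lane: */
#include <vecintrin.h>
static inline int all_lanes_le(__vector signed char a,
                               __vector signed char b) {
  return vec_all_le(a, b);
}
/*---------------------------------------------------------------------------*/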
static inline __ATTRS_o_ai int -vec_all_le(vector signed int __a, vector bool int __b) { +vec_all_le(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool int __a, vector signed int __b) { +vec_all_le(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector unsigned int __a, vector unsigned int __b) { +vec_all_le(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc == 3; @@ -3634,31 +3702,31 @@ vec_all_le(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector unsigned int __a, vector bool int __b) { +vec_all_le(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool int __a, vector unsigned int __b) { +vec_all_le(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool int __a, vector bool int __b) { +vec_all_le(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, - (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector signed long long __a, vector signed long long __b) { +vec_all_le(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc == 3; @@ -3666,22 +3734,22 @@ vec_all_le(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector signed long long __a, vector bool long long __b) { +vec_all_le(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool long long __a, vector signed long long __b) { +vec_all_le(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc == 3; } static inline __ATTRS_o_ai int -vec_all_le(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_le(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc == 3; @@ -3689,32 +3757,32 @@ vec_all_le(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_le(vector unsigned long long __a, vector bool long long __b) { +vec_all_le(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool long long __a, vector unsigned long long __b) { +vec_all_le(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc == 3; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_le(vector bool long long __a, vector bool long long __b) { +vec_all_le(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, - (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc == 3; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_le(vector float __a, vector float __b) { +vec_all_le(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc == 0; @@ -3722,7 +3790,7 @@ vec_all_le(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_le(vector double __a, vector double __b) { +vec_all_le(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc == 0; @@ -3731,7 +3799,7 @@ vec_all_le(vector double __a, vector double __b) { /*-- vec_all_lt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_all_lt(vector signed char __a, vector signed char __b) { +vec_all_lt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc == 0; @@ -3739,22 +3807,22 @@ vec_all_lt(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector signed char __a, vector bool char __b) { +vec_all_lt(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool char __a, vector signed char __b) { +vec_all_lt(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned char __a, vector unsigned char __b) { +vec_all_lt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc == 0; @@ -3762,31 +3830,31 @@ vec_all_lt(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned char __a, vector bool char __b) { +vec_all_lt(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_lt(vector bool char __a, vector unsigned char __b) { +vec_all_lt(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool char __a, vector bool char __b) { +vec_all_lt(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, - (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, + (__vector unsigned char)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector signed short __a, vector signed short __b) { +vec_all_lt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc == 0; @@ -3794,22 +3862,22 @@ vec_all_lt(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector signed short __a, vector bool short __b) { +vec_all_lt(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool short __a, vector signed short __b) { +vec_all_lt(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned short __a, vector unsigned short __b) { +vec_all_lt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc == 0; @@ -3817,31 +3885,31 @@ vec_all_lt(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned short __a, vector bool short __b) { +vec_all_lt(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool short __a, vector unsigned short __b) { +vec_all_lt(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool short __a, vector bool short __b) { +vec_all_lt(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, - (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, + (__vector unsigned short)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector signed int __a, vector signed int __b) { +vec_all_lt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc == 0; @@ -3849,22 +3917,22 @@ vec_all_lt(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
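/*-- reviewer note: usage sketch (not part of this patch) --------------------*/
/* vec_all_lt swaps the operands of the compare-high builtins and tests CC 0:
   a < b in every lane exactly when b > a holds in every lane: */
#include <vecintrin.h>
static inline int all_lanes_lt(__vector unsigned long long a,
                               __vector unsigned long long b) {
  return vec_all_lt(a, b);
}
/*---------------------------------------------------------------------------*/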
static inline __ATTRS_o_ai int -vec_all_lt(vector signed int __a, vector bool int __b) { +vec_all_lt(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool int __a, vector signed int __b) { +vec_all_lt(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned int __a, vector unsigned int __b) { +vec_all_lt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc == 0; @@ -3872,31 +3940,31 @@ vec_all_lt(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned int __a, vector bool int __b) { +vec_all_lt(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool int __a, vector unsigned int __b) { +vec_all_lt(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool int __a, vector bool int __b) { +vec_all_lt(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, - (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, + (__vector unsigned int)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector signed long long __a, vector signed long long __b) { +vec_all_lt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc == 0; @@ -3904,22 +3972,22 @@ vec_all_lt(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector signed long long __a, vector bool long long __b) { +vec_all_lt(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool long long __a, vector signed long long __b) { +vec_all_lt(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc == 0; } static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned long long __a, vector unsigned long long __b) { +vec_all_lt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc == 0; @@ -3927,32 +3995,32 @@ vec_all_lt(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_all_lt(vector unsigned long long __a, vector bool long long __b) { +vec_all_lt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool long long __a, vector unsigned long long __b) { +vec_all_lt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc == 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_all_lt(vector bool long long __a, vector bool long long __b) { +vec_all_lt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, - (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, + (__vector unsigned long long)__a, &__cc); return __cc == 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_lt(vector float __a, vector float __b) { +vec_all_lt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc == 0; @@ -3960,7 +4028,7 @@ vec_all_lt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_lt(vector double __a, vector double __b) { +vec_all_lt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc == 0; @@ -3970,7 +4038,7 @@ vec_all_lt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_nge(vector float __a, vector float __b) { +vec_all_nge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc == 3; @@ -3978,7 +4046,7 @@ vec_all_nge(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_nge(vector double __a, vector double __b) { +vec_all_nge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc == 3; @@ -3988,7 +4056,7 @@ vec_all_nge(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_ngt(vector float __a, vector float __b) { +vec_all_ngt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc == 3; @@ -3996,7 +4064,7 @@ vec_all_ngt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_ngt(vector double __a, vector double __b) { +vec_all_ngt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc == 3; @@ -4006,7 +4074,7 @@ vec_all_ngt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_nle(vector float __a, vector float __b) { +vec_all_nle(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc == 3; @@ -4014,7 +4082,7 @@ vec_all_nle(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_nle(vector double __a, vector double __b) { +vec_all_nle(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc == 3; @@ -4024,7 +4092,7 @@ vec_all_nle(vector double __a, vector double __b) { #if __ARCH__ >= 12 static 
inline __ATTRS_o_ai int -vec_all_nlt(vector float __a, vector float __b) { +vec_all_nlt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc == 3; @@ -4032,7 +4100,7 @@ vec_all_nlt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_all_nlt(vector double __a, vector double __b) { +vec_all_nlt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc == 3; @@ -4042,7 +4110,7 @@ vec_all_nlt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_nan(vector float __a) { +vec_all_nan(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc == 0; @@ -4050,7 +4118,7 @@ vec_all_nan(vector float __a) { #endif static inline __ATTRS_o_ai int -vec_all_nan(vector double __a) { +vec_all_nan(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc == 0; @@ -4060,7 +4128,7 @@ vec_all_nan(vector double __a) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_all_numeric(vector float __a) { +vec_all_numeric(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc == 3; @@ -4068,7 +4136,7 @@ vec_all_numeric(vector float __a) { #endif static inline __ATTRS_o_ai int -vec_all_numeric(vector double __a) { +vec_all_numeric(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc == 3; @@ -4077,7 +4145,7 @@ vec_all_numeric(vector double __a) { /*-- vec_any_eq -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_eq(vector signed char __a, vector signed char __b) { +vec_any_eq(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc <= 1; @@ -4085,56 +4153,56 @@ vec_any_eq(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector signed char __a, vector bool char __b) { +vec_any_eq(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector bool char __a, vector signed char __b) { +vec_any_eq(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned char __a, vector unsigned char __b) { +vec_any_eq(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned char __a, vector bool char __b) { +vec_any_eq(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
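The vec_all_nan/vec_all_numeric pair above both issue the same VECTOR FP TEST DATA CLASS builtin with mask 15, which on my reading of the instruction's class bits selects the four NaN classes; CC 0 then means every element matched (all NaN) and CC 3 that none did (all numeric). A sketch of how the two compose, with the helper name being illustrative:

#include <vecintrin.h>

/* 1 iff at least one lane is a NaN: the all-numeric predicate fails
   exactly when some element falls in a NaN class. (The header also
   provides vec_any_nan; this just shows the CC 0 / CC 3 duality.) */
static int has_nan(__vector double v) {
  return !vec_all_numeric(v);
}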
static inline __ATTRS_o_ai int -vec_any_eq(vector bool char __a, vector unsigned char __b) { +vec_any_eq(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector bool char __a, vector bool char __b) { +vec_any_eq(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector signed short __a, vector signed short __b) { +vec_any_eq(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc <= 1; @@ -4142,56 +4210,56 @@ vec_any_eq(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector signed short __a, vector bool short __b) { +vec_any_eq(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector bool short __a, vector signed short __b) { +vec_any_eq(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned short __a, vector unsigned short __b) { +vec_any_eq(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned short __a, vector bool short __b) { +vec_any_eq(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_eq(vector bool short __a, vector unsigned short __b) { +vec_any_eq(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector bool short __a, vector bool short __b) { +vec_any_eq(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector signed int __a, vector signed int __b) { +vec_any_eq(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc <= 1; @@ -4199,56 +4267,56 @@ vec_any_eq(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector signed int __a, vector bool int __b) { +vec_any_eq(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector bool int __a, vector signed int __b) { +vec_any_eq(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned int __a, vector unsigned int __b) { +vec_any_eq(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned int __a, vector bool int __b) { +vec_any_eq(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_eq(vector bool int __a, vector unsigned int __b) { +vec_any_eq(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector bool int __a, vector bool int __b) { +vec_any_eq(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector signed long long __a, vector signed long long __b) { +vec_any_eq(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc <= 1; @@ -4256,57 +4324,57 @@ vec_any_eq(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector signed long long __a, vector bool long long __b) { +vec_any_eq(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector bool long long __a, vector signed long long __b) { +vec_any_eq(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_eq(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_eq(vector unsigned long long __a, vector bool long long __b) { +vec_any_eq(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_eq(vector bool long long __a, vector unsigned long long __b) { +vec_any_eq(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_eq(vector bool long long __a, vector bool long long __b) { +vec_any_eq(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_eq(vector float __a, vector float __b) { +vec_any_eq(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc <= 1; @@ -4314,7 +4382,7 @@ vec_any_eq(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_eq(vector double __a, vector double __b) { +vec_any_eq(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc <= 1; @@ -4323,7 +4391,7 @@ vec_any_eq(vector double __a, vector double __b) { /*-- vec_any_ne -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_ne(vector signed char __a, vector signed char __b) { +vec_any_ne(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vceqbs(__a, __b, &__cc); return __cc != 0; @@ -4331,56 +4399,56 @@ vec_any_ne(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector signed char __a, vector bool char __b) { +vec_any_ne(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vceqbs(__a, (__vector signed char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector bool char __a, vector signed char __b) { +vec_any_ne(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned char __a, vector unsigned char __b) { +vec_any_ne(__vector unsigned char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned char __a, vector bool char __b) { +vec_any_ne(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
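For the vec_any_eq family, the test is __cc <= 1, which accepts both the all-equal case (CC 0) and the mixed case (CC 1). Note also that the unsigned and __bool variants all funnel into the signed vceq* builtins: bitwise equality does not depend on signedness, so a reinterpreting cast is enough. A small sketch of the relationship between the any- and all-forms, helper name illustrative:

#include <vecintrin.h>

/* "Some but not all lanes equal": the two predicates differ only on
   CC 1, so this is 1 exactly when the compare produced a mixed mask. */
static int partially_equal(__vector unsigned int a, __vector unsigned int b) {
  return vec_any_eq(a, b) && !vec_all_eq(a, b);
}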
static inline __ATTRS_o_ai int -vec_any_ne(vector bool char __a, vector unsigned char __b) { +vec_any_ne(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector bool char __a, vector bool char __b) { +vec_any_ne(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vceqbs((vector signed char)__a, - (vector signed char)__b, &__cc); + __builtin_s390_vceqbs((__vector signed char)__a, + (__vector signed char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector signed short __a, vector signed short __b) { +vec_any_ne(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vceqhs(__a, __b, &__cc); return __cc != 0; @@ -4388,56 +4456,56 @@ vec_any_ne(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector signed short __a, vector bool short __b) { +vec_any_ne(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vceqhs(__a, (__vector signed short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector bool short __a, vector signed short __b) { +vec_any_ne(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned short __a, vector unsigned short __b) { +vec_any_ne(__vector unsigned short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned short __a, vector bool short __b) { +vec_any_ne(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_ne(vector bool short __a, vector unsigned short __b) { +vec_any_ne(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector bool short __a, vector bool short __b) { +vec_any_ne(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vceqhs((vector signed short)__a, - (vector signed short)__b, &__cc); + __builtin_s390_vceqhs((__vector signed short)__a, + (__vector signed short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector signed int __a, vector signed int __b) { +vec_any_ne(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vceqfs(__a, __b, &__cc); return __cc != 0; @@ -4445,56 +4513,56 @@ vec_any_ne(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector signed int __a, vector bool int __b) { +vec_any_ne(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vceqfs(__a, (__vector signed int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector bool int __a, vector signed int __b) { +vec_any_ne(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned int __a, vector unsigned int __b) { +vec_any_ne(__vector unsigned int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned int __a, vector bool int __b) { +vec_any_ne(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_ne(vector bool int __a, vector unsigned int __b) { +vec_any_ne(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector bool int __a, vector bool int __b) { +vec_any_ne(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vceqfs((vector signed int)__a, - (vector signed int)__b, &__cc); + __builtin_s390_vceqfs((__vector signed int)__a, + (__vector signed int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector signed long long __a, vector signed long long __b) { +vec_any_ne(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vceqgs(__a, __b, &__cc); return __cc != 0; @@ -4502,57 +4570,57 @@ vec_any_ne(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector signed long long __a, vector bool long long __b) { +vec_any_ne(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs(__a, (__vector signed long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector bool long long __a, vector signed long long __b) { +vec_any_ne(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_ne(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ne(vector unsigned long long __a, vector bool long long __b) { +vec_any_ne(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_ne(vector bool long long __a, vector unsigned long long __b) { +vec_any_ne(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ne(vector bool long long __a, vector bool long long __b) { +vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vceqgs((vector signed long long)__a, - (vector signed long long)__b, &__cc); + __builtin_s390_vceqgs((__vector signed long long)__a, + (__vector signed long long)__b, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_ne(vector float __a, vector float __b) { +vec_any_ne(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfcesbs(__a, __b, &__cc); return __cc != 0; @@ -4560,7 +4628,7 @@ vec_any_ne(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_ne(vector double __a, vector double __b) { +vec_any_ne(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfcedbs(__a, __b, &__cc); return __cc != 0; @@ -4569,7 +4637,7 @@ vec_any_ne(vector double __a, vector double __b) { /*-- vec_any_ge -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_ge(vector signed char __a, vector signed char __b) { +vec_any_ge(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc != 0; @@ -4577,22 +4645,22 @@ vec_any_ge(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector signed char __a, vector bool char __b) { +vec_any_ge(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool char __a, vector signed char __b) { +vec_any_ge(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned char __a, vector unsigned char __b) { +vec_any_ge(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc != 0; @@ -4600,31 +4668,31 @@ vec_any_ge(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned char __a, vector bool char __b) { +vec_any_ge(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool char __a, vector unsigned char __b) { +vec_any_ge(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc != 0; } // This prototype is deprecated. 
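All of the mixed element-type-versus-__bool prototypes in these hunks carry the "This prototype is deprecated." comment; callers holding a comparison mask are expected to reinterpret it explicitly rather than lean on these overloads. A sketch of the preferred style, assuming vec_cmpgt's usual __bool result type:

#include <vecintrin.h>

static int example(__vector signed char x, __vector signed char y,
                   __vector signed char z) {
  __vector __bool char mask = vec_cmpgt(x, y);
  /* Preferred over the deprecated vec_any_ne(mask, z) overload:
     make the reinterpretation of the mask explicit. */
  return vec_any_ne((__vector signed char)mask, z);
}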
static inline __ATTRS_o_ai int -vec_any_ge(vector bool char __a, vector bool char __b) { +vec_any_ge(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, - (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, + (__vector unsigned char)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector signed short __a, vector signed short __b) { +vec_any_ge(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc != 0; @@ -4632,22 +4700,22 @@ vec_any_ge(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector signed short __a, vector bool short __b) { +vec_any_ge(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool short __a, vector signed short __b) { +vec_any_ge(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned short __a, vector unsigned short __b) { +vec_any_ge(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc != 0; @@ -4655,31 +4723,31 @@ vec_any_ge(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned short __a, vector bool short __b) { +vec_any_ge(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool short __a, vector unsigned short __b) { +vec_any_ge(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool short __a, vector bool short __b) { +vec_any_ge(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, - (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, + (__vector unsigned short)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector signed int __a, vector signed int __b) { +vec_any_ge(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc != 0; @@ -4687,22 +4755,22 @@ vec_any_ge(vector signed int __a, vector signed int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector signed int __a, vector bool int __b) { +vec_any_ge(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_ge(vector bool int __a, vector signed int __b) { +vec_any_ge(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned int __a, vector unsigned int __b) { +vec_any_ge(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc != 0; @@ -4710,31 +4778,31 @@ vec_any_ge(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned int __a, vector bool int __b) { +vec_any_ge(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool int __a, vector unsigned int __b) { +vec_any_ge(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool int __a, vector bool int __b) { +vec_any_ge(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, - (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, + (__vector unsigned int)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector signed long long __a, vector signed long long __b) { +vec_any_ge(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc != 0; @@ -4742,22 +4810,22 @@ vec_any_ge(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector signed long long __a, vector bool long long __b) { +vec_any_ge(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool long long __a, vector signed long long __b) { +vec_any_ge(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_ge(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc != 0; @@ -4765,32 +4833,32 @@ vec_any_ge(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_ge(vector unsigned long long __a, vector bool long long __b) { +vec_any_ge(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool long long __a, vector unsigned long long __b) { +vec_any_ge(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_ge(vector bool long long __a, vector bool long long __b) { +vec_any_ge(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, - (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, + (__vector unsigned long long)__a, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_ge(vector float __a, vector float __b) { +vec_any_ge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc <= 1; @@ -4798,7 +4866,7 @@ vec_any_ge(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_ge(vector double __a, vector double __b) { +vec_any_ge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc <= 1; @@ -4807,7 +4875,7 @@ vec_any_ge(vector double __a, vector double __b) { /*-- vec_any_gt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_gt(vector signed char __a, vector signed char __b) { +vec_any_gt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc <= 1; @@ -4815,22 +4883,22 @@ vec_any_gt(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector signed char __a, vector bool char __b) { +vec_any_gt(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool char __a, vector signed char __b) { +vec_any_gt(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned char __a, vector unsigned char __b) { +vec_any_gt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc <= 1; @@ -4838,31 +4906,31 @@ vec_any_gt(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned char __a, vector bool char __b) { +vec_any_gt(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. 
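One subtlety in the floating-point overloads above: vec_any_ge(a, b) tests CC <= 1 of an ordered a >= b compare, which is not simply the negation of vec_all_lt(a, b) once NaNs are involved, because unordered lanes make both compares come up false. A small sketch of the distinction (names illustrative):

#include <vecintrin.h>

static void nan_subtlety(__vector double a, __vector double b,
                         int *any_ge, int *not_all_lt) {
  *any_ge = vec_any_ge(a, b);      /* 0 if every lane is unordered */
  *not_all_lt = !vec_all_lt(a, b); /* 1 in that same situation */
}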
static inline __ATTRS_o_ai int -vec_any_gt(vector bool char __a, vector unsigned char __b) { +vec_any_gt(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool char __a, vector bool char __b) { +vec_any_gt(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, - (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector signed short __a, vector signed short __b) { +vec_any_gt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc <= 1; @@ -4870,22 +4938,22 @@ vec_any_gt(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector signed short __a, vector bool short __b) { +vec_any_gt(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool short __a, vector signed short __b) { +vec_any_gt(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned short __a, vector unsigned short __b) { +vec_any_gt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc <= 1; @@ -4893,31 +4961,31 @@ vec_any_gt(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned short __a, vector bool short __b) { +vec_any_gt(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool short __a, vector unsigned short __b) { +vec_any_gt(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool short __a, vector bool short __b) { +vec_any_gt(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, - (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector signed int __a, vector signed int __b) { +vec_any_gt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc <= 1; @@ -4925,22 +4993,22 @@ vec_any_gt(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_gt(vector signed int __a, vector bool int __b) { +vec_any_gt(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool int __a, vector signed int __b) { +vec_any_gt(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned int __a, vector unsigned int __b) { +vec_any_gt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc <= 1; @@ -4948,31 +5016,31 @@ vec_any_gt(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned int __a, vector bool int __b) { +vec_any_gt(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool int __a, vector unsigned int __b) { +vec_any_gt(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool int __a, vector bool int __b) { +vec_any_gt(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, - (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector signed long long __a, vector signed long long __b) { +vec_any_gt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc <= 1; @@ -4980,22 +5048,22 @@ vec_any_gt(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector signed long long __a, vector bool long long __b) { +vec_any_gt(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool long long __a, vector signed long long __b) { +vec_any_gt(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_gt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc <= 1; @@ -5003,32 +5071,32 @@ vec_any_gt(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_gt(vector unsigned long long __a, vector bool long long __b) { +vec_any_gt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool long long __a, vector unsigned long long __b) { +vec_any_gt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_gt(vector bool long long __a, vector bool long long __b) { +vec_any_gt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, - (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_gt(vector float __a, vector float __b) { +vec_any_gt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc <= 1; @@ -5036,7 +5104,7 @@ vec_any_gt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_gt(vector double __a, vector double __b) { +vec_any_gt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc <= 1; @@ -5045,7 +5113,7 @@ vec_any_gt(vector double __a, vector double __b) { /*-- vec_any_le -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_le(vector signed char __a, vector signed char __b) { +vec_any_le(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__a, __b, &__cc); return __cc != 0; @@ -5053,22 +5121,22 @@ vec_any_le(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector signed char __a, vector bool char __b) { +vec_any_le(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs(__a, (vector signed char)__b, &__cc); + __builtin_s390_vchbs(__a, (__vector signed char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool char __a, vector signed char __b) { +vec_any_le(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__a, __b, &__cc); + __builtin_s390_vchbs((__vector signed char)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector unsigned char __a, vector unsigned char __b) { +vec_any_le(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__a, __b, &__cc); return __cc != 0; @@ -5076,31 +5144,31 @@ vec_any_le(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector unsigned char __a, vector bool char __b) { +vec_any_le(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs(__a, (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs(__a, (__vector unsigned char)__b, &__cc); return __cc != 0; } // This prototype is deprecated. 
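As elsewhere in the header, the __vector float overloads are fenced behind __ARCH__ >= 12 (the z14 vector-enhancements facility added single-precision vector FP), while the double overloads come with the base z13 vector facility. Code meant to build for both levels can guard accordingly; a sketch:

#include <vecintrin.h>

#if __ARCH__ >= 12
static int any_le_f32(__vector float a, __vector float b) {
  return vec_any_le(a, b); /* single-precision overload needs arch12 (z14) */
}
#endif

static int any_le_f64(__vector double a, __vector double b) {
  return vec_any_le(a, b); /* double has been available since z13 */
}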
static inline __ATTRS_o_ai int -vec_any_le(vector bool char __a, vector unsigned char __b) { +vec_any_le(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, __b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool char __a, vector bool char __b) { +vec_any_le(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__a, - (vector unsigned char)__b, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__a, + (__vector unsigned char)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector signed short __a, vector signed short __b) { +vec_any_le(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__a, __b, &__cc); return __cc != 0; @@ -5108,22 +5176,22 @@ vec_any_le(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector signed short __a, vector bool short __b) { +vec_any_le(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs(__a, (vector signed short)__b, &__cc); + __builtin_s390_vchhs(__a, (__vector signed short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool short __a, vector signed short __b) { +vec_any_le(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__a, __b, &__cc); + __builtin_s390_vchhs((__vector signed short)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector unsigned short __a, vector unsigned short __b) { +vec_any_le(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__a, __b, &__cc); return __cc != 0; @@ -5131,31 +5199,31 @@ vec_any_le(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector unsigned short __a, vector bool short __b) { +vec_any_le(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs(__a, (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs(__a, (__vector unsigned short)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool short __a, vector unsigned short __b) { +vec_any_le(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, __b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool short __a, vector bool short __b) { +vec_any_le(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__a, - (vector unsigned short)__b, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__a, + (__vector unsigned short)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector signed int __a, vector signed int __b) { +vec_any_le(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__a, __b, &__cc); return __cc != 0; @@ -5163,22 +5231,22 @@ vec_any_le(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_le(vector signed int __a, vector bool int __b) { +vec_any_le(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs(__a, (vector signed int)__b, &__cc); + __builtin_s390_vchfs(__a, (__vector signed int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool int __a, vector signed int __b) { +vec_any_le(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__a, __b, &__cc); + __builtin_s390_vchfs((__vector signed int)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector unsigned int __a, vector unsigned int __b) { +vec_any_le(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__a, __b, &__cc); return __cc != 0; @@ -5186,31 +5254,31 @@ vec_any_le(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector unsigned int __a, vector bool int __b) { +vec_any_le(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs(__a, (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs(__a, (__vector unsigned int)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool int __a, vector unsigned int __b) { +vec_any_le(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, __b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool int __a, vector bool int __b) { +vec_any_le(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__a, - (vector unsigned int)__b, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__a, + (__vector unsigned int)__b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector signed long long __a, vector signed long long __b) { +vec_any_le(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__a, __b, &__cc); return __cc != 0; @@ -5218,22 +5286,22 @@ vec_any_le(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector signed long long __a, vector bool long long __b) { +vec_any_le(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs(__a, (vector signed long long)__b, &__cc); + __builtin_s390_vchgs(__a, (__vector signed long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool long long __a, vector signed long long __b) { +vec_any_le(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__a, __b, &__cc); + __builtin_s390_vchgs((__vector signed long long)__a, __b, &__cc); return __cc != 0; } static inline __ATTRS_o_ai int -vec_any_le(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_le(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__a, __b, &__cc); return __cc != 0; @@ -5241,32 +5309,32 @@ vec_any_le(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_le(vector unsigned long long __a, vector bool long long __b) { +vec_any_le(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs(__a, (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs(__a, (__vector unsigned long long)__b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool long long __a, vector unsigned long long __b) { +vec_any_le(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, __b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, __b, &__cc); return __cc != 0; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_le(vector bool long long __a, vector bool long long __b) { +vec_any_le(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__a, - (vector unsigned long long)__b, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__a, + (__vector unsigned long long)__b, &__cc); return __cc != 0; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_le(vector float __a, vector float __b) { +vec_any_le(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc <= 1; @@ -5274,7 +5342,7 @@ vec_any_le(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_le(vector double __a, vector double __b) { +vec_any_le(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc <= 1; @@ -5283,7 +5351,7 @@ vec_any_le(vector double __a, vector double __b) { /*-- vec_any_lt -------------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_any_lt(vector signed char __a, vector signed char __b) { +vec_any_lt(__vector signed char __a, __vector signed char __b) { int __cc; __builtin_s390_vchbs(__b, __a, &__cc); return __cc <= 1; @@ -5291,22 +5359,22 @@ vec_any_lt(vector signed char __a, vector signed char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector signed char __a, vector bool char __b) { +vec_any_lt(__vector signed char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchbs((vector signed char)__b, __a, &__cc); + __builtin_s390_vchbs((__vector signed char)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool char __a, vector signed char __b) { +vec_any_lt(__vector __bool char __a, __vector signed char __b) { int __cc; - __builtin_s390_vchbs(__b, (vector signed char)__a, &__cc); + __builtin_s390_vchbs(__b, (__vector signed char)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned char __a, vector unsigned char __b) { +vec_any_lt(__vector unsigned char __a, __vector unsigned char __b) { int __cc; __builtin_s390_vchlbs(__b, __a, &__cc); return __cc <= 1; @@ -5314,31 +5382,31 @@ vec_any_lt(vector unsigned char __a, vector unsigned char __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned char __a, vector bool char __b) { +vec_any_lt(__vector unsigned char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, __a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. 
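The float and double overloads above encode a <= b by calling the "compare high or equal" builtin with the operands swapped (vfchesbs(__b, __a)), relying on the identity a <= b iff b >= a, which holds lane-wise even when a NaN makes both comparisons false. An illustrative equivalence check, not part of the patch (the helper name is hypothetical):

  static int le_matches_swapped_ge(__vector double __a, __vector double __b) {
    /* Both calls should agree for every input, NaN lanes included. */
    return vec_any_le(__a, __b) == vec_any_ge(__b, __a);
  }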
static inline __ATTRS_o_ai int -vec_any_lt(vector bool char __a, vector unsigned char __b) { +vec_any_lt(__vector __bool char __a, __vector unsigned char __b) { int __cc; - __builtin_s390_vchlbs(__b, (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs(__b, (__vector unsigned char)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool char __a, vector bool char __b) { +vec_any_lt(__vector __bool char __a, __vector __bool char __b) { int __cc; - __builtin_s390_vchlbs((vector unsigned char)__b, - (vector unsigned char)__a, &__cc); + __builtin_s390_vchlbs((__vector unsigned char)__b, + (__vector unsigned char)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector signed short __a, vector signed short __b) { +vec_any_lt(__vector signed short __a, __vector signed short __b) { int __cc; __builtin_s390_vchhs(__b, __a, &__cc); return __cc <= 1; @@ -5346,22 +5414,22 @@ vec_any_lt(vector signed short __a, vector signed short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector signed short __a, vector bool short __b) { +vec_any_lt(__vector signed short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchhs((vector signed short)__b, __a, &__cc); + __builtin_s390_vchhs((__vector signed short)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool short __a, vector signed short __b) { +vec_any_lt(__vector __bool short __a, __vector signed short __b) { int __cc; - __builtin_s390_vchhs(__b, (vector signed short)__a, &__cc); + __builtin_s390_vchhs(__b, (__vector signed short)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned short __a, vector unsigned short __b) { +vec_any_lt(__vector unsigned short __a, __vector unsigned short __b) { int __cc; __builtin_s390_vchlhs(__b, __a, &__cc); return __cc <= 1; @@ -5369,31 +5437,31 @@ vec_any_lt(vector unsigned short __a, vector unsigned short __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned short __a, vector bool short __b) { +vec_any_lt(__vector unsigned short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, __a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool short __a, vector unsigned short __b) { +vec_any_lt(__vector __bool short __a, __vector unsigned short __b) { int __cc; - __builtin_s390_vchlhs(__b, (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs(__b, (__vector unsigned short)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool short __a, vector bool short __b) { +vec_any_lt(__vector __bool short __a, __vector __bool short __b) { int __cc; - __builtin_s390_vchlhs((vector unsigned short)__b, - (vector unsigned short)__a, &__cc); + __builtin_s390_vchlhs((__vector unsigned short)__b, + (__vector unsigned short)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector signed int __a, vector signed int __b) { +vec_any_lt(__vector signed int __a, __vector signed int __b) { int __cc; __builtin_s390_vchfs(__b, __a, &__cc); return __cc <= 1; @@ -5401,22 +5469,22 @@ vec_any_lt(vector signed int __a, vector signed int __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_lt(vector signed int __a, vector bool int __b) { +vec_any_lt(__vector signed int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchfs((vector signed int)__b, __a, &__cc); + __builtin_s390_vchfs((__vector signed int)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool int __a, vector signed int __b) { +vec_any_lt(__vector __bool int __a, __vector signed int __b) { int __cc; - __builtin_s390_vchfs(__b, (vector signed int)__a, &__cc); + __builtin_s390_vchfs(__b, (__vector signed int)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned int __a, vector unsigned int __b) { +vec_any_lt(__vector unsigned int __a, __vector unsigned int __b) { int __cc; __builtin_s390_vchlfs(__b, __a, &__cc); return __cc <= 1; @@ -5424,31 +5492,31 @@ vec_any_lt(vector unsigned int __a, vector unsigned int __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned int __a, vector bool int __b) { +vec_any_lt(__vector unsigned int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, __a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool int __a, vector unsigned int __b) { +vec_any_lt(__vector __bool int __a, __vector unsigned int __b) { int __cc; - __builtin_s390_vchlfs(__b, (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs(__b, (__vector unsigned int)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool int __a, vector bool int __b) { +vec_any_lt(__vector __bool int __a, __vector __bool int __b) { int __cc; - __builtin_s390_vchlfs((vector unsigned int)__b, - (vector unsigned int)__a, &__cc); + __builtin_s390_vchlfs((__vector unsigned int)__b, + (__vector unsigned int)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector signed long long __a, vector signed long long __b) { +vec_any_lt(__vector signed long long __a, __vector signed long long __b) { int __cc; __builtin_s390_vchgs(__b, __a, &__cc); return __cc <= 1; @@ -5456,22 +5524,22 @@ vec_any_lt(vector signed long long __a, vector signed long long __b) { // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector signed long long __a, vector bool long long __b) { +vec_any_lt(__vector signed long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchgs((vector signed long long)__b, __a, &__cc); + __builtin_s390_vchgs((__vector signed long long)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool long long __a, vector signed long long __b) { +vec_any_lt(__vector __bool long long __a, __vector signed long long __b) { int __cc; - __builtin_s390_vchgs(__b, (vector signed long long)__a, &__cc); + __builtin_s390_vchgs(__b, (__vector signed long long)__a, &__cc); return __cc <= 1; } static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) { +vec_any_lt(__vector unsigned long long __a, __vector unsigned long long __b) { int __cc; __builtin_s390_vchlgs(__b, __a, &__cc); return __cc <= 1; @@ -5479,32 +5547,32 @@ vec_any_lt(vector unsigned long long __a, vector unsigned long long __b) { // This prototype is deprecated. 
static inline __ATTRS_o_ai int -vec_any_lt(vector unsigned long long __a, vector bool long long __b) { +vec_any_lt(__vector unsigned long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, __a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, __a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool long long __a, vector unsigned long long __b) { +vec_any_lt(__vector __bool long long __a, __vector unsigned long long __b) { int __cc; - __builtin_s390_vchlgs(__b, (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs(__b, (__vector unsigned long long)__a, &__cc); return __cc <= 1; } // This prototype is deprecated. static inline __ATTRS_o_ai int -vec_any_lt(vector bool long long __a, vector bool long long __b) { +vec_any_lt(__vector __bool long long __a, __vector __bool long long __b) { int __cc; - __builtin_s390_vchlgs((vector unsigned long long)__b, - (vector unsigned long long)__a, &__cc); + __builtin_s390_vchlgs((__vector unsigned long long)__b, + (__vector unsigned long long)__a, &__cc); return __cc <= 1; } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_lt(vector float __a, vector float __b) { +vec_any_lt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc <= 1; @@ -5512,7 +5580,7 @@ vec_any_lt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_lt(vector double __a, vector double __b) { +vec_any_lt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc <= 1; @@ -5522,7 +5590,7 @@ vec_any_lt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_nge(vector float __a, vector float __b) { +vec_any_nge(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__a, __b, &__cc); return __cc != 0; @@ -5530,7 +5598,7 @@ vec_any_nge(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_nge(vector double __a, vector double __b) { +vec_any_nge(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__a, __b, &__cc); return __cc != 0; @@ -5540,7 +5608,7 @@ vec_any_nge(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_ngt(vector float __a, vector float __b) { +vec_any_ngt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__a, __b, &__cc); return __cc != 0; @@ -5548,7 +5616,7 @@ vec_any_ngt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_ngt(vector double __a, vector double __b) { +vec_any_ngt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__a, __b, &__cc); return __cc != 0; @@ -5558,7 +5626,7 @@ vec_any_ngt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_nle(vector float __a, vector float __b) { +vec_any_nle(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchesbs(__b, __a, &__cc); return __cc != 0; @@ -5566,7 +5634,7 @@ vec_any_nle(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_nle(vector double __a, vector double __b) { +vec_any_nle(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchedbs(__b, __a, &__cc); return __cc != 0; @@ -5576,7 +5644,7 @@ vec_any_nle(vector double __a, vector double __b) { #if __ARCH__ >= 12 static 
inline __ATTRS_o_ai int -vec_any_nlt(vector float __a, vector float __b) { +vec_any_nlt(__vector float __a, __vector float __b) { int __cc; __builtin_s390_vfchsbs(__b, __a, &__cc); return __cc != 0; @@ -5584,7 +5652,7 @@ vec_any_nlt(vector float __a, vector float __b) { #endif static inline __ATTRS_o_ai int -vec_any_nlt(vector double __a, vector double __b) { +vec_any_nlt(__vector double __a, __vector double __b) { int __cc; __builtin_s390_vfchdbs(__b, __a, &__cc); return __cc != 0; @@ -5594,7 +5662,7 @@ vec_any_nlt(vector double __a, vector double __b) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_nan(vector float __a) { +vec_any_nan(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc != 3; @@ -5602,7 +5670,7 @@ vec_any_nan(vector float __a) { #endif static inline __ATTRS_o_ai int -vec_any_nan(vector double __a) { +vec_any_nan(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc != 3; @@ -5612,7 +5680,7 @@ vec_any_nan(vector double __a) { #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_any_numeric(vector float __a) { +vec_any_numeric(__vector float __a) { int __cc; __builtin_s390_vftcisb(__a, 15, &__cc); return __cc != 0; @@ -5620,7 +5688,7 @@ vec_any_numeric(vector float __a) { #endif static inline __ATTRS_o_ai int -vec_any_numeric(vector double __a) { +vec_any_numeric(__vector double __a) { int __cc; __builtin_s390_vftcidb(__a, 15, &__cc); return __cc != 0; @@ -5628,2389 +5696,2393 @@ vec_any_numeric(vector double __a) { /*-- vec_andc ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_andc(vector bool char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector __bool char +vec_andc(__vector __bool char __a, __vector __bool char __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector signed char -vec_andc(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_andc(__vector signed char __a, __vector signed char __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed char -vec_andc(vector bool char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_andc(__vector __bool char __a, __vector signed char __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed char -vec_andc(vector signed char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_andc(__vector signed char __a, __vector __bool char __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector unsigned char -vec_andc(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_andc(__vector unsigned char __a, __vector unsigned char __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned char -vec_andc(vector bool char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_andc(__vector __bool char __a, __vector unsigned char __b) { return __a & ~__b; } // This prototype is deprecated. 
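vec_any_nan and vec_any_numeric, just above, are thin wrappers over the test-data-class builtins (__builtin_s390_vftcisb/vftcidb) and are untouched by this rename except for the __vector spelling. A hedged usage sketch; the helper names are hypothetical:

  static int has_nan(__vector double __v) {
    return vec_any_nan(__v);      /* nonzero if any lane is a NaN */
  }

  static int has_numeric(__vector double __v) {
    return vec_any_numeric(__v);  /* nonzero if any lane is not a NaN */
  }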
-static inline __ATTRS_o_ai vector unsigned char -vec_andc(vector unsigned char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_andc(__vector unsigned char __a, __vector __bool char __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector bool short -vec_andc(vector bool short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector __bool short +vec_andc(__vector __bool short __a, __vector __bool short __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector signed short -vec_andc(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_andc(__vector signed short __a, __vector signed short __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed short -vec_andc(vector bool short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_andc(__vector __bool short __a, __vector signed short __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed short -vec_andc(vector signed short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_andc(__vector signed short __a, __vector __bool short __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector unsigned short -vec_andc(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_andc(__vector unsigned short __a, __vector unsigned short __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned short -vec_andc(vector bool short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_andc(__vector __bool short __a, __vector unsigned short __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned short -vec_andc(vector unsigned short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_andc(__vector unsigned short __a, __vector __bool short __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector bool int -vec_andc(vector bool int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector __bool int +vec_andc(__vector __bool int __a, __vector __bool int __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector signed int -vec_andc(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_andc(__vector signed int __a, __vector signed int __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed int -vec_andc(vector bool int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_andc(__vector __bool int __a, __vector signed int __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed int -vec_andc(vector signed int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_andc(__vector signed int __a, __vector __bool int __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector unsigned int -vec_andc(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_andc(__vector unsigned int __a, __vector unsigned int __b) { return __a & ~__b; } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector unsigned int -vec_andc(vector bool int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_andc(__vector __bool int __a, __vector unsigned int __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned int -vec_andc(vector unsigned int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_andc(__vector unsigned int __a, __vector __bool int __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector bool long long -vec_andc(vector bool long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector __bool long long +vec_andc(__vector __bool long long __a, __vector __bool long long __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector signed long long -vec_andc(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_andc(__vector signed long long __a, __vector signed long long __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed long long -vec_andc(vector bool long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_andc(__vector __bool long long __a, __vector signed long long __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed long long -vec_andc(vector signed long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_andc(__vector signed long long __a, __vector __bool long long __b) { return __a & ~__b; } -static inline __ATTRS_o_ai vector unsigned long long -vec_andc(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_andc(__vector unsigned long long __a, __vector unsigned long long __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_andc(vector bool long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_andc(__vector __bool long long __a, __vector unsigned long long __b) { return __a & ~__b; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_andc(vector unsigned long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_andc(__vector unsigned long long __a, __vector __bool long long __b) { return __a & ~__b; } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_andc(vector float __a, vector float __b) { - return (vector float)((vector unsigned int)__a & - ~(vector unsigned int)__b); +static inline __ATTRS_o_ai __vector float +vec_andc(__vector float __a, __vector float __b) { + return (__vector float)((__vector unsigned int)__a & + ~(__vector unsigned int)__b); } #endif -static inline __ATTRS_o_ai vector double -vec_andc(vector double __a, vector double __b) { - return (vector double)((vector unsigned long long)__a & - ~(vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_andc(__vector double __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a & + ~(__vector unsigned long long)__b); } // This prototype is deprecated. 
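The float and double vec_andc overloads above cannot apply & and ~ directly, because the GNU vector extensions define bitwise operators only for integer element types; the bodies therefore bitcast to the same-width unsigned vectors, operate there, and cast back. A stand-alone sketch of the same technique (assumption: this mirrors the header's approach; the helper name is hypothetical):

  static __vector double andc_bits(__vector double __a, __vector double __b) {
    /* Lane-width-preserving bitcasts around the integer bitwise op. */
    return (__vector double)((__vector unsigned long long)__a &
                             ~(__vector unsigned long long)__b);
  }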
-static inline __ATTRS_o_ai vector double -vec_andc(vector bool long long __a, vector double __b) { - return (vector double)((vector unsigned long long)__a & - ~(vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_andc(__vector __bool long long __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a & + ~(__vector unsigned long long)__b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector double -vec_andc(vector double __a, vector bool long long __b) { - return (vector double)((vector unsigned long long)__a & - ~(vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_andc(__vector double __a, __vector __bool long long __b) { + return (__vector double)((__vector unsigned long long)__a & + ~(__vector unsigned long long)__b); } /*-- vec_nor ----------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_nor(vector bool char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector __bool char +vec_nor(__vector __bool char __a, __vector __bool char __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector signed char -vec_nor(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_nor(__vector signed char __a, __vector signed char __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed char -vec_nor(vector bool char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_nor(__vector __bool char __a, __vector signed char __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed char -vec_nor(vector signed char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_nor(__vector signed char __a, __vector __bool char __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_nor(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_nor(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned char -vec_nor(vector bool char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_nor(__vector __bool char __a, __vector unsigned char __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned char -vec_nor(vector unsigned char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_nor(__vector unsigned char __a, __vector __bool char __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector bool short -vec_nor(vector bool short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector __bool short +vec_nor(__vector __bool short __a, __vector __bool short __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector signed short -vec_nor(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_nor(__vector signed short __a, __vector signed short __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed short -vec_nor(vector bool short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_nor(__vector __bool short __a, __vector signed short __b) { return ~(__a | __b); } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector signed short -vec_nor(vector signed short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_nor(__vector signed short __a, __vector __bool short __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_nor(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_nor(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned short -vec_nor(vector bool short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_nor(__vector __bool short __a, __vector unsigned short __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned short -vec_nor(vector unsigned short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_nor(__vector unsigned short __a, __vector __bool short __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector bool int -vec_nor(vector bool int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector __bool int +vec_nor(__vector __bool int __a, __vector __bool int __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector signed int -vec_nor(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_nor(__vector signed int __a, __vector signed int __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed int -vec_nor(vector bool int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_nor(__vector __bool int __a, __vector signed int __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed int -vec_nor(vector signed int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_nor(__vector signed int __a, __vector __bool int __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_nor(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_nor(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned int -vec_nor(vector bool int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_nor(__vector __bool int __a, __vector unsigned int __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned int -vec_nor(vector unsigned int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_nor(__vector unsigned int __a, __vector __bool int __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector bool long long -vec_nor(vector bool long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector __bool long long +vec_nor(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector signed long long -vec_nor(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_nor(__vector signed long long __a, __vector signed long long __b) { return ~(__a | __b); } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector signed long long -vec_nor(vector bool long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_nor(__vector __bool long long __a, __vector signed long long __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector signed long long -vec_nor(vector signed long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_nor(__vector signed long long __a, __vector __bool long long __b) { return ~(__a | __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_nor(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_nor(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_nor(vector bool long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_nor(__vector __bool long long __a, __vector unsigned long long __b) { return ~(__a | __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_nor(vector unsigned long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_nor(__vector unsigned long long __a, __vector __bool long long __b) { return ~(__a | __b); } #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_nor(vector float __a, vector float __b) { - return (vector float)~((vector unsigned int)__a | - (vector unsigned int)__b); +static inline __ATTRS_o_ai __vector float +vec_nor(__vector float __a, __vector float __b) { + return (__vector float)~((__vector unsigned int)__a | + (__vector unsigned int)__b); } #endif -static inline __ATTRS_o_ai vector double -vec_nor(vector double __a, vector double __b) { - return (vector double)~((vector unsigned long long)__a | - (vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_nor(__vector double __a, __vector double __b) { + return (__vector double)~((__vector unsigned long long)__a | + (__vector unsigned long long)__b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector double -vec_nor(vector bool long long __a, vector double __b) { - return (vector double)~((vector unsigned long long)__a | - (vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_nor(__vector __bool long long __a, __vector double __b) { + return (__vector double)~((__vector unsigned long long)__a | + (__vector unsigned long long)__b); } // This prototype is deprecated. 
-static inline __ATTRS_o_ai vector double -vec_nor(vector double __a, vector bool long long __b) { - return (vector double)~((vector unsigned long long)__a | - (vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_nor(__vector double __a, __vector __bool long long __b) { + return (__vector double)~((__vector unsigned long long)__a | + (__vector unsigned long long)__b); } /*-- vec_orc ----------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool char -vec_orc(vector bool char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector __bool char +vec_orc(__vector __bool char __a, __vector __bool char __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector signed char -vec_orc(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_orc(__vector signed char __a, __vector signed char __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector unsigned char -vec_orc(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_orc(__vector unsigned char __a, __vector unsigned char __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector bool short -vec_orc(vector bool short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector __bool short +vec_orc(__vector __bool short __a, __vector __bool short __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector signed short -vec_orc(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_orc(__vector signed short __a, __vector signed short __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector unsigned short -vec_orc(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_orc(__vector unsigned short __a, __vector unsigned short __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector bool int -vec_orc(vector bool int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector __bool int +vec_orc(__vector __bool int __a, __vector __bool int __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector signed int -vec_orc(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_orc(__vector signed int __a, __vector signed int __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector unsigned int -vec_orc(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_orc(__vector unsigned int __a, __vector unsigned int __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector bool long long -vec_orc(vector bool long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector __bool long long +vec_orc(__vector __bool long long __a, __vector __bool long long __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector signed long long -vec_orc(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_orc(__vector signed long long __a, __vector signed long long __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector unsigned long long -vec_orc(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_orc(__vector unsigned long long __a, __vector unsigned long long __b) { return __a | ~__b; } -static inline __ATTRS_o_ai vector float -vec_orc(vector float __a, 
vector float __b) { - return (vector float)((vector unsigned int)__a | - ~(vector unsigned int)__b); +static inline __ATTRS_o_ai __vector float +vec_orc(__vector float __a, __vector float __b) { + return (__vector float)((__vector unsigned int)__a | + ~(__vector unsigned int)__b); } -static inline __ATTRS_o_ai vector double -vec_orc(vector double __a, vector double __b) { - return (vector double)((vector unsigned long long)__a | - ~(vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_orc(__vector double __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a | + ~(__vector unsigned long long)__b); } #endif /*-- vec_nand ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool char -vec_nand(vector bool char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector __bool char +vec_nand(__vector __bool char __a, __vector __bool char __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector signed char -vec_nand(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_nand(__vector signed char __a, __vector signed char __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_nand(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_nand(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector bool short -vec_nand(vector bool short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector __bool short +vec_nand(__vector __bool short __a, __vector __bool short __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector signed short -vec_nand(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_nand(__vector signed short __a, __vector signed short __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_nand(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_nand(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector bool int -vec_nand(vector bool int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector __bool int +vec_nand(__vector __bool int __a, __vector __bool int __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector signed int -vec_nand(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_nand(__vector signed int __a, __vector signed int __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_nand(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_nand(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector bool long long -vec_nand(vector bool long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector __bool long long +vec_nand(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector signed long long -vec_nand(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_nand(__vector signed long long __a, __vector signed long long __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector 
unsigned long long -vec_nand(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_nand(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a & __b); } -static inline __ATTRS_o_ai vector float -vec_nand(vector float __a, vector float __b) { - return (vector float)~((vector unsigned int)__a & - (vector unsigned int)__b); +static inline __ATTRS_o_ai __vector float +vec_nand(__vector float __a, __vector float __b) { + return (__vector float)~((__vector unsigned int)__a & + (__vector unsigned int)__b); } -static inline __ATTRS_o_ai vector double -vec_nand(vector double __a, vector double __b) { - return (vector double)~((vector unsigned long long)__a & - (vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_nand(__vector double __a, __vector double __b) { + return (__vector double)~((__vector unsigned long long)__a & + (__vector unsigned long long)__b); } #endif /*-- vec_eqv ----------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector bool char -vec_eqv(vector bool char __a, vector bool char __b) { +static inline __ATTRS_o_ai __vector __bool char +vec_eqv(__vector __bool char __a, __vector __bool char __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector signed char -vec_eqv(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_eqv(__vector signed char __a, __vector signed char __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_eqv(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_eqv(__vector unsigned char __a, __vector unsigned char __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector bool short -vec_eqv(vector bool short __a, vector bool short __b) { +static inline __ATTRS_o_ai __vector __bool short +vec_eqv(__vector __bool short __a, __vector __bool short __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector signed short -vec_eqv(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_eqv(__vector signed short __a, __vector signed short __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_eqv(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_eqv(__vector unsigned short __a, __vector unsigned short __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector bool int -vec_eqv(vector bool int __a, vector bool int __b) { +static inline __ATTRS_o_ai __vector __bool int +vec_eqv(__vector __bool int __a, __vector __bool int __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector signed int -vec_eqv(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_eqv(__vector signed int __a, __vector signed int __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_eqv(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_eqv(__vector unsigned int __a, __vector unsigned int __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector bool long long -vec_eqv(vector bool long long __a, vector bool long long __b) { +static inline __ATTRS_o_ai __vector __bool long long +vec_eqv(__vector __bool long long __a, __vector __bool long long __b) { return ~(__a ^ __b); } 
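vec_orc, vec_nand, and vec_eqv all sit inside #if __ARCH__ >= 12, so they are declared only when targeting the newer vector facility; on older targets the same values are obtainable from the plain operators. A hedged fallback sketch (the helper name is hypothetical, not part of the header):

  static __vector unsigned int eqv_u32(__vector unsigned int __a,
                                       __vector unsigned int __b) {
  #if __ARCH__ >= 12
    return vec_eqv(__a, __b);
  #else
    return ~(__a ^ __b);   /* same value without the arch12-only intrinsic */
  #endif
  }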
-static inline __ATTRS_o_ai vector signed long long -vec_eqv(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_eqv(__vector signed long long __a, __vector signed long long __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_eqv(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_eqv(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a ^ __b); } -static inline __ATTRS_o_ai vector float -vec_eqv(vector float __a, vector float __b) { - return (vector float)~((vector unsigned int)__a ^ - (vector unsigned int)__b); +static inline __ATTRS_o_ai __vector float +vec_eqv(__vector float __a, __vector float __b) { + return (__vector float)~((__vector unsigned int)__a ^ + (__vector unsigned int)__b); } -static inline __ATTRS_o_ai vector double -vec_eqv(vector double __a, vector double __b) { - return (vector double)~((vector unsigned long long)__a ^ - (vector unsigned long long)__b); +static inline __ATTRS_o_ai __vector double +vec_eqv(__vector double __a, __vector double __b) { + return (__vector double)~((__vector unsigned long long)__a ^ + (__vector unsigned long long)__b); } #endif /*-- vec_cntlz --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_cntlz(vector signed char __a) { - return __builtin_s390_vclzb((vector unsigned char)__a); +static inline __ATTRS_o_ai __vector unsigned char +vec_cntlz(__vector signed char __a) { + return __builtin_s390_vclzb((__vector unsigned char)__a); } -static inline __ATTRS_o_ai vector unsigned char -vec_cntlz(vector unsigned char __a) { +static inline __ATTRS_o_ai __vector unsigned char +vec_cntlz(__vector unsigned char __a) { return __builtin_s390_vclzb(__a); } -static inline __ATTRS_o_ai vector unsigned short -vec_cntlz(vector signed short __a) { - return __builtin_s390_vclzh((vector unsigned short)__a); +static inline __ATTRS_o_ai __vector unsigned short +vec_cntlz(__vector signed short __a) { + return __builtin_s390_vclzh((__vector unsigned short)__a); } -static inline __ATTRS_o_ai vector unsigned short -vec_cntlz(vector unsigned short __a) { +static inline __ATTRS_o_ai __vector unsigned short +vec_cntlz(__vector unsigned short __a) { return __builtin_s390_vclzh(__a); } -static inline __ATTRS_o_ai vector unsigned int -vec_cntlz(vector signed int __a) { - return __builtin_s390_vclzf((vector unsigned int)__a); +static inline __ATTRS_o_ai __vector unsigned int +vec_cntlz(__vector signed int __a) { + return __builtin_s390_vclzf((__vector unsigned int)__a); } -static inline __ATTRS_o_ai vector unsigned int -vec_cntlz(vector unsigned int __a) { +static inline __ATTRS_o_ai __vector unsigned int +vec_cntlz(__vector unsigned int __a) { return __builtin_s390_vclzf(__a); } -static inline __ATTRS_o_ai vector unsigned long long -vec_cntlz(vector signed long long __a) { - return __builtin_s390_vclzg((vector unsigned long long)__a); +static inline __ATTRS_o_ai __vector unsigned long long +vec_cntlz(__vector signed long long __a) { + return __builtin_s390_vclzg((__vector unsigned long long)__a); } -static inline __ATTRS_o_ai vector unsigned long long -vec_cntlz(vector unsigned long long __a) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_cntlz(__vector unsigned long long __a) { return __builtin_s390_vclzg(__a); } /*-- vec_cnttz --------------------------------------------------------------*/ 
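As with vec_cntlz above, the vec_cnttz overloads that follow (and vec_popcnt after them) funnel the signed variants through a bitcast to the unsigned builtin, and every overload returns the unsigned vector of the same element width; only the type spelling changes in this patch. A small sketch (hypothetical helper name):

  static __vector unsigned int lane_popcount(__vector signed int __v) {
    return vec_popcnt(__v);   /* per-lane population count, unsigned result */
  }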
-static inline __ATTRS_o_ai vector unsigned char
-vec_cnttz(vector signed char __a) {
-  return __builtin_s390_vctzb((vector unsigned char)__a);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cnttz(__vector signed char __a) {
+  return __builtin_s390_vctzb((__vector unsigned char)__a);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_cnttz(vector unsigned char __a) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cnttz(__vector unsigned char __a) {
   return __builtin_s390_vctzb(__a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_cnttz(vector signed short __a) {
-  return __builtin_s390_vctzh((vector unsigned short)__a);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cnttz(__vector signed short __a) {
+  return __builtin_s390_vctzh((__vector unsigned short)__a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_cnttz(vector unsigned short __a) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cnttz(__vector unsigned short __a) {
   return __builtin_s390_vctzh(__a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_cnttz(vector signed int __a) {
-  return __builtin_s390_vctzf((vector unsigned int)__a);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cnttz(__vector signed int __a) {
+  return __builtin_s390_vctzf((__vector unsigned int)__a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_cnttz(vector unsigned int __a) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cnttz(__vector unsigned int __a) {
   return __builtin_s390_vctzf(__a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_cnttz(vector signed long long __a) {
-  return __builtin_s390_vctzg((vector unsigned long long)__a);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_cnttz(__vector signed long long __a) {
+  return __builtin_s390_vctzg((__vector unsigned long long)__a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_cnttz(vector unsigned long long __a) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_cnttz(__vector unsigned long long __a) {
   return __builtin_s390_vctzg(__a);
 }

 /*-- vec_popcnt -------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector unsigned char
-vec_popcnt(vector signed char __a) {
-  return __builtin_s390_vpopctb((vector unsigned char)__a);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_popcnt(__vector signed char __a) {
+  return __builtin_s390_vpopctb((__vector unsigned char)__a);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_popcnt(vector unsigned char __a) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_popcnt(__vector unsigned char __a) {
   return __builtin_s390_vpopctb(__a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_popcnt(vector signed short __a) {
-  return __builtin_s390_vpopcth((vector unsigned short)__a);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_popcnt(__vector signed short __a) {
+  return __builtin_s390_vpopcth((__vector unsigned short)__a);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_popcnt(vector unsigned short __a) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_popcnt(__vector unsigned short __a) {
   return __builtin_s390_vpopcth(__a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_popcnt(vector signed int __a) {
-  return __builtin_s390_vpopctf((vector unsigned int)__a);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_popcnt(__vector signed int __a) {
+  return __builtin_s390_vpopctf((__vector unsigned int)__a);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_popcnt(vector unsigned int __a) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_popcnt(__vector unsigned int __a) {
   return __builtin_s390_vpopctf(__a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_popcnt(vector signed long long __a) {
-  return __builtin_s390_vpopctg((vector unsigned long long)__a);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_popcnt(__vector signed long long __a) {
+  return __builtin_s390_vpopctg((__vector unsigned long long)__a);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_popcnt(vector unsigned long long __a) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_popcnt(__vector unsigned long long __a) {
   return __builtin_s390_vpopctg(__a);
 }

 /*-- vec_rl -----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_rl(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_verllvb(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_rl(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_verllvb(
+    (__vector unsigned char)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_rl(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_rl(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_verllvb(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_rl(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_verllvh(
-    (vector unsigned short)__a, __b);
+static inline __ATTRS_o_ai __vector signed short
+vec_rl(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_verllvh(
+    (__vector unsigned short)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_rl(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_rl(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_verllvh(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_rl(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_verllvf(
-    (vector unsigned int)__a, __b);
+static inline __ATTRS_o_ai __vector signed int
+vec_rl(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_verllvf(
+    (__vector unsigned int)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_rl(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_rl(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_verllvf(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_rl(vector signed long long __a, vector unsigned long long __b) {
-  return (vector signed long long)__builtin_s390_verllvg(
-    (vector unsigned long long)__a, __b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_rl(__vector signed long long __a, __vector unsigned long long __b) {
+  return (__vector signed long long)__builtin_s390_verllvg(
+    (__vector unsigned long long)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_rl(__vector unsigned long long __a,
+       __vector unsigned long long __b) {
   return __builtin_s390_verllvg(__a, __b);
 }

 /*-- vec_rli ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_rli(vector signed char __a, unsigned long __b) {
-  return (vector signed char)__builtin_s390_verllb(
-    (vector unsigned char)__a, (int)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_rli(__vector signed char __a, unsigned long __b) {
+  return (__vector signed char)__builtin_s390_verllb(
+    (__vector unsigned char)__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_rli(vector unsigned char __a, unsigned long __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_rli(__vector unsigned char __a, unsigned long __b) {
   return __builtin_s390_verllb(__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_rli(vector signed short __a, unsigned long __b) {
-  return (vector signed short)__builtin_s390_verllh(
-    (vector unsigned short)__a, (int)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_rli(__vector signed short __a, unsigned long __b) {
+  return (__vector signed short)__builtin_s390_verllh(
+    (__vector unsigned short)__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_rli(vector unsigned short __a, unsigned long __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_rli(__vector unsigned short __a, unsigned long __b) {
   return __builtin_s390_verllh(__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_rli(vector signed int __a, unsigned long __b) {
-  return (vector signed int)__builtin_s390_verllf(
-    (vector unsigned int)__a, (int)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_rli(__vector signed int __a, unsigned long __b) {
+  return (__vector signed int)__builtin_s390_verllf(
+    (__vector unsigned int)__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_rli(vector unsigned int __a, unsigned long __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_rli(__vector unsigned int __a, unsigned long __b) {
   return __builtin_s390_verllf(__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_rli(vector signed long long __a, unsigned long __b) {
-  return (vector signed long long)__builtin_s390_verllg(
-    (vector unsigned long long)__a, (int)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_rli(__vector signed long long __a, unsigned long __b) {
+  return (__vector signed long long)__builtin_s390_verllg(
+    (__vector unsigned long long)__a, (int)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_rli(vector unsigned long long __a, unsigned long __b) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_rli(__vector unsigned long long __a, unsigned long __b) {
   return __builtin_s390_verllg(__a, (int)__b);
 }

 /*-- vec_rl_mask ------------------------------------------------------------*/

-extern __ATTRS_o vector signed char
-vec_rl_mask(vector signed char __a, vector unsigned char __b,
+extern __ATTRS_o __vector signed char
+vec_rl_mask(__vector signed char __a, __vector unsigned char __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector unsigned char
-vec_rl_mask(vector unsigned char __a, vector unsigned char __b,
+extern __ATTRS_o __vector unsigned char
+vec_rl_mask(__vector unsigned char __a, __vector unsigned char __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector signed short
-vec_rl_mask(vector signed short __a, vector unsigned short __b,
+extern __ATTRS_o __vector signed short
+vec_rl_mask(__vector signed short __a, __vector unsigned short __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector unsigned short
-vec_rl_mask(vector unsigned short __a, vector unsigned short __b,
+extern __ATTRS_o __vector unsigned short
+vec_rl_mask(__vector unsigned short __a, __vector unsigned short __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector signed int
-vec_rl_mask(vector signed int __a, vector unsigned int __b,
+extern __ATTRS_o __vector signed int
+vec_rl_mask(__vector signed int __a, __vector unsigned int __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector unsigned int
-vec_rl_mask(vector unsigned int __a, vector unsigned int __b,
+extern __ATTRS_o __vector unsigned int
+vec_rl_mask(__vector unsigned int __a, __vector unsigned int __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector signed long long
-vec_rl_mask(vector signed long long __a, vector unsigned long long __b,
+extern __ATTRS_o __vector signed long long
+vec_rl_mask(__vector signed long long __a, __vector unsigned long long __b,
             unsigned char __c) __constant(__c);

-extern __ATTRS_o vector unsigned long long
-vec_rl_mask(vector unsigned long long __a, vector unsigned long long __b,
+extern __ATTRS_o __vector unsigned long long
+vec_rl_mask(__vector unsigned long long __a, __vector unsigned long long __b,
             unsigned char __c) __constant(__c);

 #define vec_rl_mask(X, Y, Z) ((__typeof__((vec_rl_mask)((X), (Y), (Z)))) \
   __extension__ ({ \
-    vector unsigned char __res; \
-    vector unsigned char __x = (vector unsigned char)(X); \
-    vector unsigned char __y = (vector unsigned char)(Y); \
+    __vector unsigned char __res; \
+    __vector unsigned char __x = (__vector unsigned char)(X); \
+    __vector unsigned char __y = (__vector unsigned char)(Y); \
     switch (sizeof ((X)[0])) { \
-    case 1: __res = (vector unsigned char) __builtin_s390_verimb( \
-      (vector unsigned char)__x, (vector unsigned char)__x, \
-      (vector unsigned char)__y, (Z)); break; \
-    case 2: __res = (vector unsigned char) __builtin_s390_verimh( \
-      (vector unsigned short)__x, (vector unsigned short)__x, \
-      (vector unsigned short)__y, (Z)); break; \
-    case 4: __res = (vector unsigned char) __builtin_s390_verimf( \
-      (vector unsigned int)__x, (vector unsigned int)__x, \
-      (vector unsigned int)__y, (Z)); break; \
-    default: __res = (vector unsigned char) __builtin_s390_verimg( \
-      (vector unsigned long long)__x, (vector unsigned long long)__x, \
-      (vector unsigned long long)__y, (Z)); break; \
+    case 1: __res = (__vector unsigned char) __builtin_s390_verimb( \
+      (__vector unsigned char)__x, (__vector unsigned char)__x, \
+      (__vector unsigned char)__y, (Z)); break; \
+    case 2: __res = (__vector unsigned char) __builtin_s390_verimh( \
+      (__vector unsigned short)__x, (__vector unsigned short)__x, \
+      (__vector unsigned short)__y, (Z)); break; \
+    case 4: __res = (__vector unsigned char) __builtin_s390_verimf( \
+      (__vector unsigned int)__x, (__vector unsigned int)__x, \
+      (__vector unsigned int)__y, (Z)); break; \
+    default: __res = (__vector unsigned char) __builtin_s390_verimg( \
+      (__vector unsigned long long)__x, (__vector unsigned long long)__x, \
+      (__vector unsigned long long)__y, (Z)); break; \
     } __res; }))

 /*-- vec_sll ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_sll(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sll(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_sll(vector signed char __a, vector unsigned short __b) {
-  return (vector signed char)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sll(__vector signed char __a, __vector unsigned short __b) {
+  return (__vector signed char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_sll(vector signed char __a, vector unsigned int __b) {
-  return (vector signed char)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sll(__vector signed char __a, __vector unsigned int __b) {
+  return (__vector signed char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sll(vector bool char __a, vector unsigned char __b) {
-  return (vector bool char)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sll(__vector __bool char __a, __vector unsigned char __b) {
+  return (__vector __bool char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sll(vector bool char __a, vector unsigned short __b) {
-  return (vector bool char)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sll(__vector __bool char __a, __vector unsigned short __b) {
+  return (__vector __bool char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sll(vector bool char __a, vector unsigned int __b) {
-  return (vector bool char)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sll(__vector __bool char __a, __vector unsigned int __b) {
+  return (__vector __bool char)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_sll(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sll(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vsl(__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_sll(vector unsigned char __a, vector unsigned short __b) {
-  return __builtin_s390_vsl(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sll(__vector unsigned char __a, __vector unsigned short __b) {
+  return __builtin_s390_vsl(__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_sll(vector unsigned char __a, vector unsigned int __b) {
-  return __builtin_s390_vsl(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sll(__vector unsigned char __a, __vector unsigned int __b) {
+  return __builtin_s390_vsl(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_sll(vector signed short __a, vector unsigned char __b) {
-  return (vector signed short)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sll(__vector signed short __a, __vector unsigned char __b) {
+  return (__vector signed short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_sll(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sll(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_sll(vector signed short __a, vector unsigned int __b) {
-  return (vector signed short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sll(__vector signed short __a, __vector unsigned int __b) {
+  return (__vector signed short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sll(vector bool short __a, vector unsigned char __b) {
-  return (vector bool short)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sll(__vector __bool short __a, __vector unsigned char __b) {
+  return (__vector __bool short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sll(vector bool short __a, vector unsigned short __b) {
-  return (vector bool short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sll(__vector __bool short __a, __vector unsigned short __b) {
+  return (__vector __bool short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sll(vector bool short __a, vector unsigned int __b) {
-  return (vector bool short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sll(__vector __bool short __a, __vector unsigned int __b) {
+  return (__vector __bool short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_sll(vector unsigned short __a, vector unsigned char __b) {
-  return (vector unsigned short)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sll(__vector unsigned short __a, __vector unsigned char __b) {
+  return (__vector unsigned short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_sll(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sll(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_sll(vector unsigned short __a, vector unsigned int __b) {
-  return (vector unsigned short)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sll(__vector unsigned short __a, __vector unsigned int __b) {
+  return (__vector unsigned short)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_sll(vector signed int __a, vector unsigned char __b) {
-  return (vector signed int)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sll(__vector signed int __a, __vector unsigned char __b) {
+  return (__vector signed int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_sll(vector signed int __a, vector unsigned short __b) {
-  return (vector signed int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sll(__vector signed int __a, __vector unsigned short __b) {
+  return (__vector signed int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_sll(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sll(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sll(vector bool int __a, vector unsigned char __b) {
-  return (vector bool int)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sll(__vector __bool int __a, __vector unsigned char __b) {
+  return (__vector __bool int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sll(vector bool int __a, vector unsigned short __b) {
-  return (vector bool int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sll(__vector __bool int __a, __vector unsigned short __b) {
+  return (__vector __bool int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sll(vector bool int __a, vector unsigned int __b) {
-  return (vector bool int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sll(__vector __bool int __a, __vector unsigned int __b) {
+  return (__vector __bool int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_sll(vector unsigned int __a, vector unsigned char __b) {
-  return (vector unsigned int)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sll(__vector unsigned int __a, __vector unsigned char __b) {
+  return (__vector unsigned int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_sll(vector unsigned int __a, vector unsigned short __b) {
-  return (vector unsigned int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sll(__vector unsigned int __a, __vector unsigned short __b) {
+  return (__vector unsigned int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_sll(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sll(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_sll(vector signed long long __a, vector unsigned char __b) {
-  return (vector signed long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sll(__vector signed long long __a, __vector unsigned char __b) {
+  return (__vector signed long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_sll(vector signed long long __a, vector unsigned short __b) {
-  return (vector signed long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sll(__vector signed long long __a, __vector unsigned short __b) {
+  return (__vector signed long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_sll(vector signed long long __a, vector unsigned int __b) {
-  return (vector signed long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sll(__vector signed long long __a, __vector unsigned int __b) {
+  return (__vector signed long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_sll(vector bool long long __a, vector unsigned char __b) {
-  return (vector bool long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sll(__vector __bool long long __a, __vector unsigned char __b) {
+  return (__vector __bool long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_sll(vector bool long long __a, vector unsigned short __b) {
-  return (vector bool long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sll(__vector __bool long long __a, __vector unsigned short __b) {
+  return (__vector __bool long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_sll(vector bool long long __a, vector unsigned int __b) {
-  return (vector bool long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sll(__vector __bool long long __a, __vector unsigned int __b) {
+  return (__vector __bool long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_sll(vector unsigned long long __a, vector unsigned char __b) {
-  return (vector unsigned long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_sll(__vector unsigned long long __a, __vector unsigned char __b) {
+  return (__vector unsigned long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_sll(vector unsigned long long __a, vector unsigned short __b) {
-  return (vector unsigned long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_sll(__vector unsigned long long __a, __vector unsigned short __b) {
+  return (__vector unsigned long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_sll(vector unsigned long long __a, vector unsigned int __b) {
-  return (vector unsigned long long)__builtin_s390_vsl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_sll(__vector unsigned long long __a, __vector unsigned int __b) {
+  return (__vector unsigned long long)__builtin_s390_vsl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 /*-- vec_slb ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_slb(vector signed char __a, vector signed char __b) {
-  return (vector signed char)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_slb(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed char
-vec_slb(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vslb(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_slb(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vslb(
+    (__vector unsigned char)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_slb(vector unsigned char __a, vector signed char __b) {
-  return __builtin_s390_vslb(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_slb(__vector unsigned char __a, __vector signed char __b) {
+  return __builtin_s390_vslb(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_slb(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_slb(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vslb(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_slb(vector signed short __a, vector signed short __b) {
-  return (vector signed short)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_slb(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_slb(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_slb(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_slb(vector unsigned short __a, vector signed short __b) {
-  return (vector unsigned short)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_slb(__vector unsigned short __a, __vector signed short __b) {
+  return (__vector unsigned short)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_slb(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_slb(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_slb(vector signed int __a, vector signed int __b) {
-  return (vector signed int)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_slb(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_slb(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_slb(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_slb(vector unsigned int __a, vector signed int __b) {
-  return (vector unsigned int)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_slb(__vector unsigned int __a, __vector signed int __b) {
+  return (__vector unsigned int)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_slb(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_slb(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_slb(vector signed long long __a, vector signed long long __b) {
-  return (vector signed long long)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_slb(__vector signed long long __a, __vector signed long long __b) {
+  return (__vector signed long long)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_slb(vector signed long long __a, vector unsigned long long __b) {
-  return (vector signed long long)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_slb(__vector signed long long __a, __vector unsigned long long __b) {
+  return (__vector signed long long)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_slb(vector unsigned long long __a, vector signed long long __b) {
-  return (vector unsigned long long)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_slb(__vector unsigned long long __a, __vector signed long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_slb(vector unsigned long long __a, vector unsigned long long __b) {
-  return (vector unsigned long long)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_slb(__vector unsigned long long __a, __vector unsigned long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_slb(vector float __a, vector signed int __b) {
-  return (vector float)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_slb(__vector float __a, __vector signed int __b) {
+  return (__vector float)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector float
-vec_slb(vector float __a, vector unsigned int __b) {
-  return (vector float)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_slb(__vector float __a, __vector unsigned int __b) {
+  return (__vector float)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_slb(vector double __a, vector signed long long __b) {
-  return (vector double)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_slb(__vector double __a, __vector signed long long __b) {
+  return (__vector double)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector double
-vec_slb(vector double __a, vector unsigned long long __b) {
-  return (vector double)__builtin_s390_vslb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_slb(__vector double __a, __vector unsigned long long __b) {
+  return (__vector double)__builtin_s390_vslb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 /*-- vec_sld ----------------------------------------------------------------*/

-extern __ATTRS_o vector signed char
-vec_sld(vector signed char __a, vector signed char __b, int __c)
+extern __ATTRS_o __vector signed char
+vec_sld(__vector signed char __a, __vector signed char __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector bool char
-vec_sld(vector bool char __a, vector bool char __b, int __c)
+extern __ATTRS_o __vector __bool char
+vec_sld(__vector __bool char __a, __vector __bool char __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector unsigned char
-vec_sld(vector unsigned char __a, vector unsigned char __b, int __c)
+extern __ATTRS_o __vector unsigned char
+vec_sld(__vector unsigned char __a, __vector unsigned char __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector signed short
-vec_sld(vector signed short __a, vector signed short __b, int __c)
+extern __ATTRS_o __vector signed short
+vec_sld(__vector signed short __a, __vector signed short __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector bool short
-vec_sld(vector bool short __a, vector bool short __b, int __c)
+extern __ATTRS_o __vector __bool short
+vec_sld(__vector __bool short __a, __vector __bool short __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector unsigned short
-vec_sld(vector unsigned short __a, vector unsigned short __b, int __c)
+extern __ATTRS_o __vector unsigned short
+vec_sld(__vector unsigned short __a, __vector unsigned short __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector signed int
-vec_sld(vector signed int __a, vector signed int __b, int __c)
+extern __ATTRS_o __vector signed int
+vec_sld(__vector signed int __a, __vector signed int __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector bool int
-vec_sld(vector bool int __a, vector bool int __b, int __c)
+extern __ATTRS_o __vector __bool int
+vec_sld(__vector __bool int __a, __vector __bool int __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector unsigned int
-vec_sld(vector unsigned int __a, vector unsigned int __b, int __c)
+extern __ATTRS_o __vector unsigned int
+vec_sld(__vector unsigned int __a, __vector unsigned int __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector signed long long
-vec_sld(vector signed long long __a, vector signed long long __b, int __c)
+extern __ATTRS_o __vector signed long long
+vec_sld(__vector signed long long __a, __vector signed long long __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector bool long long
-vec_sld(vector bool long long __a, vector bool long long __b, int __c)
+extern __ATTRS_o __vector __bool long long
+vec_sld(__vector __bool long long __a, __vector __bool long long __b, int __c)
   __constant_range(__c, 0, 15);

-extern __ATTRS_o vector unsigned long long
-vec_sld(vector unsigned long long __a, vector unsigned long long __b, int __c)
+extern __ATTRS_o __vector unsigned long long
+vec_sld(__vector unsigned long long __a, __vector unsigned long long __b,
+        int __c)
   __constant_range(__c, 0, 15);

 #if __ARCH__ >= 12
-extern __ATTRS_o vector float
-vec_sld(vector float __a, vector float __b, int __c)
+extern __ATTRS_o __vector float
+vec_sld(__vector float __a, __vector float __b, int __c)
   __constant_range(__c, 0, 15);
 #endif

-extern __ATTRS_o vector double
-vec_sld(vector double __a, vector double __b, int __c)
+extern __ATTRS_o __vector double
+vec_sld(__vector double __a, __vector double __b, int __c)
   __constant_range(__c, 0, 15);

 #define vec_sld(X, Y, Z) ((__typeof__((vec_sld)((X), (Y), (Z)))) \
-  __builtin_s390_vsldb((vector unsigned char)(X), \
-                       (vector unsigned char)(Y), (Z)))
+  __builtin_s390_vsldb((__vector unsigned char)(X), \
+                       (__vector unsigned char)(Y), (Z)))

 /*-- vec_sldw ---------------------------------------------------------------*/

-extern __ATTRS_o vector signed char
-vec_sldw(vector signed char __a, vector signed char __b, int __c)
+extern __ATTRS_o __vector signed char
+vec_sldw(__vector signed char __a, __vector signed char __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector unsigned char
-vec_sldw(vector unsigned char __a, vector unsigned char __b, int __c)
+extern __ATTRS_o __vector unsigned char
+vec_sldw(__vector unsigned char __a, __vector unsigned char __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector signed short
-vec_sldw(vector signed short __a, vector signed short __b, int __c)
+extern __ATTRS_o __vector signed short
+vec_sldw(__vector signed short __a, __vector signed short __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector unsigned short
-vec_sldw(vector unsigned short __a, vector unsigned short __b, int __c)
+extern __ATTRS_o __vector unsigned short
+vec_sldw(__vector unsigned short __a, __vector unsigned short __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector signed int
-vec_sldw(vector signed int __a, vector signed int __b, int __c)
+extern __ATTRS_o __vector signed int
+vec_sldw(__vector signed int __a, __vector signed int __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector unsigned int
-vec_sldw(vector unsigned int __a, vector unsigned int __b, int __c)
+extern __ATTRS_o __vector unsigned int
+vec_sldw(__vector unsigned int __a, __vector unsigned int __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector signed long long
-vec_sldw(vector signed long long __a, vector signed long long __b, int __c)
+extern __ATTRS_o __vector signed long long
+vec_sldw(__vector signed long long __a, __vector signed long long __b, int __c)
   __constant_range(__c, 0, 3);

-extern __ATTRS_o vector unsigned long long
-vec_sldw(vector unsigned long long __a, vector unsigned long long __b, int __c)
+extern __ATTRS_o __vector unsigned long long
+vec_sldw(__vector unsigned long long __a, __vector unsigned long long __b,
+         int __c)
   __constant_range(__c, 0, 3);

 // This prototype is deprecated.
-extern __ATTRS_o vector double
-vec_sldw(vector double __a, vector double __b, int __c)
+extern __ATTRS_o __vector double
+vec_sldw(__vector double __a, __vector double __b, int __c)
   __constant_range(__c, 0, 3);

 #define vec_sldw(X, Y, Z) ((__typeof__((vec_sldw)((X), (Y), (Z)))) \
-  __builtin_s390_vsldb((vector unsigned char)(X), \
-                       (vector unsigned char)(Y), (Z) * 4))
+  __builtin_s390_vsldb((__vector unsigned char)(X), \
+                       (__vector unsigned char)(Y), (Z) * 4))

 /*-- vec_sldb ---------------------------------------------------------------*/

 #if __ARCH__ >= 13

-extern __ATTRS_o vector signed char
-vec_sldb(vector signed char __a, vector signed char __b, int __c)
+extern __ATTRS_o __vector signed char
+vec_sldb(__vector signed char __a, __vector signed char __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned char
-vec_sldb(vector unsigned char __a, vector unsigned char __b, int __c)
+extern __ATTRS_o __vector unsigned char
+vec_sldb(__vector unsigned char __a, __vector unsigned char __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed short
-vec_sldb(vector signed short __a, vector signed short __b, int __c)
+extern __ATTRS_o __vector signed short
+vec_sldb(__vector signed short __a, __vector signed short __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned short
-vec_sldb(vector unsigned short __a, vector unsigned short __b, int __c)
+extern __ATTRS_o __vector unsigned short
+vec_sldb(__vector unsigned short __a, __vector unsigned short __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed int
-vec_sldb(vector signed int __a, vector signed int __b, int __c)
+extern __ATTRS_o __vector signed int
+vec_sldb(__vector signed int __a, __vector signed int __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned int
-vec_sldb(vector unsigned int __a, vector unsigned int __b, int __c)
+extern __ATTRS_o __vector unsigned int
+vec_sldb(__vector unsigned int __a, __vector unsigned int __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed long long
-vec_sldb(vector signed long long __a, vector signed long long __b, int __c)
+extern __ATTRS_o __vector signed long long
+vec_sldb(__vector signed long long __a, __vector signed long long __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned long long
-vec_sldb(vector unsigned long long __a, vector unsigned long long __b, int __c)
+extern __ATTRS_o __vector unsigned long long
+vec_sldb(__vector unsigned long long __a, __vector unsigned long long __b,
+         int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector float
-vec_sldb(vector float __a, vector float __b, int __c)
+extern __ATTRS_o __vector float
+vec_sldb(__vector float __a, __vector float __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector double
-vec_sldb(vector double __a, vector double __b, int __c)
+extern __ATTRS_o __vector double
+vec_sldb(__vector double __a, __vector double __b, int __c)
   __constant_range(__c, 0, 7);

 #define vec_sldb(X, Y, Z) ((__typeof__((vec_sldb)((X), (Y), (Z)))) \
-  __builtin_s390_vsld((vector unsigned char)(X), \
-                      (vector unsigned char)(Y), (Z)))
+  __builtin_s390_vsld((__vector unsigned char)(X), \
+                      (__vector unsigned char)(Y), (Z)))

 #endif

 /*-- vec_sral ---------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_sral(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sral(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_sral(vector signed char __a, vector unsigned short __b) {
-  return (vector signed char)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sral(__vector signed char __a, __vector unsigned short __b) {
+  return (__vector signed char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_sral(vector signed char __a, vector unsigned int __b) {
-  return (vector signed char)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_sral(__vector signed char __a, __vector unsigned int __b) {
+  return (__vector signed char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sral(vector bool char __a, vector unsigned char __b) {
-  return (vector bool char)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sral(__vector __bool char __a, __vector unsigned char __b) {
+  return (__vector __bool char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sral(vector bool char __a, vector unsigned short __b) {
-  return (vector bool char)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sral(__vector __bool char __a, __vector unsigned short __b) {
+  return (__vector __bool char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_sral(vector bool char __a, vector unsigned int __b) {
-  return (vector bool char)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_sral(__vector __bool char __a, __vector unsigned int __b) {
+  return (__vector __bool char)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_sral(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sral(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vsra(__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_sral(vector unsigned char __a, vector unsigned short __b) {
-  return __builtin_s390_vsra(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sral(__vector unsigned char __a, __vector unsigned short __b) {
+  return __builtin_s390_vsra(__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_sral(vector unsigned char __a, vector unsigned int __b) {
-  return __builtin_s390_vsra(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_sral(__vector unsigned char __a, __vector unsigned int __b) {
+  return __builtin_s390_vsra(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_sral(vector signed short __a, vector unsigned char __b) {
-  return (vector signed short)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sral(__vector signed short __a, __vector unsigned char __b) {
+  return (__vector signed short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_sral(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sral(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_sral(vector signed short __a, vector unsigned int __b) {
-  return (vector signed short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_sral(__vector signed short __a, __vector unsigned int __b) {
+  return (__vector signed short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sral(vector bool short __a, vector unsigned char __b) {
-  return (vector bool short)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sral(__vector __bool short __a, __vector unsigned char __b) {
+  return (__vector __bool short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sral(vector bool short __a, vector unsigned short __b) {
-  return (vector bool short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sral(__vector __bool short __a, __vector unsigned short __b) {
+  return (__vector __bool short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_sral(vector bool short __a, vector unsigned int __b) {
-  return (vector bool short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_sral(__vector __bool short __a, __vector unsigned int __b) {
+  return (__vector __bool short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_sral(vector unsigned short __a, vector unsigned char __b) {
-  return (vector unsigned short)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sral(__vector unsigned short __a, __vector unsigned char __b) {
+  return (__vector unsigned short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_sral(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sral(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_sral(vector unsigned short __a, vector unsigned int __b) {
-  return (vector unsigned short)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_sral(__vector unsigned short __a, __vector unsigned int __b) {
+  return (__vector unsigned short)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_sral(vector signed int __a, vector unsigned char __b) {
-  return (vector signed int)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sral(__vector signed int __a, __vector unsigned char __b) {
+  return (__vector signed int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_sral(vector signed int __a, vector unsigned short __b) {
-  return (vector signed int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sral(__vector signed int __a, __vector unsigned short __b) {
+  return (__vector signed int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_sral(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_sral(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sral(vector bool int __a, vector unsigned char __b) {
-  return (vector bool int)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sral(__vector __bool int __a, __vector unsigned char __b) {
+  return (__vector __bool int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sral(vector bool int __a, vector unsigned short __b) {
-  return (vector bool int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sral(__vector __bool int __a, __vector unsigned short __b) {
+  return (__vector __bool int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_sral(vector bool int __a, vector unsigned int __b) {
-  return (vector bool int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_sral(__vector __bool int __a, __vector unsigned int __b) {
+  return (__vector __bool int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_sral(vector unsigned int __a, vector unsigned char __b) {
-  return (vector unsigned int)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sral(__vector unsigned int __a, __vector unsigned char __b) {
+  return (__vector unsigned int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_sral(vector unsigned int __a, vector unsigned short __b) {
-  return (vector unsigned int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sral(__vector unsigned int __a, __vector unsigned short __b) {
+  return (__vector unsigned int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_sral(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_sral(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_sral(vector signed long long __a, vector unsigned char __b) {
-  return (vector signed long long)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sral(__vector signed long long __a, __vector unsigned char __b) {
+  return (__vector signed long long)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_sral(vector signed long long __a, vector unsigned short __b) {
-  return (vector signed long long)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sral(__vector signed long long __a, __vector unsigned short __b) {
+  return (__vector signed long long)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_sral(vector signed long long __a, vector unsigned int __b) {
-  return (vector signed long long)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_sral(__vector signed long long __a, __vector unsigned int __b) {
+  return (__vector signed long long)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_sral(vector bool long long __a, vector unsigned char __b) {
-  return (vector bool long long)__builtin_s390_vsra(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sral(__vector __bool long long __a, __vector unsigned char __b) {
+  return (__vector __bool long long)__builtin_s390_vsra(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_sral(vector bool long long __a, vector unsigned short __b) {
-  return (vector bool long long)__builtin_s390_vsra(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_sral(__vector __bool long long __a, __vector unsigned short __b) {
+  return (__vector __bool long long)__builtin_s390_vsra(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long -vec_sral(vector bool long long __a, vector unsigned int __b) { - return (vector bool long long)__builtin_s390_vsra( - (vector unsigned char)__a, (vector unsigned char)__b); +static inline __ATTRS_o_ai __vector __bool long long +vec_sral(__vector __bool long long __a, __vector unsigned int __b) { + return (__vector __bool long long)__builtin_s390_vsra( + (__vector unsigned char)__a, (__vector unsigned char)__b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_sral(vector unsigned long long __a, vector unsigned char __b) { - return (vector unsigned long long)__builtin_s390_vsra( - (vector unsigned char)__a, __b); +static inline __ATTRS_o_ai __vector unsigned long long +vec_sral(__vector unsigned long long __a, __vector unsigned char __b) { + return (__vector unsigned long long)__builtin_s390_vsra( + (__vector unsigned char)__a, __b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_sral(vector unsigned long long __a, vector unsigned short __b) { - return (vector unsigned long long)__builtin_s390_vsra( - (vector unsigned char)__a, (vector unsigned char)__b); +static inline __ATTRS_o_ai __vector unsigned long long +vec_sral(__vector unsigned long long __a, __vector unsigned short __b) { + return (__vector unsigned long long)__builtin_s390_vsra( + (__vector unsigned char)__a, (__vector unsigned char)__b); } // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_sral(vector unsigned long long __a, vector unsigned int __b) { - return (vector unsigned long long)__builtin_s390_vsra( - (vector unsigned char)__a, (vector unsigned char)__b); +static inline __ATTRS_o_ai __vector unsigned long long +vec_sral(__vector unsigned long long __a, __vector unsigned int __b) { + return (__vector unsigned long long)__builtin_s390_vsra( + (__vector unsigned char)__a, (__vector unsigned char)__b); } /*-- vec_srab ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_srab(vector signed char __a, vector signed char __b) { - return (vector signed char)__builtin_s390_vsrab( - (vector unsigned char)__a, (vector unsigned char)__b); +static inline __ATTRS_o_ai __vector signed char +vec_srab(__vector signed char __a, __vector signed char __b) { + return (__vector signed char)__builtin_s390_vsrab( + (__vector unsigned char)__a, (__vector unsigned char)__b); } -static inline __ATTRS_o_ai vector signed char -vec_srab(vector signed char __a, vector unsigned char __b) { - return (vector signed char)__builtin_s390_vsrab( - (vector unsigned char)__a, __b); +static inline __ATTRS_o_ai __vector signed char +vec_srab(__vector signed char __a, __vector unsigned char __b) { + return (__vector signed char)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_srab(vector unsigned char __a, vector signed char __b) { - return __builtin_s390_vsrab(__a, (vector unsigned char)__b); +static inline __ATTRS_o_ai __vector unsigned char +vec_srab(__vector unsigned char __a, __vector signed char __b) { + return __builtin_s390_vsrab(__a, (__vector unsigned char)__b); } -static inline __ATTRS_o_ai vector unsigned char -vec_srab(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_srab(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrab(__a, __b); } -static inline __ATTRS_o_ai vector signed 

 /*-- vec_srab ---------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_srab(vector signed char __a, vector signed char __b) {
-  return (vector signed char)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srab(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed char
-vec_srab(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vsrab(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srab(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_srab(vector unsigned char __a, vector signed char __b) {
-  return __builtin_s390_vsrab(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srab(__vector unsigned char __a, __vector signed char __b) {
+  return __builtin_s390_vsrab(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_srab(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srab(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vsrab(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_srab(vector signed short __a, vector signed short __b) {
-  return (vector signed short)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srab(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_srab(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srab(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_srab(vector unsigned short __a, vector signed short __b) {
-  return (vector unsigned short)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srab(__vector unsigned short __a, __vector signed short __b) {
+  return (__vector unsigned short)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_srab(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srab(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_srab(vector signed int __a, vector signed int __b) {
-  return (vector signed int)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srab(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_srab(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srab(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_srab(vector unsigned int __a, vector signed int __b) {
-  return (vector unsigned int)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srab(__vector unsigned int __a, __vector signed int __b) {
+  return (__vector unsigned int)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_srab(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srab(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_srab(vector signed long long __a, vector signed long long __b) {
-  return (vector signed long long)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srab(__vector signed long long __a, __vector signed long long __b) {
+  return (__vector signed long long)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_srab(vector signed long long __a, vector unsigned long long __b) {
-  return (vector signed long long)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srab(__vector signed long long __a, __vector unsigned long long __b) {
+  return (__vector signed long long)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_srab(vector unsigned long long __a, vector signed long long __b) {
-  return (vector unsigned long long)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srab(__vector unsigned long long __a, __vector signed long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_srab(vector unsigned long long __a, vector unsigned long long __b) {
-  return (vector unsigned long long)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_srab(vector float __a, vector signed int __b) {
-  return (vector float)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_srab(__vector float __a, __vector signed int __b) {
+  return (__vector float)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector float
-vec_srab(vector float __a, vector unsigned int __b) {
-  return (vector float)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_srab(__vector float __a, __vector unsigned int __b) {
+  return (__vector float)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_srab(vector double __a, vector signed long long __b) {
-  return (vector double)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_srab(__vector double __a, __vector signed long long __b) {
+  return (__vector double)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector double
-vec_srab(vector double __a, vector unsigned long long __b) {
-  return (vector double)__builtin_s390_vsrab(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_srab(__vector double __a, __vector unsigned long long __b) {
+  return (__vector double)__builtin_s390_vsrab(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
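[Editor's sketch, not part of the diff: vec_srab pairs each element type with either a signed or an unsigned shift operand of the same width, all lowering to __builtin_s390_vsrab. The helper names are hypothetical; both calls resolve against overloads declared above.]

// Sketch: signed and unsigned shift operands select sibling overloads.
__vector signed int srab_s(__vector signed int x, __vector signed int n) {
  return vec_srab(x, n);
}
__vector unsigned int srab_u(__vector unsigned int x, __vector unsigned int n) {
  return vec_srab(x, n);
}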

 /*-- vec_srl ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_srl(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srl(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_srl(vector signed char __a, vector unsigned short __b) {
-  return (vector signed char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srl(__vector signed char __a, __vector unsigned short __b) {
+  return (__vector signed char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_srl(vector signed char __a, vector unsigned int __b) {
-  return (vector signed char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srl(__vector signed char __a, __vector unsigned int __b) {
+  return (__vector signed char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_srl(vector bool char __a, vector unsigned char __b) {
-  return (vector bool char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_srl(__vector __bool char __a, __vector unsigned char __b) {
+  return (__vector __bool char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_srl(vector bool char __a, vector unsigned short __b) {
-  return (vector bool char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_srl(__vector __bool char __a, __vector unsigned short __b) {
+  return (__vector __bool char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool char
-vec_srl(vector bool char __a, vector unsigned int __b) {
-  return (vector bool char)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool char
+vec_srl(__vector __bool char __a, __vector unsigned int __b) {
+  return (__vector __bool char)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_srl(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srl(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vsrl(__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_srl(vector unsigned char __a, vector unsigned short __b) {
-  return __builtin_s390_vsrl(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srl(__vector unsigned char __a, __vector unsigned short __b) {
+  return __builtin_s390_vsrl(__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_srl(vector unsigned char __a, vector unsigned int __b) {
-  return __builtin_s390_vsrl(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srl(__vector unsigned char __a, __vector unsigned int __b) {
+  return __builtin_s390_vsrl(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_srl(vector signed short __a, vector unsigned char __b) {
-  return (vector signed short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srl(__vector signed short __a, __vector unsigned char __b) {
+  return (__vector signed short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_srl(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srl(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_srl(vector signed short __a, vector unsigned int __b) {
-  return (vector signed short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srl(__vector signed short __a, __vector unsigned int __b) {
+  return (__vector signed short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_srl(vector bool short __a, vector unsigned char __b) {
-  return (vector bool short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_srl(__vector __bool short __a, __vector unsigned char __b) {
+  return (__vector __bool short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_srl(vector bool short __a, vector unsigned short __b) {
-  return (vector bool short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_srl(__vector __bool short __a, __vector unsigned short __b) {
+  return (__vector __bool short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool short
-vec_srl(vector bool short __a, vector unsigned int __b) {
-  return (vector bool short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool short
+vec_srl(__vector __bool short __a, __vector unsigned int __b) {
+  return (__vector __bool short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_srl(vector unsigned short __a, vector unsigned char __b) {
-  return (vector unsigned short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srl(__vector unsigned short __a, __vector unsigned char __b) {
+  return (__vector unsigned short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_srl(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srl(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_srl(vector unsigned short __a, vector unsigned int __b) {
-  return (vector unsigned short)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srl(__vector unsigned short __a, __vector unsigned int __b) {
+  return (__vector unsigned short)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_srl(vector signed int __a, vector unsigned char __b) {
-  return (vector signed int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srl(__vector signed int __a, __vector unsigned char __b) {
+  return (__vector signed int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_srl(vector signed int __a, vector unsigned short __b) {
-  return (vector signed int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srl(__vector signed int __a, __vector unsigned short __b) {
+  return (__vector signed int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_srl(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srl(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_srl(vector bool int __a, vector unsigned char __b) {
-  return (vector bool int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_srl(__vector __bool int __a, __vector unsigned char __b) {
+  return (__vector __bool int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_srl(vector bool int __a, vector unsigned short __b) {
-  return (vector bool int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_srl(__vector __bool int __a, __vector unsigned short __b) {
+  return (__vector __bool int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool int
-vec_srl(vector bool int __a, vector unsigned int __b) {
-  return (vector bool int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool int
+vec_srl(__vector __bool int __a, __vector unsigned int __b) {
+  return (__vector __bool int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_srl(vector unsigned int __a, vector unsigned char __b) {
-  return (vector unsigned int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srl(__vector unsigned int __a, __vector unsigned char __b) {
+  return (__vector unsigned int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_srl(vector unsigned int __a, vector unsigned short __b) {
-  return (vector unsigned int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srl(__vector unsigned int __a, __vector unsigned short __b) {
+  return (__vector unsigned int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_srl(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srl(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_srl(vector signed long long __a, vector unsigned char __b) {
-  return (vector signed long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srl(__vector signed long long __a, __vector unsigned char __b) {
+  return (__vector signed long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_srl(vector signed long long __a, vector unsigned short __b) {
-  return (vector signed long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srl(__vector signed long long __a, __vector unsigned short __b) {
+  return (__vector signed long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_srl(vector signed long long __a, vector unsigned int __b) {
-  return (vector signed long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srl(__vector signed long long __a, __vector unsigned int __b) {
+  return (__vector signed long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_srl(vector bool long long __a, vector unsigned char __b) {
-  return (vector bool long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_srl(__vector __bool long long __a, __vector unsigned char __b) {
+  return (__vector __bool long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_srl(vector bool long long __a, vector unsigned short __b) {
-  return (vector bool long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_srl(__vector __bool long long __a, __vector unsigned short __b) {
+  return (__vector __bool long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector bool long long
-vec_srl(vector bool long long __a, vector unsigned int __b) {
-  return (vector bool long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector __bool long long
+vec_srl(__vector __bool long long __a, __vector unsigned int __b) {
+  return (__vector __bool long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_srl(vector unsigned long long __a, vector unsigned char __b) {
-  return (vector unsigned long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srl(__vector unsigned long long __a, __vector unsigned char __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, __b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_srl(vector unsigned long long __a, vector unsigned short __b) {
-  return (vector unsigned long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srl(__vector unsigned long long __a, __vector unsigned short __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_srl(vector unsigned long long __a, vector unsigned int __b) {
-  return (vector unsigned long long)__builtin_s390_vsrl(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srl(__vector unsigned long long __a, __vector unsigned int __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrl(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
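[Editor's sketch, not part of the diff: vec_srl is the logical-shift counterpart (__builtin_s390_vsrl). The unsigned short/int shift-amount forms survive only as deprecated prototypes, so new code should pass the amount as __vector unsigned char. The helper name is hypothetical.]

// Sketch: preferred, non-deprecated form of vec_srl.
__vector unsigned int
srl_demo(__vector unsigned int x, __vector unsigned char amt) {
  return vec_srl(x, amt);
}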

 /*-- vec_srb ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_srb(vector signed char __a, vector signed char __b) {
-  return (vector signed char)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srb(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed char
-vec_srb(vector signed char __a, vector unsigned char __b) {
-  return (vector signed char)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, __b);
+static inline __ATTRS_o_ai __vector signed char
+vec_srb(__vector signed char __a, __vector unsigned char __b) {
+  return (__vector signed char)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, __b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_srb(vector unsigned char __a, vector signed char __b) {
-  return __builtin_s390_vsrlb(__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srb(__vector unsigned char __a, __vector signed char __b) {
+  return __builtin_s390_vsrlb(__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_srb(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_srb(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vsrlb(__a, __b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_srb(vector signed short __a, vector signed short __b) {
-  return (vector signed short)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srb(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed short
-vec_srb(vector signed short __a, vector unsigned short __b) {
-  return (vector signed short)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_srb(__vector signed short __a, __vector unsigned short __b) {
+  return (__vector signed short)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_srb(vector unsigned short __a, vector signed short __b) {
-  return (vector unsigned short)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srb(__vector unsigned short __a, __vector signed short __b) {
+  return (__vector unsigned short)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_srb(vector unsigned short __a, vector unsigned short __b) {
-  return (vector unsigned short)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srb(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector unsigned short)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_srb(vector signed int __a, vector signed int __b) {
-  return (vector signed int)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srb(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed int
-vec_srb(vector signed int __a, vector unsigned int __b) {
-  return (vector signed int)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_srb(__vector signed int __a, __vector unsigned int __b) {
+  return (__vector signed int)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_srb(vector unsigned int __a, vector signed int __b) {
-  return (vector unsigned int)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srb(__vector unsigned int __a, __vector signed int __b) {
+  return (__vector unsigned int)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_srb(vector unsigned int __a, vector unsigned int __b) {
-  return (vector unsigned int)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srb(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector unsigned int)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_srb(vector signed long long __a, vector signed long long __b) {
-  return (vector signed long long)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srb(__vector signed long long __a, __vector signed long long __b) {
+  return (__vector signed long long)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_srb(vector signed long long __a, vector unsigned long long __b) {
-  return (vector signed long long)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed long long
+vec_srb(__vector signed long long __a, __vector unsigned long long __b) {
+  return (__vector signed long long)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_srb(vector unsigned long long __a, vector signed long long __b) {
-  return (vector unsigned long long)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srb(__vector unsigned long long __a, __vector signed long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_srb(vector unsigned long long __a, vector unsigned long long __b) {
-  return (vector unsigned long long)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srb(__vector unsigned long long __a, __vector unsigned long long __b) {
+  return (__vector unsigned long long)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_srb(vector float __a, vector signed int __b) {
-  return (vector float)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_srb(__vector float __a, __vector signed int __b) {
+  return (__vector float)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector float
-vec_srb(vector float __a, vector unsigned int __b) {
-  return (vector float)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector float
+vec_srb(__vector float __a, __vector unsigned int __b) {
+  return (__vector float)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_srb(vector double __a, vector signed long long __b) {
-  return (vector double)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_srb(__vector double __a, __vector signed long long __b) {
+  return (__vector double)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }

-static inline __ATTRS_o_ai vector double
-vec_srb(vector double __a, vector unsigned long long __b) {
-  return (vector double)__builtin_s390_vsrlb(
-    (vector unsigned char)__a, (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector double
+vec_srb(__vector double __a, __vector unsigned long long __b) {
+  return (__vector double)__builtin_s390_vsrlb(
+    (__vector unsigned char)__a, (__vector unsigned char)__b);
 }
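[Editor's sketch, not part of the diff: vec_srb (__builtin_s390_vsrlb) is the shift-right-by-byte form and, unlike vec_srl, also carries float (under __ARCH__ >= 12) and double overloads, as declared above. The helper name is hypothetical.]

// Sketch: the double overload of vec_srb takes a 64-bit shift operand.
__vector double srb_demo(__vector double x, __vector signed long long n) {
  return vec_srb(x, n);
}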

 /*-- vec_srdb ---------------------------------------------------------------*/

 #if __ARCH__ >= 13

-extern __ATTRS_o vector signed char
-vec_srdb(vector signed char __a, vector signed char __b, int __c)
+extern __ATTRS_o __vector signed char
+vec_srdb(__vector signed char __a, __vector signed char __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned char
-vec_srdb(vector unsigned char __a, vector unsigned char __b, int __c)
+extern __ATTRS_o __vector unsigned char
+vec_srdb(__vector unsigned char __a, __vector unsigned char __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed short
-vec_srdb(vector signed short __a, vector signed short __b, int __c)
+extern __ATTRS_o __vector signed short
+vec_srdb(__vector signed short __a, __vector signed short __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned short
-vec_srdb(vector unsigned short __a, vector unsigned short __b, int __c)
+extern __ATTRS_o __vector unsigned short
+vec_srdb(__vector unsigned short __a, __vector unsigned short __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed int
-vec_srdb(vector signed int __a, vector signed int __b, int __c)
+extern __ATTRS_o __vector signed int
+vec_srdb(__vector signed int __a, __vector signed int __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned int
-vec_srdb(vector unsigned int __a, vector unsigned int __b, int __c)
+extern __ATTRS_o __vector unsigned int
+vec_srdb(__vector unsigned int __a, __vector unsigned int __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector signed long long
-vec_srdb(vector signed long long __a, vector signed long long __b, int __c)
+extern __ATTRS_o __vector signed long long
+vec_srdb(__vector signed long long __a, __vector signed long long __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector unsigned long long
-vec_srdb(vector unsigned long long __a, vector unsigned long long __b, int __c)
+extern __ATTRS_o __vector unsigned long long
+vec_srdb(__vector unsigned long long __a, __vector unsigned long long __b,
+         int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector float
-vec_srdb(vector float __a, vector float __b, int __c)
+extern __ATTRS_o __vector float
+vec_srdb(__vector float __a, __vector float __b, int __c)
   __constant_range(__c, 0, 7);

-extern __ATTRS_o vector double
-vec_srdb(vector double __a, vector double __b, int __c)
+extern __ATTRS_o __vector double
+vec_srdb(__vector double __a, __vector double __b, int __c)
   __constant_range(__c, 0, 7);

 #define vec_srdb(X, Y, Z) ((__typeof__((vec_srdb)((X), (Y), (Z)))) \
-  __builtin_s390_vsrd((vector unsigned char)(X), \
-                      (vector unsigned char)(Y), (Z)))
+  __builtin_s390_vsrd((__vector unsigned char)(X), \
+                      (__vector unsigned char)(Y), (Z)))

 #endif
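[Editor's sketch, not part of the diff: vec_srdb is declared extern and then redirected through the macro above onto __builtin_s390_vsrd; the __constant_range annotation restricts the bit count to a compile-time constant in [0, 7], and the whole group is gated on __ARCH__ >= 13. The helper name is hypothetical.]

#if __ARCH__ >= 13
// Sketch: shift-right-double-by-bit; the 3 must be a literal in [0, 7].
__vector unsigned long long
srdb_demo(__vector unsigned long long hi, __vector unsigned long long lo) {
  return vec_srdb(hi, lo, 3);
}
#endif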

 /*-- vec_abs ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_abs(vector signed char __a) {
-  return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed char)0));
+static inline __ATTRS_o_ai __vector signed char
+vec_abs(__vector signed char __a) {
+  return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed char)0));
 }

-static inline __ATTRS_o_ai vector signed short
-vec_abs(vector signed short __a) {
-  return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed short)0));
+static inline __ATTRS_o_ai __vector signed short
+vec_abs(__vector signed short __a) {
+  return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed short)0));
 }

-static inline __ATTRS_o_ai vector signed int
-vec_abs(vector signed int __a) {
-  return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed int)0));
+static inline __ATTRS_o_ai __vector signed int
+vec_abs(__vector signed int __a) {
+  return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed int)0));
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_abs(vector signed long long __a) {
-  return vec_sel(__a, -__a, vec_cmplt(__a, (vector signed long long)0));
+static inline __ATTRS_o_ai __vector signed long long
+vec_abs(__vector signed long long __a) {
+  return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed long long)0));
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_abs(vector float __a) {
+static inline __ATTRS_o_ai __vector float
+vec_abs(__vector float __a) {
   return __builtin_s390_vflpsb(__a);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_abs(vector double __a) {
+static inline __ATTRS_o_ai __vector double
+vec_abs(__vector double __a) {
   return __builtin_s390_vflpdb(__a);
 }

 /*-- vec_nabs ---------------------------------------------------------------*/

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_nabs(vector float __a) {
+static inline __ATTRS_o_ai __vector float
+vec_nabs(__vector float __a) {
   return __builtin_s390_vflnsb(__a);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_nabs(vector double __a) {
+static inline __ATTRS_o_ai __vector double
+vec_nabs(__vector double __a) {
   return __builtin_s390_vflndb(__a);
 }
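[Editor's sketch, not part of the diff: for integer elements vec_abs is synthesized from vec_sel/vec_cmplt as shown above, while the float/double forms use the vflpsb/vflpdb builtins and vec_nabs their negated vflnsb/vflndb counterparts. The helper name is hypothetical.]

// Sketch: integer absolute value, expanded inline via vec_sel/vec_cmplt.
__vector signed int abs_demo(__vector signed int x) {
  return vec_abs(x);
}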

 /*-- vec_max ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_max(vector signed char __a, vector signed char __b) {
+static inline __ATTRS_o_ai __vector signed char
+vec_max(__vector signed char __a, __vector signed char __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_max(vector signed char __a, vector bool char __b) {
-  vector signed char __bc = (vector signed char)__b;
+static inline __ATTRS_o_ai __vector signed char
+vec_max(__vector signed char __a, __vector __bool char __b) {
+  __vector signed char __bc = (__vector signed char)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_max(vector bool char __a, vector signed char __b) {
-  vector signed char __ac = (vector signed char)__a;
+static inline __ATTRS_o_ai __vector signed char
+vec_max(__vector __bool char __a, __vector signed char __b) {
+  __vector signed char __ac = (__vector signed char)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_max(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_max(__vector unsigned char __a, __vector unsigned char __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_max(vector unsigned char __a, vector bool char __b) {
-  vector unsigned char __bc = (vector unsigned char)__b;
+static inline __ATTRS_o_ai __vector unsigned char
+vec_max(__vector unsigned char __a, __vector __bool char __b) {
+  __vector unsigned char __bc = (__vector unsigned char)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_max(vector bool char __a, vector unsigned char __b) {
-  vector unsigned char __ac = (vector unsigned char)__a;
+static inline __ATTRS_o_ai __vector unsigned char
+vec_max(__vector __bool char __a, __vector unsigned char __b) {
+  __vector unsigned char __ac = (__vector unsigned char)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed short
-vec_max(vector signed short __a, vector signed short __b) {
+static inline __ATTRS_o_ai __vector signed short
+vec_max(__vector signed short __a, __vector signed short __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_max(vector signed short __a, vector bool short __b) {
-  vector signed short __bc = (vector signed short)__b;
+static inline __ATTRS_o_ai __vector signed short
+vec_max(__vector signed short __a, __vector __bool short __b) {
+  __vector signed short __bc = (__vector signed short)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_max(vector bool short __a, vector signed short __b) {
-  vector signed short __ac = (vector signed short)__a;
+static inline __ATTRS_o_ai __vector signed short
+vec_max(__vector __bool short __a, __vector signed short __b) {
+  __vector signed short __ac = (__vector signed short)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_max(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_max(__vector unsigned short __a, __vector unsigned short __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_max(vector unsigned short __a, vector bool short __b) {
-  vector unsigned short __bc = (vector unsigned short)__b;
+static inline __ATTRS_o_ai __vector unsigned short
+vec_max(__vector unsigned short __a, __vector __bool short __b) {
+  __vector unsigned short __bc = (__vector unsigned short)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_max(vector bool short __a, vector unsigned short __b) {
-  vector unsigned short __ac = (vector unsigned short)__a;
+static inline __ATTRS_o_ai __vector unsigned short
+vec_max(__vector __bool short __a, __vector unsigned short __b) {
+  __vector unsigned short __ac = (__vector unsigned short)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed int
-vec_max(vector signed int __a, vector signed int __b) {
+static inline __ATTRS_o_ai __vector signed int
+vec_max(__vector signed int __a, __vector signed int __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_max(vector signed int __a, vector bool int __b) {
-  vector signed int __bc = (vector signed int)__b;
+static inline __ATTRS_o_ai __vector signed int
+vec_max(__vector signed int __a, __vector __bool int __b) {
+  __vector signed int __bc = (__vector signed int)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_max(vector bool int __a, vector signed int __b) {
-  vector signed int __ac = (vector signed int)__a;
+static inline __ATTRS_o_ai __vector signed int
+vec_max(__vector __bool int __a, __vector signed int __b) {
+  __vector signed int __ac = (__vector signed int)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_max(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_max(__vector unsigned int __a, __vector unsigned int __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_max(vector unsigned int __a, vector bool int __b) {
-  vector unsigned int __bc = (vector unsigned int)__b;
+static inline __ATTRS_o_ai __vector unsigned int
+vec_max(__vector unsigned int __a, __vector __bool int __b) {
+  __vector unsigned int __bc = (__vector unsigned int)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_max(vector bool int __a, vector unsigned int __b) {
-  vector unsigned int __ac = (vector unsigned int)__a;
+static inline __ATTRS_o_ai __vector unsigned int
+vec_max(__vector __bool int __a, __vector unsigned int __b) {
+  __vector unsigned int __ac = (__vector unsigned int)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_max(vector signed long long __a, vector signed long long __b) {
+static inline __ATTRS_o_ai __vector signed long long
+vec_max(__vector signed long long __a, __vector signed long long __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_max(vector signed long long __a, vector bool long long __b) {
-  vector signed long long __bc = (vector signed long long)__b;
+static inline __ATTRS_o_ai __vector signed long long
+vec_max(__vector signed long long __a, __vector __bool long long __b) {
+  __vector signed long long __bc = (__vector signed long long)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_max(vector bool long long __a, vector signed long long __b) {
-  vector signed long long __ac = (vector signed long long)__a;
+static inline __ATTRS_o_ai __vector signed long long
+vec_max(__vector __bool long long __a, __vector signed long long __b) {
+  __vector signed long long __ac = (__vector signed long long)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_max(vector unsigned long long __a, vector unsigned long long __b) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_max(__vector unsigned long long __a, __vector unsigned long long __b) {
   return vec_sel(__b, __a, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_max(vector unsigned long long __a, vector bool long long __b) {
-  vector unsigned long long __bc = (vector unsigned long long)__b;
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_max(__vector unsigned long long __a, __vector __bool long long __b) {
+  __vector unsigned long long __bc = (__vector unsigned long long)__b;
   return vec_sel(__bc, __a, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_max(vector bool long long __a, vector unsigned long long __b) {
-  vector unsigned long long __ac = (vector unsigned long long)__a;
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_max(__vector __bool long long __a, __vector unsigned long long __b) {
+  __vector unsigned long long __ac = (__vector unsigned long long)__a;
   return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_max(vector float __a, vector float __b) {
+static inline __ATTRS_o_ai __vector float
+vec_max(__vector float __a, __vector float __b) {
   return __builtin_s390_vfmaxsb(__a, __b, 0);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_max(vector double __a, vector double __b) {
+static inline __ATTRS_o_ai __vector double
+vec_max(__vector double __a, __vector double __b) {
 #if __ARCH__ >= 12
   return __builtin_s390_vfmaxdb(__a, __b, 0);
 #else
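[Editor's sketch, not part of the diff: the deprecated vec_max forms mix a value operand with a boolean-mask operand, which is where the renamed __vector __bool element types appear on the caller side. The helper name is hypothetical; the call resolves to a deprecated mixed-operand overload above.]

// Sketch: second operand spelled with the reserved __bool keyword.
__vector signed int
max_demo(__vector signed int x, __vector __bool int mask) {
  return vec_max(x, mask);
}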
@@ -8020,167 +8092,167 @@ vec_max(vector double __a, vector double __b) {

 /*-- vec_min ----------------------------------------------------------------*/

-static inline __ATTRS_o_ai vector signed char
-vec_min(vector signed char __a, vector signed char __b) {
+static inline __ATTRS_o_ai __vector signed char
+vec_min(__vector signed char __a, __vector signed char __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_min(vector signed char __a, vector bool char __b) {
-  vector signed char __bc = (vector signed char)__b;
+static inline __ATTRS_o_ai __vector signed char
+vec_min(__vector signed char __a, __vector __bool char __b) {
+  __vector signed char __bc = (__vector signed char)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed char
-vec_min(vector bool char __a, vector signed char __b) {
-  vector signed char __ac = (vector signed char)__a;
+static inline __ATTRS_o_ai __vector signed char
+vec_min(__vector __bool char __a, __vector signed char __b) {
+  __vector signed char __ac = (__vector signed char)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned char
-vec_min(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_min(__vector unsigned char __a, __vector unsigned char __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_min(vector unsigned char __a, vector bool char __b) {
-  vector unsigned char __bc = (vector unsigned char)__b;
+static inline __ATTRS_o_ai __vector unsigned char
+vec_min(__vector unsigned char __a, __vector __bool char __b) {
+  __vector unsigned char __bc = (__vector unsigned char)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned char
-vec_min(vector bool char __a, vector unsigned char __b) {
-  vector unsigned char __ac = (vector unsigned char)__a;
+static inline __ATTRS_o_ai __vector unsigned char
+vec_min(__vector __bool char __a, __vector unsigned char __b) {
+  __vector unsigned char __ac = (__vector unsigned char)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed short
-vec_min(vector signed short __a, vector signed short __b) {
+static inline __ATTRS_o_ai __vector signed short
+vec_min(__vector signed short __a, __vector signed short __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_min(vector signed short __a, vector bool short __b) {
-  vector signed short __bc = (vector signed short)__b;
+static inline __ATTRS_o_ai __vector signed short
+vec_min(__vector signed short __a, __vector __bool short __b) {
+  __vector signed short __bc = (__vector signed short)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed short
-vec_min(vector bool short __a, vector signed short __b) {
-  vector signed short __ac = (vector signed short)__a;
+static inline __ATTRS_o_ai __vector signed short
+vec_min(__vector __bool short __a, __vector signed short __b) {
+  __vector signed short __ac = (__vector signed short)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned short
-vec_min(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_min(__vector unsigned short __a, __vector unsigned short __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_min(vector unsigned short __a, vector bool short __b) {
-  vector unsigned short __bc = (vector unsigned short)__b;
+static inline __ATTRS_o_ai __vector unsigned short
+vec_min(__vector unsigned short __a, __vector __bool short __b) {
+  __vector unsigned short __bc = (__vector unsigned short)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned short
-vec_min(vector bool short __a, vector unsigned short __b) {
-  vector unsigned short __ac = (vector unsigned short)__a;
+static inline __ATTRS_o_ai __vector unsigned short
+vec_min(__vector __bool short __a, __vector unsigned short __b) {
+  __vector unsigned short __ac = (__vector unsigned short)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed int
-vec_min(vector signed int __a, vector signed int __b) {
+static inline __ATTRS_o_ai __vector signed int
+vec_min(__vector signed int __a, __vector signed int __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_min(vector signed int __a, vector bool int __b) {
-  vector signed int __bc = (vector signed int)__b;
+static inline __ATTRS_o_ai __vector signed int
+vec_min(__vector signed int __a, __vector __bool int __b) {
+  __vector signed int __bc = (__vector signed int)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed int
-vec_min(vector bool int __a, vector signed int __b) {
-  vector signed int __ac = (vector signed int)__a;
+static inline __ATTRS_o_ai __vector signed int
+vec_min(__vector __bool int __a, __vector signed int __b) {
+  __vector signed int __ac = (__vector signed int)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned int
-vec_min(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_min(__vector unsigned int __a, __vector unsigned int __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_min(vector unsigned int __a, vector bool int __b) {
-  vector unsigned int __bc = (vector unsigned int)__b;
+static inline __ATTRS_o_ai __vector unsigned int
+vec_min(__vector unsigned int __a, __vector __bool int __b) {
+  __vector unsigned int __bc = (__vector unsigned int)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned int
-vec_min(vector bool int __a, vector unsigned int __b) {
-  vector unsigned int __ac = (vector unsigned int)__a;
+static inline __ATTRS_o_ai __vector unsigned int
+vec_min(__vector __bool int __a, __vector unsigned int __b) {
+  __vector unsigned int __ac = (__vector unsigned int)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector signed long long
-vec_min(vector signed long long __a, vector signed long long __b) {
+static inline __ATTRS_o_ai __vector signed long long
+vec_min(__vector signed long long __a, __vector signed long long __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_min(vector signed long long __a, vector bool long long __b) {
-  vector signed long long __bc = (vector signed long long)__b;
+static inline __ATTRS_o_ai __vector signed long long
+vec_min(__vector signed long long __a, __vector __bool long long __b) {
+  __vector signed long long __bc = (__vector signed long long)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector signed long long
-vec_min(vector bool long long __a, vector signed long long __b) {
-  vector signed long long __ac = (vector signed long long)__a;
+static inline __ATTRS_o_ai __vector signed long long
+vec_min(__vector __bool long long __a, __vector signed long long __b) {
+  __vector signed long long __ac = (__vector signed long long)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

-static inline __ATTRS_o_ai vector unsigned long long
-vec_min(vector unsigned long long __a, vector unsigned long long __b) {
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_min(__vector unsigned long long __a, __vector unsigned long long __b) {
   return vec_sel(__a, __b, vec_cmpgt(__a, __b));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_min(vector unsigned long long __a, vector bool long long __b) {
-  vector unsigned long long __bc = (vector unsigned long long)__b;
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_min(__vector unsigned long long __a, __vector __bool long long __b) {
+  __vector unsigned long long __bc = (__vector unsigned long long)__b;
   return vec_sel(__a, __bc, vec_cmpgt(__a, __bc));
 }

 // This prototype is deprecated.
-static inline __ATTRS_o_ai vector unsigned long long
-vec_min(vector bool long long __a, vector unsigned long long __b) {
-  vector unsigned long long __ac = (vector unsigned long long)__a;
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_min(__vector __bool long long __a, __vector unsigned long long __b) {
+  __vector unsigned long long __ac = (__vector unsigned long long)__a;
   return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
 }

 #if __ARCH__ >= 12
-static inline __ATTRS_o_ai vector float
-vec_min(vector float __a, vector float __b) {
+static inline __ATTRS_o_ai __vector float
+vec_min(__vector float __a, __vector float __b) {
   return __builtin_s390_vfminsb(__a, __b, 0);
 }
 #endif

-static inline __ATTRS_o_ai vector double
-vec_min(vector double __a, vector double __b) {
+static inline __ATTRS_o_ai __vector double
+vec_min(__vector double __a, __vector double __b) {
 #if __ARCH__ >= 12
   return __builtin_s390_vfmindb(__a, __b, 0);
 #else
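[Editor's sketch, not part of the diff: vec_min mirrors vec_max with the vec_sel operands swapped (vec_sel(__a, __b, ...) rather than vec_sel(__b, __a, ...)), so the same calling patterns apply. The helper name is hypothetical.]

// Sketch: element-wise minimum of unsigned 64-bit lanes.
__vector unsigned long long
min_demo(__vector unsigned long long a, __vector unsigned long long b) {
  return vec_min(a, b);
}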
__b, - vector unsigned char __c) { +static inline __ATTRS_ai __vector unsigned char +vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c) { return __builtin_s390_vacccq(__a, __b, __c); } /*-- vec_avg ----------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_avg(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_avg(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vavgb(__a, __b); } -static inline __ATTRS_o_ai vector signed short -vec_avg(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_avg(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vavgh(__a, __b); } -static inline __ATTRS_o_ai vector signed int -vec_avg(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_avg(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vavgf(__a, __b); } -static inline __ATTRS_o_ai vector signed long long -vec_avg(vector signed long long __a, vector signed long long __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_avg(__vector signed long long __a, __vector signed long long __b) { return __builtin_s390_vavgg(__a, __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_avg(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_avg(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vavglb(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_avg(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_avg(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vavglh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_avg(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_avg(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vavglf(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_avg(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_avg(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vavglg(__a, __b); } /*-- vec_checksum -----------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned int -vec_checksum(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_ai __vector unsigned int +vec_checksum(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vcksm(__a, __b); } /*-- vec_gfmsum -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned short -vec_gfmsum(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_gfmsum(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vgfmb(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_gfmsum(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_gfmsum(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vgfmh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long 
-vec_gfmsum(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vgfmf(__a, __b); } /*-- vec_gfmsum_128 ---------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_gfmsum_128(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_gfmsum_128(__vector unsigned long long __a, + __vector unsigned long long __b) { return __builtin_s390_vgfmg(__a, __b); } /*-- vec_gfmsum_accum -------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned short -vec_gfmsum_accum(vector unsigned char __a, vector unsigned char __b, - vector unsigned short __c) { +static inline __ATTRS_o_ai __vector unsigned short +vec_gfmsum_accum(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned short __c) { return __builtin_s390_vgfmab(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned int -vec_gfmsum_accum(vector unsigned short __a, vector unsigned short __b, - vector unsigned int __c) { +static inline __ATTRS_o_ai __vector unsigned int +vec_gfmsum_accum(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned int __c) { return __builtin_s390_vgfmah(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned long long -vec_gfmsum_accum(vector unsigned int __a, vector unsigned int __b, - vector unsigned long long __c) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_gfmsum_accum(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned long long __c) { return __builtin_s390_vgfmaf(__a, __b, __c); } /*-- vec_gfmsum_accum_128 ---------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_gfmsum_accum_128(vector unsigned long long __a, - vector unsigned long long __b, - vector unsigned char __c) { +static inline __ATTRS_o_ai __vector unsigned char +vec_gfmsum_accum_128(__vector unsigned long long __a, + __vector unsigned long long __b, + __vector unsigned char __c) { return __builtin_s390_vgfmag(__a, __b, __c); } /*-- vec_mladd --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_mladd(vector signed char __a, vector signed char __b, - vector signed char __c) { +static inline __ATTRS_o_ai __vector signed char +vec_mladd(__vector signed char __a, __vector signed char __b, + __vector signed char __c) { return __a * __b + __c; } -static inline __ATTRS_o_ai vector signed char -vec_mladd(vector unsigned char __a, vector signed char __b, - vector signed char __c) { - return (vector signed char)__a * __b + __c; +static inline __ATTRS_o_ai __vector signed char +vec_mladd(__vector unsigned char __a, __vector signed char __b, + __vector signed char __c) { + return (__vector signed char)__a * __b + __c; } -static inline __ATTRS_o_ai vector signed char -vec_mladd(vector signed char __a, vector unsigned char __b, - vector unsigned char __c) { - return __a * (vector signed char)__b + (vector signed char)__c; +static inline __ATTRS_o_ai __vector signed char +vec_mladd(__vector signed char __a, __vector unsigned char __b, + __vector unsigned char __c) { + return __a * (__vector signed char)__b + (__vector signed char)__c; } -static inline __ATTRS_o_ai vector unsigned char -vec_mladd(vector unsigned char __a, vector unsigned char __b, - vector unsigned char 
__c) { +static inline __ATTRS_o_ai __vector unsigned char +vec_mladd(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c) { return __a * __b + __c; } -static inline __ATTRS_o_ai vector signed short -vec_mladd(vector signed short __a, vector signed short __b, - vector signed short __c) { +static inline __ATTRS_o_ai __vector signed short +vec_mladd(__vector signed short __a, __vector signed short __b, + __vector signed short __c) { return __a * __b + __c; } -static inline __ATTRS_o_ai vector signed short -vec_mladd(vector unsigned short __a, vector signed short __b, - vector signed short __c) { - return (vector signed short)__a * __b + __c; +static inline __ATTRS_o_ai __vector signed short +vec_mladd(__vector unsigned short __a, __vector signed short __b, + __vector signed short __c) { + return (__vector signed short)__a * __b + __c; } -static inline __ATTRS_o_ai vector signed short -vec_mladd(vector signed short __a, vector unsigned short __b, - vector unsigned short __c) { - return __a * (vector signed short)__b + (vector signed short)__c; +static inline __ATTRS_o_ai __vector signed short +vec_mladd(__vector signed short __a, __vector unsigned short __b, + __vector unsigned short __c) { + return __a * (__vector signed short)__b + (__vector signed short)__c; } -static inline __ATTRS_o_ai vector unsigned short -vec_mladd(vector unsigned short __a, vector unsigned short __b, - vector unsigned short __c) { +static inline __ATTRS_o_ai __vector unsigned short +vec_mladd(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned short __c) { return __a * __b + __c; } -static inline __ATTRS_o_ai vector signed int -vec_mladd(vector signed int __a, vector signed int __b, - vector signed int __c) { +static inline __ATTRS_o_ai __vector signed int +vec_mladd(__vector signed int __a, __vector signed int __b, + __vector signed int __c) { return __a * __b + __c; } -static inline __ATTRS_o_ai vector signed int -vec_mladd(vector unsigned int __a, vector signed int __b, - vector signed int __c) { - return (vector signed int)__a * __b + __c; +static inline __ATTRS_o_ai __vector signed int +vec_mladd(__vector unsigned int __a, __vector signed int __b, + __vector signed int __c) { + return (__vector signed int)__a * __b + __c; } -static inline __ATTRS_o_ai vector signed int -vec_mladd(vector signed int __a, vector unsigned int __b, - vector unsigned int __c) { - return __a * (vector signed int)__b + (vector signed int)__c; +static inline __ATTRS_o_ai __vector signed int +vec_mladd(__vector signed int __a, __vector unsigned int __b, + __vector unsigned int __c) { + return __a * (__vector signed int)__b + (__vector signed int)__c; } -static inline __ATTRS_o_ai vector unsigned int -vec_mladd(vector unsigned int __a, vector unsigned int __b, - vector unsigned int __c) { +static inline __ATTRS_o_ai __vector unsigned int +vec_mladd(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned int __c) { return __a * __b + __c; } /*-- vec_mhadd --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_mhadd(vector signed char __a, vector signed char __b, - vector signed char __c) { +static inline __ATTRS_o_ai __vector signed char +vec_mhadd(__vector signed char __a, __vector signed char __b, + __vector signed char __c) { return __builtin_s390_vmahb(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned char -vec_mhadd(vector unsigned char __a, vector unsigned char __b, - vector unsigned 
char __c) { +static inline __ATTRS_o_ai __vector unsigned char +vec_mhadd(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c) { return __builtin_s390_vmalhb(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed short -vec_mhadd(vector signed short __a, vector signed short __b, - vector signed short __c) { +static inline __ATTRS_o_ai __vector signed short +vec_mhadd(__vector signed short __a, __vector signed short __b, + __vector signed short __c) { return __builtin_s390_vmahh(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned short -vec_mhadd(vector unsigned short __a, vector unsigned short __b, - vector unsigned short __c) { +static inline __ATTRS_o_ai __vector unsigned short +vec_mhadd(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned short __c) { return __builtin_s390_vmalhh(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed int -vec_mhadd(vector signed int __a, vector signed int __b, - vector signed int __c) { +static inline __ATTRS_o_ai __vector signed int +vec_mhadd(__vector signed int __a, __vector signed int __b, + __vector signed int __c) { return __builtin_s390_vmahf(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned int -vec_mhadd(vector unsigned int __a, vector unsigned int __b, - vector unsigned int __c) { +static inline __ATTRS_o_ai __vector unsigned int +vec_mhadd(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned int __c) { return __builtin_s390_vmalhf(__a, __b, __c); } /*-- vec_meadd --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_meadd(vector signed char __a, vector signed char __b, - vector signed short __c) { +static inline __ATTRS_o_ai __vector signed short +vec_meadd(__vector signed char __a, __vector signed char __b, + __vector signed short __c) { return __builtin_s390_vmaeb(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned short -vec_meadd(vector unsigned char __a, vector unsigned char __b, - vector unsigned short __c) { +static inline __ATTRS_o_ai __vector unsigned short +vec_meadd(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned short __c) { return __builtin_s390_vmaleb(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed int -vec_meadd(vector signed short __a, vector signed short __b, - vector signed int __c) { +static inline __ATTRS_o_ai __vector signed int +vec_meadd(__vector signed short __a, __vector signed short __b, + __vector signed int __c) { return __builtin_s390_vmaeh(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned int -vec_meadd(vector unsigned short __a, vector unsigned short __b, - vector unsigned int __c) { +static inline __ATTRS_o_ai __vector unsigned int +vec_meadd(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned int __c) { return __builtin_s390_vmaleh(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed long long -vec_meadd(vector signed int __a, vector signed int __b, - vector signed long long __c) { +static inline __ATTRS_o_ai __vector signed long long +vec_meadd(__vector signed int __a, __vector signed int __b, + __vector signed long long __c) { return __builtin_s390_vmaef(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned long long -vec_meadd(vector unsigned int __a, vector unsigned int __b, - vector unsigned long long __c) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_meadd(__vector unsigned int __a, __vector unsigned int __b, + 
__vector unsigned long long __c) { return __builtin_s390_vmalef(__a, __b, __c); } /*-- vec_moadd --------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_moadd(vector signed char __a, vector signed char __b, - vector signed short __c) { +static inline __ATTRS_o_ai __vector signed short +vec_moadd(__vector signed char __a, __vector signed char __b, + __vector signed short __c) { return __builtin_s390_vmaob(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned short -vec_moadd(vector unsigned char __a, vector unsigned char __b, - vector unsigned short __c) { +static inline __ATTRS_o_ai __vector unsigned short +vec_moadd(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned short __c) { return __builtin_s390_vmalob(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed int -vec_moadd(vector signed short __a, vector signed short __b, - vector signed int __c) { +static inline __ATTRS_o_ai __vector signed int +vec_moadd(__vector signed short __a, __vector signed short __b, + __vector signed int __c) { return __builtin_s390_vmaoh(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned int -vec_moadd(vector unsigned short __a, vector unsigned short __b, - vector unsigned int __c) { +static inline __ATTRS_o_ai __vector unsigned int +vec_moadd(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned int __c) { return __builtin_s390_vmaloh(__a, __b, __c); } -static inline __ATTRS_o_ai vector signed long long -vec_moadd(vector signed int __a, vector signed int __b, - vector signed long long __c) { +static inline __ATTRS_o_ai __vector signed long long +vec_moadd(__vector signed int __a, __vector signed int __b, + __vector signed long long __c) { return __builtin_s390_vmaof(__a, __b, __c); } -static inline __ATTRS_o_ai vector unsigned long long -vec_moadd(vector unsigned int __a, vector unsigned int __b, - vector unsigned long long __c) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_moadd(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned long long __c) { return __builtin_s390_vmalof(__a, __b, __c); } /*-- vec_mulh ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_mulh(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed char +vec_mulh(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmhb(__a, __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_mulh(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_mulh(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmlhb(__a, __b); } -static inline __ATTRS_o_ai vector signed short -vec_mulh(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed short +vec_mulh(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmhh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_mulh(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_mulh(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmlhh(__a, __b); } -static inline __ATTRS_o_ai vector signed int -vec_mulh(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed int +vec_mulh(__vector signed int __a, __vector signed int __b) 
{ return __builtin_s390_vmhf(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_mulh(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_mulh(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlhf(__a, __b); } /*-- vec_mule ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_mule(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed short +vec_mule(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmeb(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_mule(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_mule(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmleb(__a, __b); } -static inline __ATTRS_o_ai vector signed int -vec_mule(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed int +vec_mule(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmeh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_mule(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_mule(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmleh(__a, __b); } -static inline __ATTRS_o_ai vector signed long long -vec_mule(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_mule(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vmef(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_mule(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_mule(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlef(__a, __b); } /*-- vec_mulo ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed short -vec_mulo(vector signed char __a, vector signed char __b) { +static inline __ATTRS_o_ai __vector signed short +vec_mulo(__vector signed char __a, __vector signed char __b) { return __builtin_s390_vmob(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_mulo(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_mulo(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vmlob(__a, __b); } -static inline __ATTRS_o_ai vector signed int -vec_mulo(vector signed short __a, vector signed short __b) { +static inline __ATTRS_o_ai __vector signed int +vec_mulo(__vector signed short __a, __vector signed short __b) { return __builtin_s390_vmoh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_mulo(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_mulo(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vmloh(__a, __b); } -static inline __ATTRS_o_ai vector signed long long -vec_mulo(vector signed int __a, vector signed int __b) { +static inline __ATTRS_o_ai __vector signed long long +vec_mulo(__vector signed int __a, __vector signed int __b) { return __builtin_s390_vmof(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_mulo(vector unsigned int __a, vector unsigned int __b) { +static 
inline __ATTRS_o_ai __vector unsigned long long +vec_mulo(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlof(__a, __b); } @@ -8630,198 +8703,199 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) { #if __ARCH__ >= 12 #define vec_msum_u128(X, Y, Z, W) \ - ((vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W))); + ((__vector unsigned char)__builtin_s390_vmslg((X), (Y), (Z), (W))); #endif /*-- vec_sub_u128 -----------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned char -vec_sub_u128(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_ai __vector unsigned char +vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsq(__a, __b); } /*-- vec_subc ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_subc(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_subc(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vscbib(__a, __b); } -static inline __ATTRS_o_ai vector unsigned short -vec_subc(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_subc(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vscbih(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_subc(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_subc(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vscbif(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_subc(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vscbig(__a, __b); } /*-- vec_subc_u128 ----------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned char -vec_subc_u128(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_ai __vector unsigned char +vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vscbiq(__a, __b); } /*-- vec_sube_u128 ----------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned char -vec_sube_u128(vector unsigned char __a, vector unsigned char __b, - vector unsigned char __c) { +static inline __ATTRS_ai __vector unsigned char +vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c) { return __builtin_s390_vsbiq(__a, __b, __c); } /*-- vec_subec_u128 ---------------------------------------------------------*/ -static inline __ATTRS_ai vector unsigned char -vec_subec_u128(vector unsigned char __a, vector unsigned char __b, - vector unsigned char __c) { +static inline __ATTRS_ai __vector unsigned char +vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c) { return __builtin_s390_vsbcbiq(__a, __b, __c); } /*-- vec_sum2 ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned long long -vec_sum2(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_sum2(__vector unsigned short __a, __vector unsigned short __b) { return 
__builtin_s390_vsumgh(__a, __b); } -static inline __ATTRS_o_ai vector unsigned long long -vec_sum2(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned long long +vec_sum2(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vsumgf(__a, __b); } /*-- vec_sum_u128 -----------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned char -vec_sum_u128(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vsumqf(__a, __b); } -static inline __ATTRS_o_ai vector unsigned char -vec_sum_u128(vector unsigned long long __a, vector unsigned long long __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vsumqg(__a, __b); } /*-- vec_sum4 ---------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned int -vec_sum4(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_sum4(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsumb(__a, __b); } -static inline __ATTRS_o_ai vector unsigned int -vec_sum4(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_sum4(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vsumh(__a, __b); } /*-- vec_test_mask ----------------------------------------------------------*/ static inline __ATTRS_o_ai int -vec_test_mask(vector signed char __a, vector unsigned char __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector signed char __a, __vector unsigned char __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector unsigned char __a, vector unsigned char __b) { +vec_test_mask(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vtm(__a, __b); } static inline __ATTRS_o_ai int -vec_test_mask(vector signed short __a, vector unsigned short __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector signed short __a, __vector unsigned short __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector unsigned short __a, vector unsigned short __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector unsigned short __a, __vector unsigned short __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector signed int __a, vector unsigned int __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector signed int __a, __vector unsigned int __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector unsigned int __a, vector unsigned int __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector unsigned int __a, __vector unsigned int __b) { + return 
__builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector signed long long __a, vector unsigned long long __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector signed long long __a, __vector unsigned long long __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } static inline __ATTRS_o_ai int -vec_test_mask(vector unsigned long long __a, vector unsigned long long __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector unsigned long long __a, + __vector unsigned long long __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } #if __ARCH__ >= 12 static inline __ATTRS_o_ai int -vec_test_mask(vector float __a, vector unsigned int __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector float __a, __vector unsigned int __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } #endif static inline __ATTRS_o_ai int -vec_test_mask(vector double __a, vector unsigned long long __b) { - return __builtin_s390_vtm((vector unsigned char)__a, - (vector unsigned char)__b); +vec_test_mask(__vector double __a, __vector unsigned long long __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); } /*-- vec_madd ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_madd(vector float __a, vector float __b, vector float __c) { +static inline __ATTRS_o_ai __vector float +vec_madd(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfmasb(__a, __b, __c); } #endif -static inline __ATTRS_o_ai vector double -vec_madd(vector double __a, vector double __b, vector double __c) { +static inline __ATTRS_o_ai __vector double +vec_madd(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfmadb(__a, __b, __c); } /*-- vec_msub ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_msub(vector float __a, vector float __b, vector float __c) { +static inline __ATTRS_o_ai __vector float +vec_msub(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfmssb(__a, __b, __c); } #endif -static inline __ATTRS_o_ai vector double -vec_msub(vector double __a, vector double __b, vector double __c) { +static inline __ATTRS_o_ai __vector double +vec_msub(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfmsdb(__a, __b, __c); } /*-- vec_nmadd ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_nmadd(vector float __a, vector float __b, vector float __c) { +static inline __ATTRS_o_ai __vector float +vec_nmadd(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfnmasb(__a, __b, __c); } -static inline __ATTRS_o_ai vector double -vec_nmadd(vector double __a, vector double __b, vector double __c) { +static inline __ATTRS_o_ai __vector double +vec_nmadd(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfnmadb(__a, __b, __c); } #endif @@ -8829,13 +8903,13 @@ vec_nmadd(vector double __a, vector double __b, 
vector double __c) { /*-- vec_nmsub ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_nmsub(vector float __a, vector float __b, vector float __c) { +static inline __ATTRS_o_ai __vector float +vec_nmsub(__vector float __a, __vector float __b, __vector float __c) { return __builtin_s390_vfnmssb(__a, __b, __c); } -static inline __ATTRS_o_ai vector double -vec_nmsub(vector double __a, vector double __b, vector double __c) { +static inline __ATTRS_o_ai __vector double +vec_nmsub(__vector double __a, __vector double __b, __vector double __c) { return __builtin_s390_vfnmsdb(__a, __b, __c); } #endif @@ -8843,31 +8917,31 @@ vec_nmsub(vector double __a, vector double __b, vector double __c) { /*-- vec_sqrt ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_sqrt(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_sqrt(__vector float __a) { return __builtin_s390_vfsqsb(__a); } #endif -static inline __ATTRS_o_ai vector double -vec_sqrt(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_sqrt(__vector double __a) { return __builtin_s390_vfsqdb(__a); } /*-- vec_ld2f ---------------------------------------------------------------*/ // This prototype is deprecated. -static inline __ATTRS_ai vector double +static inline __ATTRS_ai __vector double vec_ld2f(const float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); - return __builtin_convertvector(*(const __v2f32 *)__ptr, vector double); + return __builtin_convertvector(*(const __v2f32 *)__ptr, __vector double); } /*-- vec_st2f ---------------------------------------------------------------*/ // This prototype is deprecated. static inline __ATTRS_ai void -vec_st2f(vector double __a, float *__ptr) { +vec_st2f(__vector double __a, float *__ptr) { typedef float __v2f32 __attribute__((__vector_size__(8))); *(__v2f32 *)__ptr = __builtin_convertvector(__a, __v2f32); } @@ -8875,59 +8949,63 @@ vec_st2f(vector double __a, float *__ptr) { /*-- vec_ctd ----------------------------------------------------------------*/ // This prototype is deprecated. -static inline __ATTRS_o_ai vector double -vec_ctd(vector signed long long __a, int __b) +static inline __ATTRS_o_ai __vector double +vec_ctd(__vector signed long long __a, int __b) __constant_range(__b, 0, 31) { - vector double __conv = __builtin_convertvector(__a, vector double); - __conv *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + __vector double __conv = __builtin_convertvector(__a, __vector double); + __conv *= ((__vector double)(__vector unsigned long long) + ((0x3ffULL - __b) << 52)); return __conv; } // This prototype is deprecated. -static inline __ATTRS_o_ai vector double -vec_ctd(vector unsigned long long __a, int __b) +static inline __ATTRS_o_ai __vector double +vec_ctd(__vector unsigned long long __a, int __b) __constant_range(__b, 0, 31) { - vector double __conv = __builtin_convertvector(__a, vector double); - __conv *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52); + __vector double __conv = __builtin_convertvector(__a, __vector double); + __conv *= ((__vector double)(__vector unsigned long long) + ((0x3ffULL - __b) << 52)); return __conv; } /*-- vec_ctsl ---------------------------------------------------------------*/ // This prototype is deprecated. 
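// The scale factors in vec_ctd above and in vec_ctsl/vec_ctul below are
// built directly as IEEE-754 bit patterns: (0x3ffULL - __b) << 52 places
// the biased exponent (bias 0x3ff) above the 52 fraction bits with an
// all-zero fraction, giving a double equal to exactly 2^-__b; the
// 0x3ffULL + __b form gives 2^__b. E.g. __b == 2 yields the bit pattern
// of 0.25. Multiplying by such a power of two rescales the fixed-point
// value exactly, with no second conversion step.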
-static inline __ATTRS_o_ai vector signed long long -vec_ctsl(vector double __a, int __b) +static inline __ATTRS_o_ai __vector signed long long +vec_ctsl(__vector double __a, int __b) __constant_range(__b, 0, 31) { - __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); - return __builtin_convertvector(__a, vector signed long long); + __a *= ((__vector double)(__vector unsigned long long) + ((0x3ffULL + __b) << 52)); + return __builtin_convertvector(__a, __vector signed long long); } /*-- vec_ctul ---------------------------------------------------------------*/ // This prototype is deprecated. -static inline __ATTRS_o_ai vector unsigned long long -vec_ctul(vector double __a, int __b) +static inline __ATTRS_o_ai __vector unsigned long long +vec_ctul(__vector double __a, int __b) __constant_range(__b, 0, 31) { - __a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52); - return __builtin_convertvector(__a, vector unsigned long long); + __a *= ((__vector double)(__vector unsigned long long) + ((0x3ffULL + __b) << 52)); + return __builtin_convertvector(__a, __vector unsigned long long); } /*-- vec_doublee ------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_ai vector double -vec_doublee(vector float __a) { +static inline __ATTRS_ai __vector double +vec_doublee(__vector float __a) { typedef float __v2f32 __attribute__((__vector_size__(8))); __v2f32 __pack = __builtin_shufflevector(__a, __a, 0, 2); - return __builtin_convertvector(__pack, vector double); + return __builtin_convertvector(__pack, __vector double); } #endif /*-- vec_floate -------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_ai vector float -vec_floate(vector double __a) { +static inline __ATTRS_ai __vector float +vec_floate(__vector double __a) { typedef float __v2f32 __attribute__((__vector_size__(8))); __v2f32 __pack = __builtin_convertvector(__a, __v2f32); return __builtin_shufflevector(__pack, __pack, 0, -1, 1, -1); @@ -8936,86 +9014,86 @@ vec_floate(vector double __a) { /*-- vec_double -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector double -vec_double(vector signed long long __a) { - return __builtin_convertvector(__a, vector double); +static inline __ATTRS_o_ai __vector double +vec_double(__vector signed long long __a) { + return __builtin_convertvector(__a, __vector double); } -static inline __ATTRS_o_ai vector double -vec_double(vector unsigned long long __a) { - return __builtin_convertvector(__a, vector double); +static inline __ATTRS_o_ai __vector double +vec_double(__vector unsigned long long __a) { + return __builtin_convertvector(__a, __vector double); } /*-- vec_float --------------------------------------------------------------*/ #if __ARCH__ >= 13 -static inline __ATTRS_o_ai vector float -vec_float(vector signed int __a) { - return __builtin_convertvector(__a, vector float); +static inline __ATTRS_o_ai __vector float +vec_float(__vector signed int __a) { + return __builtin_convertvector(__a, __vector float); } -static inline __ATTRS_o_ai vector float -vec_float(vector unsigned int __a) { - return __builtin_convertvector(__a, vector float); +static inline __ATTRS_o_ai __vector float +vec_float(__vector unsigned int __a) { + return __builtin_convertvector(__a, __vector float); } #endif /*-- vec_signed -------------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed long long 
-vec_signed(vector double __a) { - return __builtin_convertvector(__a, vector signed long long); +static inline __ATTRS_o_ai __vector signed long long +vec_signed(__vector double __a) { + return __builtin_convertvector(__a, __vector signed long long); } #if __ARCH__ >= 13 -static inline __ATTRS_o_ai vector signed int -vec_signed(vector float __a) { - return __builtin_convertvector(__a, vector signed int); +static inline __ATTRS_o_ai __vector signed int +vec_signed(__vector float __a) { + return __builtin_convertvector(__a, __vector signed int); } #endif /*-- vec_unsigned -----------------------------------------------------------*/ -static inline __ATTRS_o_ai vector unsigned long long -vec_unsigned(vector double __a) { - return __builtin_convertvector(__a, vector unsigned long long); +static inline __ATTRS_o_ai __vector unsigned long long +vec_unsigned(__vector double __a) { + return __builtin_convertvector(__a, __vector unsigned long long); } #if __ARCH__ >= 13 -static inline __ATTRS_o_ai vector unsigned int -vec_unsigned(vector float __a) { - return __builtin_convertvector(__a, vector unsigned int); +static inline __ATTRS_o_ai __vector unsigned int +vec_unsigned(__vector float __a) { + return __builtin_convertvector(__a, __vector unsigned int); } #endif /*-- vec_roundp -------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_roundp(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_roundp(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 6); } #endif -static inline __ATTRS_o_ai vector double -vec_roundp(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_roundp(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 6); } /*-- vec_ceil ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_ceil(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_ceil(__vector float __a) { // On this platform, vec_ceil never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 6); } #endif -static inline __ATTRS_o_ai vector double -vec_ceil(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_ceil(__vector double __a) { // On this platform, vec_ceil never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 6); } @@ -9023,29 +9101,29 @@ vec_ceil(vector double __a) { /*-- vec_roundm -------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_roundm(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_roundm(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 7); } #endif -static inline __ATTRS_o_ai vector double -vec_roundm(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_roundm(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 7); } /*-- vec_floor --------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_floor(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_floor(__vector float __a) { // On this platform, vec_floor never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 7); } #endif -static inline __ATTRS_o_ai vector double -vec_floor(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_floor(__vector double __a) { // On this platform, vec_floor never triggers the IEEE-inexact exception. 
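  // The immediates select the behavior of the underlying VFIDB
  // instruction: the first (4) sets the inexact-suppression control,
  // which is why no IEEE-inexact exception is raised; the second picks
  // the rounding method (7 = toward -infinity here, 6 = toward +infinity
  // for vec_ceil, 5 = toward zero for vec_trunc, 4 = to nearest with
  // ties to even, 0 = the current mode from the FPC).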
return __builtin_s390_vfidb(__a, 4, 7); } @@ -9053,29 +9131,29 @@ vec_floor(vector double __a) { /*-- vec_roundz -------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_roundz(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_roundz(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 5); } #endif -static inline __ATTRS_o_ai vector double -vec_roundz(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_roundz(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 5); } /*-- vec_trunc --------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_trunc(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_trunc(__vector float __a) { // On this platform, vec_trunc never triggers the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 4, 5); } #endif -static inline __ATTRS_o_ai vector double -vec_trunc(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_trunc(__vector double __a) { // On this platform, vec_trunc never triggers the IEEE-inexact exception. return __builtin_s390_vfidb(__a, 4, 5); } @@ -9083,29 +9161,29 @@ vec_trunc(vector double __a) { /*-- vec_roundc -------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_roundc(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_roundc(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 0); } #endif -static inline __ATTRS_o_ai vector double -vec_roundc(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_roundc(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 0); } /*-- vec_rint ---------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_rint(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_rint(__vector float __a) { // vec_rint may trigger the IEEE-inexact exception. return __builtin_s390_vfisb(__a, 0, 0); } #endif -static inline __ATTRS_o_ai vector double -vec_rint(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_rint(__vector double __a) { // vec_rint may trigger the IEEE-inexact exception. 
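  // Here both immediates are 0: inexact suppression is off, so the
  // IEEE-inexact exception may be raised, and rounding follows the
  // current mode in the floating-point control register, matching the
  // semantics of C rint().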
return __builtin_s390_vfidb(__a, 0, 0); } @@ -9113,45 +9191,45 @@ vec_rint(vector double __a) { /*-- vec_round --------------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_o_ai vector float -vec_round(vector float __a) { +static inline __ATTRS_o_ai __vector float +vec_round(__vector float __a) { return __builtin_s390_vfisb(__a, 4, 4); } #endif -static inline __ATTRS_o_ai vector double -vec_round(vector double __a) { +static inline __ATTRS_o_ai __vector double +vec_round(__vector double __a) { return __builtin_s390_vfidb(__a, 4, 4); } /*-- vec_fp_test_data_class -------------------------------------------------*/ #if __ARCH__ >= 12 -extern __ATTRS_o vector bool int -vec_fp_test_data_class(vector float __a, int __b, int *__c) +extern __ATTRS_o __vector __bool int +vec_fp_test_data_class(__vector float __a, int __b, int *__c) __constant_range(__b, 0, 4095); -extern __ATTRS_o vector bool long long -vec_fp_test_data_class(vector double __a, int __b, int *__c) +extern __ATTRS_o __vector __bool long long +vec_fp_test_data_class(__vector double __a, int __b, int *__c) __constant_range(__b, 0, 4095); #define vec_fp_test_data_class(X, Y, Z) \ ((__typeof__((vec_fp_test_data_class)((X), (Y), (Z)))) \ __extension__ ({ \ - vector unsigned char __res; \ - vector unsigned char __x = (vector unsigned char)(X); \ + __vector unsigned char __res; \ + __vector unsigned char __x = (__vector unsigned char)(X); \ int *__z = (Z); \ switch (sizeof ((X)[0])) { \ - case 4: __res = (vector unsigned char) \ - __builtin_s390_vftcisb((vector float)__x, (Y), __z); \ + case 4: __res = (__vector unsigned char) \ + __builtin_s390_vftcisb((__vector float)__x, (Y), __z); \ break; \ - default: __res = (vector unsigned char) \ - __builtin_s390_vftcidb((vector double)__x, (Y), __z); \ + default: __res = (__vector unsigned char) \ + __builtin_s390_vftcidb((__vector double)__x, (Y), __z); \ break; \ } __res; })) #else #define vec_fp_test_data_class(X, Y, Z) \ - ((vector bool long long)__builtin_s390_vftcidb((X), (Y), (Z))) + ((__vector __bool long long)__builtin_s390_vftcidb((X), (Y), (Z))) #endif #define __VEC_CLASS_FP_ZERO_P (1 << 11) @@ -9183,1527 +9261,1585 @@ vec_fp_test_data_class(vector double __a, int __b, int *__c) /*-- vec_cp_until_zero ------------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_cp_until_zero(vector signed char __a) { - return (vector signed char)__builtin_s390_vistrb((vector unsigned char)__a); +static inline __ATTRS_o_ai __vector signed char +vec_cp_until_zero(__vector signed char __a) { + return ((__vector signed char) + __builtin_s390_vistrb((__vector unsigned char)__a)); } -static inline __ATTRS_o_ai vector bool char -vec_cp_until_zero(vector bool char __a) { - return (vector bool char)__builtin_s390_vistrb((vector unsigned char)__a); +static inline __ATTRS_o_ai __vector __bool char +vec_cp_until_zero(__vector __bool char __a) { + return ((__vector __bool char) + __builtin_s390_vistrb((__vector unsigned char)__a)); } -static inline __ATTRS_o_ai vector unsigned char -vec_cp_until_zero(vector unsigned char __a) { +static inline __ATTRS_o_ai __vector unsigned char +vec_cp_until_zero(__vector unsigned char __a) { return __builtin_s390_vistrb(__a); } -static inline __ATTRS_o_ai vector signed short -vec_cp_until_zero(vector signed short __a) { - return (vector signed short)__builtin_s390_vistrh((vector unsigned short)__a); +static inline __ATTRS_o_ai __vector signed short +vec_cp_until_zero(__vector signed short 
__a) { + return ((__vector signed short) + __builtin_s390_vistrh((__vector unsigned short)__a)); } -static inline __ATTRS_o_ai vector bool short -vec_cp_until_zero(vector bool short __a) { - return (vector bool short)__builtin_s390_vistrh((vector unsigned short)__a); +static inline __ATTRS_o_ai __vector __bool short +vec_cp_until_zero(__vector __bool short __a) { + return ((__vector __bool short) + __builtin_s390_vistrh((__vector unsigned short)__a)); } -static inline __ATTRS_o_ai vector unsigned short -vec_cp_until_zero(vector unsigned short __a) { +static inline __ATTRS_o_ai __vector unsigned short +vec_cp_until_zero(__vector unsigned short __a) { return __builtin_s390_vistrh(__a); } -static inline __ATTRS_o_ai vector signed int -vec_cp_until_zero(vector signed int __a) { - return (vector signed int)__builtin_s390_vistrf((vector unsigned int)__a); +static inline __ATTRS_o_ai __vector signed int +vec_cp_until_zero(__vector signed int __a) { + return ((__vector signed int) + __builtin_s390_vistrf((__vector unsigned int)__a)); } -static inline __ATTRS_o_ai vector bool int -vec_cp_until_zero(vector bool int __a) { - return (vector bool int)__builtin_s390_vistrf((vector unsigned int)__a); +static inline __ATTRS_o_ai __vector __bool int +vec_cp_until_zero(__vector __bool int __a) { + return ((__vector __bool int) + __builtin_s390_vistrf((__vector unsigned int)__a)); } -static inline __ATTRS_o_ai vector unsigned int -vec_cp_until_zero(vector unsigned int __a) { +static inline __ATTRS_o_ai __vector unsigned int +vec_cp_until_zero(__vector unsigned int __a) { return __builtin_s390_vistrf(__a); } /*-- vec_cp_until_zero_cc ---------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_cp_until_zero_cc(vector signed char __a, int *__cc) { - return (vector signed char) - __builtin_s390_vistrbs((vector unsigned char)__a, __cc); +static inline __ATTRS_o_ai __vector signed char +vec_cp_until_zero_cc(__vector signed char __a, int *__cc) { + return (__vector signed char) + __builtin_s390_vistrbs((__vector unsigned char)__a, __cc); } -static inline __ATTRS_o_ai vector bool char -vec_cp_until_zero_cc(vector bool char __a, int *__cc) { - return (vector bool char) - __builtin_s390_vistrbs((vector unsigned char)__a, __cc); +static inline __ATTRS_o_ai __vector __bool char +vec_cp_until_zero_cc(__vector __bool char __a, int *__cc) { + return (__vector __bool char) + __builtin_s390_vistrbs((__vector unsigned char)__a, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_cp_until_zero_cc(vector unsigned char __a, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_cp_until_zero_cc(__vector unsigned char __a, int *__cc) { return __builtin_s390_vistrbs(__a, __cc); } -static inline __ATTRS_o_ai vector signed short -vec_cp_until_zero_cc(vector signed short __a, int *__cc) { - return (vector signed short) - __builtin_s390_vistrhs((vector unsigned short)__a, __cc); +static inline __ATTRS_o_ai __vector signed short +vec_cp_until_zero_cc(__vector signed short __a, int *__cc) { + return (__vector signed short) + __builtin_s390_vistrhs((__vector unsigned short)__a, __cc); } -static inline __ATTRS_o_ai vector bool short -vec_cp_until_zero_cc(vector bool short __a, int *__cc) { - return (vector bool short) - __builtin_s390_vistrhs((vector unsigned short)__a, __cc); +static inline __ATTRS_o_ai __vector __bool short +vec_cp_until_zero_cc(__vector __bool short __a, int *__cc) { + return (__vector __bool short) + __builtin_s390_vistrhs((__vector unsigned 
short)__a, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cp_until_zero_cc(vector unsigned short __a, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cp_until_zero_cc(__vector unsigned short __a, int *__cc) {
   return __builtin_s390_vistrhs(__a, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cp_until_zero_cc(vector signed int __a, int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vistrfs((vector unsigned int)__a, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_cp_until_zero_cc(__vector signed int __a, int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vistrfs((__vector unsigned int)__a, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_cp_until_zero_cc(vector bool int __a, int *__cc) {
-  return (vector bool int)__builtin_s390_vistrfs((vector unsigned int)__a,
-                                                 __cc);
+static inline __ATTRS_o_ai __vector __bool int
+vec_cp_until_zero_cc(__vector __bool int __a, int *__cc) {
+  return (__vector __bool int)
+    __builtin_s390_vistrfs((__vector unsigned int)__a, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cp_until_zero_cc(vector unsigned int __a, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cp_until_zero_cc(__vector unsigned int __a, int *__cc) {
   return __builtin_s390_vistrfs(__a, __cc);
 }
 
 /*-- vec_cmpeq_idx ----------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpeq_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfeeb((vector unsigned char)__a,
-                         (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpeq_idx(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfeeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfeeb((vector unsigned char)__a,
-                              (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_idx(__vector __bool char __a, __vector __bool char __b) {
+  return __builtin_s390_vfeeb((__vector unsigned char)__a,
+                              (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_idx(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vfeeb(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpeq_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfeeh((vector unsigned short)__a,
-                         (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpeq_idx(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfeeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfeeh((vector unsigned short)__a,
-                              (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_idx(__vector __bool short __a, __vector __bool short __b) {
+  return __builtin_s390_vfeeh((__vector unsigned short)__a,
+                              (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_idx(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_vfeeh(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpeq_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfeef((vector unsigned int)__a,
-                         (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpeq_idx(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfeef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfeef((vector unsigned int)__a,
-                              (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_idx(__vector __bool int __a, __vector __bool int __b) {
+  return __builtin_s390_vfeef((__vector unsigned int)__a,
+                              (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_idx(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_vfeef(__a, __b);
 }
 
 /*-- vec_cmpeq_idx_cc -------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpeq_idx_cc(vector signed char __a, vector signed char __b, int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfeebs((vector unsigned char)__a,
-                          (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpeq_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) {
+  return (__vector signed char)
+    __builtin_s390_vfeebs((__vector unsigned char)__a,
+                          (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_idx_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return __builtin_s390_vfeebs((vector unsigned char)__a,
-                               (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) {
+  return __builtin_s390_vfeebs((__vector unsigned char)__a,
+                               (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_idx_cc(vector unsigned char __a, vector unsigned char __b,
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
                  int *__cc) {
   return __builtin_s390_vfeebs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpeq_idx_cc(vector signed short __a, vector signed short __b, int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfeehs((vector unsigned short)__a,
-                          (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpeq_idx_cc(__vector signed short __a, __vector signed short __b,
+                 int *__cc) {
+  return (__vector signed short)
+    __builtin_s390_vfeehs((__vector unsigned short)__a,
+                          (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_idx_cc(vector bool short __a, vector bool short __b, int *__cc) {
-  return __builtin_s390_vfeehs((vector unsigned short)__a,
-                               (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_idx_cc(__vector __bool short __a, __vector __bool short __b, int *__cc) {
+  return __builtin_s390_vfeehs((__vector unsigned short)__a,
+                               (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_idx_cc(vector unsigned short __a, vector unsigned short __b,
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
                  int *__cc) {
   return __builtin_s390_vfeehs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpeq_idx_cc(vector signed int __a, vector signed int __b, int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfeefs((vector unsigned int)__a,
-                          (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpeq_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfeefs((__vector unsigned int)__a,
+                          (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_idx_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return __builtin_s390_vfeefs((vector unsigned int)__a,
-                               (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) {
+  return __builtin_s390_vfeefs((__vector unsigned int)__a,
+                               (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_idx_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
+                 int *__cc) {
   return __builtin_s390_vfeefs(__a, __b, __cc);
 }
 
 /*-- vec_cmpeq_or_0_idx -----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpeq_or_0_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfeezb((vector unsigned char)__a,
-                          (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpeq_or_0_idx(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfeezb((__vector unsigned char)__a,
+                          (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_or_0_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfeezb((vector unsigned char)__a,
-                               (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_or_0_idx(__vector __bool char __a, __vector __bool char __b) {
+  return __builtin_s390_vfeezb((__vector unsigned char)__a,
+                               (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_or_0_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vfeezb(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpeq_or_0_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfeezh((vector unsigned short)__a,
-                          (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpeq_or_0_idx(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfeezh((__vector unsigned short)__a,
+                          (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_or_0_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfeezh((vector unsigned short)__a,
-                               (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_or_0_idx(__vector __bool short __a, __vector __bool short __b) {
+  return __builtin_s390_vfeezh((__vector unsigned short)__a,
+                               (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_or_0_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_vfeezh(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpeq_or_0_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfeezf((vector unsigned int)__a,
-                          (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpeq_or_0_idx(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfeezf((__vector unsigned int)__a,
+                          (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_or_0_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfeezf((vector unsigned int)__a,
-                               (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_or_0_idx(__vector __bool int __a, __vector __bool int __b) {
+  return __builtin_s390_vfeezf((__vector unsigned int)__a,
+                               (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_or_0_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_vfeezf(__a, __b);
 }
 
 /*-- vec_cmpeq_or_0_idx_cc --------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpeq_or_0_idx_cc(vector signed char __a, vector signed char __b,
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpeq_or_0_idx_cc(__vector signed char __a, __vector signed char __b,
                       int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfeezbs((vector unsigned char)__a,
-                           (vector unsigned char)__b, __cc);
+  return (__vector signed char)
+    __builtin_s390_vfeezbs((__vector unsigned char)__a,
+                           (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_or_0_idx_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return __builtin_s390_vfeezbs((vector unsigned char)__a,
-                                (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b,
+                      int *__cc) {
+  return __builtin_s390_vfeezbs((__vector unsigned char)__a,
+                                (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpeq_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b,
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpeq_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
                       int *__cc) {
   return __builtin_s390_vfeezbs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpeq_or_0_idx_cc(vector signed short __a, vector signed short __b,
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpeq_or_0_idx_cc(__vector signed short __a, __vector signed short __b,
                       int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfeezhs((vector unsigned short)__a,
-                           (vector unsigned short)__b, __cc);
+  return (__vector signed short)
+    __builtin_s390_vfeezhs((__vector unsigned short)__a,
+                           (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_or_0_idx_cc(vector bool short __a, vector bool short __b, int *__cc) {
-  return __builtin_s390_vfeezhs((vector unsigned short)__a,
-                                (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b,
+                      int *__cc) {
+  return __builtin_s390_vfeezhs((__vector unsigned short)__a,
+                                (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpeq_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b,
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpeq_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
                       int *__cc) {
   return __builtin_s390_vfeezhs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpeq_or_0_idx_cc(vector signed int __a, vector signed int __b, int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfeezfs((vector unsigned int)__a,
-                           (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpeq_or_0_idx_cc(__vector signed int __a, __vector signed int __b,
+                      int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfeezfs((__vector unsigned int)__a,
+                           (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_or_0_idx_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return __builtin_s390_vfeezfs((vector unsigned int)__a,
-                                (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b,
+                      int *__cc) {
+  return __builtin_s390_vfeezfs((__vector unsigned int)__a,
+                                (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpeq_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b,
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpeq_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
                       int *__cc) {
   return __builtin_s390_vfeezfs(__a, __b, __cc);
 }
 
 /*-- vec_cmpne_idx ----------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpne_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfeneb((vector unsigned char)__a,
-                          (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpne_idx(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfeneb((__vector unsigned char)__a,
+                          (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfeneb((vector unsigned char)__a,
-                               (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_idx(__vector __bool char __a, __vector __bool char __b) {
+  return __builtin_s390_vfeneb((__vector unsigned char)__a,
+                               (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_idx(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vfeneb(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpne_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfeneh((vector unsigned short)__a,
-                          (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpne_idx(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfeneh((__vector unsigned short)__a,
+                          (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfeneh((vector unsigned short)__a,
-                               (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_idx(__vector __bool short __a, __vector __bool short __b) {
+  return __builtin_s390_vfeneh((__vector unsigned short)__a,
+                               (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_idx(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_vfeneh(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpne_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfenef((vector unsigned int)__a,
-                          (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpne_idx(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfenef((__vector unsigned int)__a,
+                          (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfenef((vector unsigned int)__a,
-                               (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_idx(__vector __bool int __a, __vector __bool int __b) {
+  return __builtin_s390_vfenef((__vector unsigned int)__a,
+                               (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_idx(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_vfenef(__a, __b);
 }
 
 /*-- vec_cmpne_idx_cc -------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpne_idx_cc(vector signed char __a, vector signed char __b, int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfenebs((vector unsigned char)__a,
-                           (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpne_idx_cc(__vector signed char __a, __vector signed char __b, int *__cc) {
+  return (__vector signed char)
+    __builtin_s390_vfenebs((__vector unsigned char)__a,
+                           (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_idx_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return __builtin_s390_vfenebs((vector unsigned char)__a,
-                                (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_idx_cc(__vector __bool char __a, __vector __bool char __b, int *__cc) {
+  return __builtin_s390_vfenebs((__vector unsigned char)__a,
+                                (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_idx_cc(vector unsigned char __a, vector unsigned char __b,
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
                  int *__cc) {
   return __builtin_s390_vfenebs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpne_idx_cc(vector signed short __a, vector signed short __b, int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfenehs((vector unsigned short)__a,
-                           (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpne_idx_cc(__vector signed short __a, __vector signed short __b,
+                 int *__cc) {
+  return (__vector signed short)
+    __builtin_s390_vfenehs((__vector unsigned short)__a,
+                           (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_idx_cc(vector bool short __a, vector bool short __b, int *__cc) {
-  return __builtin_s390_vfenehs((vector unsigned short)__a,
-                                (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_idx_cc(__vector __bool short __a, __vector __bool short __b,
+                 int *__cc) {
+  return __builtin_s390_vfenehs((__vector unsigned short)__a,
+                                (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_idx_cc(vector unsigned short __a, vector unsigned short __b,
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
                  int *__cc) {
   return __builtin_s390_vfenehs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpne_idx_cc(vector signed int __a, vector signed int __b, int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfenefs((vector unsigned int)__a,
-                           (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpne_idx_cc(__vector signed int __a, __vector signed int __b, int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfenefs((__vector unsigned int)__a,
+                           (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_idx_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return __builtin_s390_vfenefs((vector unsigned int)__a,
-                                (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_idx_cc(__vector __bool int __a, __vector __bool int __b, int *__cc) {
+  return __builtin_s390_vfenefs((__vector unsigned int)__a,
+                                (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_idx_cc(vector unsigned int __a, vector unsigned int __b, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
+                 int *__cc) {
   return __builtin_s390_vfenefs(__a, __b, __cc);
 }
 
 /*-- vec_cmpne_or_0_idx -----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpne_or_0_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfenezb((vector unsigned char)__a,
-                           (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpne_or_0_idx(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfenezb((__vector unsigned char)__a,
+                           (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_or_0_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfenezb((vector unsigned char)__a,
-                                (vector unsigned char)__b);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_or_0_idx(__vector __bool char __a, __vector __bool char __b) {
+  return __builtin_s390_vfenezb((__vector unsigned char)__a,
+                                (__vector unsigned char)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_or_0_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_or_0_idx(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vfenezb(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpne_or_0_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfenezh((vector unsigned short)__a,
-                           (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpne_or_0_idx(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfenezh((__vector unsigned short)__a,
+                           (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_or_0_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfenezh((vector unsigned short)__a,
-                                (vector unsigned short)__b);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_or_0_idx(__vector __bool short __a, __vector __bool short __b) {
+  return __builtin_s390_vfenezh((__vector unsigned short)__a,
+                                (__vector unsigned short)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_or_0_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_or_0_idx(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_vfenezh(__a, __b);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpne_or_0_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfenezf((vector unsigned int)__a,
-                           (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpne_or_0_idx(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfenezf((__vector unsigned int)__a,
+                           (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_or_0_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfenezf((vector unsigned int)__a,
-                                (vector unsigned int)__b);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_or_0_idx(__vector __bool int __a, __vector __bool int __b) {
+  return __builtin_s390_vfenezf((__vector unsigned int)__a,
+                                (__vector unsigned int)__b);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_or_0_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_or_0_idx(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_vfenezf(__a, __b);
 }
 
 /*-- vec_cmpne_or_0_idx_cc --------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_cmpne_or_0_idx_cc(vector signed char __a, vector signed char __b,
+static inline __ATTRS_o_ai __vector signed char
+vec_cmpne_or_0_idx_cc(__vector signed char __a, __vector signed char __b,
                       int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfenezbs((vector unsigned char)__a,
-                            (vector unsigned char)__b, __cc);
+  return (__vector signed char)
+    __builtin_s390_vfenezbs((__vector unsigned char)__a,
+                            (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_or_0_idx_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return __builtin_s390_vfenezbs((vector unsigned char)__a,
-                                 (vector unsigned char)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_or_0_idx_cc(__vector __bool char __a, __vector __bool char __b,
+                      int *__cc) {
+  return __builtin_s390_vfenezbs((__vector unsigned char)__a,
+                                 (__vector unsigned char)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpne_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b,
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpne_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
                       int *__cc) {
   return __builtin_s390_vfenezbs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_cmpne_or_0_idx_cc(vector signed short __a, vector signed short __b,
+static inline __ATTRS_o_ai __vector signed short
+vec_cmpne_or_0_idx_cc(__vector signed short __a, __vector signed short __b,
                       int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfenezhs((vector unsigned short)__a,
-                            (vector unsigned short)__b, __cc);
+  return (__vector signed short)
+    __builtin_s390_vfenezhs((__vector unsigned short)__a,
+                            (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_or_0_idx_cc(vector bool short __a, vector bool short __b, int *__cc) {
-  return __builtin_s390_vfenezhs((vector unsigned short)__a,
-                                 (vector unsigned short)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_or_0_idx_cc(__vector __bool short __a, __vector __bool short __b,
+                      int *__cc) {
+  return __builtin_s390_vfenezhs((__vector unsigned short)__a,
+                                 (__vector unsigned short)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpne_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b,
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpne_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
                       int *__cc) {
   return __builtin_s390_vfenezhs(__a, __b, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_cmpne_or_0_idx_cc(vector signed int __a, vector signed int __b, int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfenezfs((vector unsigned int)__a,
-                            (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_cmpne_or_0_idx_cc(__vector signed int __a, __vector signed int __b,
+                      int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfenezfs((__vector unsigned int)__a,
+                            (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_or_0_idx_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return __builtin_s390_vfenezfs((vector unsigned int)__a,
-                                 (vector unsigned int)__b, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_or_0_idx_cc(__vector __bool int __a, __vector __bool int __b,
+                      int *__cc) {
  return __builtin_s390_vfenezfs((__vector unsigned int)__a,
+                                 (__vector unsigned int)__b, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpne_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b,
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpne_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
                       int *__cc) {
   return __builtin_s390_vfenezfs(__a, __b, __cc);
 }
 
 /*-- vec_cmprg --------------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_cmprg(vector unsigned char __a, vector unsigned char __b,
-          vector unsigned char __c) {
-  return (vector bool char)__builtin_s390_vstrcb(__a, __b, __c, 4);
+static inline __ATTRS_o_ai __vector __bool char
+vec_cmprg(__vector unsigned char __a, __vector unsigned char __b,
+          __vector unsigned char __c) {
+  return (__vector __bool char)__builtin_s390_vstrcb(__a, __b, __c, 4);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_cmprg(vector unsigned short __a, vector unsigned short __b,
-          vector unsigned short __c) {
-  return (vector bool short)__builtin_s390_vstrch(__a, __b, __c, 4);
+static inline __ATTRS_o_ai __vector __bool short
+vec_cmprg(__vector unsigned short __a, __vector unsigned short __b,
+          __vector unsigned short __c) {
+  return (__vector __bool short)__builtin_s390_vstrch(__a, __b, __c, 4);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_cmprg(vector unsigned int __a, vector unsigned int __b,
-          vector unsigned int __c) {
-  return (vector bool int)__builtin_s390_vstrcf(__a, __b, __c, 4);
+static inline __ATTRS_o_ai __vector __bool int
+vec_cmprg(__vector unsigned int __a, __vector unsigned int __b,
+          __vector unsigned int __c) {
+  return (__vector __bool int)__builtin_s390_vstrcf(__a, __b, __c, 4);
 }
 
 /*-- vec_cmprg_cc -----------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_cmprg_cc(vector unsigned char __a, vector unsigned char __b,
-             vector unsigned char __c, int *__cc) {
-  return (vector bool char)__builtin_s390_vstrcbs(__a, __b, __c, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool char
+vec_cmprg_cc(__vector unsigned char __a, __vector unsigned char __b,
+             __vector unsigned char __c, int *__cc) {
+  return (__vector __bool char)__builtin_s390_vstrcbs(__a, __b, __c, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_cmprg_cc(vector unsigned short __a, vector unsigned short __b,
-             vector unsigned short __c, int *__cc) {
-  return (vector bool short)__builtin_s390_vstrchs(__a, __b, __c, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool short
+vec_cmprg_cc(__vector unsigned short __a, __vector unsigned short __b,
+             __vector unsigned short __c, int *__cc) {
+  return (__vector __bool short)__builtin_s390_vstrchs(__a, __b, __c, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_cmprg_cc(vector unsigned int __a, vector unsigned int __b,
-             vector unsigned int __c, int *__cc) {
-  return (vector bool int)__builtin_s390_vstrcfs(__a, __b, __c, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool int
+vec_cmprg_cc(__vector unsigned int __a, __vector unsigned int __b,
+             __vector unsigned int __c, int *__cc) {
+  return (__vector __bool int)__builtin_s390_vstrcfs(__a, __b, __c, 4, __cc);
 }
 
 /*-- vec_cmprg_idx ----------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmprg_idx(vector unsigned char __a, vector unsigned char __b,
-              vector unsigned char __c) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmprg_idx(__vector unsigned char __a, __vector unsigned char __b,
+              __vector unsigned char __c) {
   return __builtin_s390_vstrcb(__a, __b, __c, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmprg_idx(vector unsigned short __a, vector unsigned short __b,
-              vector unsigned short __c) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmprg_idx(__vector unsigned short __a, __vector unsigned short __b,
+              __vector unsigned short __c) {
   return __builtin_s390_vstrch(__a, __b, __c, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmprg_idx(vector unsigned int __a, vector unsigned int __b,
-              vector unsigned int __c) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmprg_idx(__vector unsigned int __a, __vector unsigned int __b,
+              __vector unsigned int __c) {
   return __builtin_s390_vstrcf(__a, __b, __c, 0);
 }
 
 /*-- vec_cmprg_idx_cc -------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmprg_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                 vector unsigned char __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmprg_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
+                 __vector unsigned char __c, int *__cc) {
   return __builtin_s390_vstrcbs(__a, __b, __c, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmprg_idx_cc(vector unsigned short __a, vector unsigned short __b,
-                 vector unsigned short __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmprg_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
+                 __vector unsigned short __c, int *__cc) {
   return __builtin_s390_vstrchs(__a, __b, __c, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmprg_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                 vector unsigned int __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmprg_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
+                 __vector unsigned int __c, int *__cc) {
   return __builtin_s390_vstrcfs(__a, __b, __c, 0, __cc);
 }
 
 /*-- vec_cmprg_or_0_idx -----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmprg_or_0_idx(vector unsigned char __a, vector unsigned char __b,
-                   vector unsigned char __c) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmprg_or_0_idx(__vector unsigned char __a, __vector unsigned char __b,
+                   __vector unsigned char __c) {
   return __builtin_s390_vstrczb(__a, __b, __c, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmprg_or_0_idx(vector unsigned short __a, vector unsigned short __b,
-                   vector unsigned short __c) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmprg_or_0_idx(__vector unsigned short __a, __vector unsigned short __b,
+                   __vector unsigned short __c) {
   return __builtin_s390_vstrczh(__a, __b, __c, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmprg_or_0_idx(vector unsigned int __a, vector unsigned int __b,
-                   vector unsigned int __c) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmprg_or_0_idx(__vector unsigned int __a, __vector unsigned int __b,
+                   __vector unsigned int __c) {
   return __builtin_s390_vstrczf(__a, __b, __c, 0);
 }
 
 /*-- vec_cmprg_or_0_idx_cc --------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmprg_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                      vector unsigned char __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmprg_or_0_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
+                      __vector unsigned char __c, int *__cc) {
   return __builtin_s390_vstrczbs(__a, __b, __c, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmprg_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b,
-                      vector unsigned short __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmprg_or_0_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
+                      __vector unsigned short __c, int *__cc) {
   return __builtin_s390_vstrczhs(__a, __b, __c, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmprg_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                      vector unsigned int __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmprg_or_0_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
+                      __vector unsigned int __c, int *__cc) {
   return __builtin_s390_vstrczfs(__a, __b, __c, 0, __cc);
 }
 
 /*-- vec_cmpnrg -------------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_cmpnrg(vector unsigned char __a, vector unsigned char __b,
-           vector unsigned char __c) {
-  return (vector bool char)__builtin_s390_vstrcb(__a, __b, __c, 12);
+static inline __ATTRS_o_ai __vector __bool char
+vec_cmpnrg(__vector unsigned char __a, __vector unsigned char __b,
+           __vector unsigned char __c) {
+  return (__vector __bool char)__builtin_s390_vstrcb(__a, __b, __c, 12);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_cmpnrg(vector unsigned short __a, vector unsigned short __b,
-           vector unsigned short __c) {
-  return (vector bool short)__builtin_s390_vstrch(__a, __b, __c, 12);
+static inline __ATTRS_o_ai __vector __bool short
+vec_cmpnrg(__vector unsigned short __a, __vector unsigned short __b,
+           __vector unsigned short __c) {
+  return (__vector __bool short)__builtin_s390_vstrch(__a, __b, __c, 12);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_cmpnrg(vector unsigned int __a, vector unsigned int __b,
-           vector unsigned int __c) {
-  return (vector bool int)__builtin_s390_vstrcf(__a, __b, __c, 12);
+static inline __ATTRS_o_ai __vector __bool int
+vec_cmpnrg(__vector unsigned int __a, __vector unsigned int __b,
+           __vector unsigned int __c) {
+  return (__vector __bool int)__builtin_s390_vstrcf(__a, __b, __c, 12);
 }
 
 /*-- vec_cmpnrg_cc ----------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_cmpnrg_cc(vector unsigned char __a, vector unsigned char __b,
-              vector unsigned char __c, int *__cc) {
-  return (vector bool char)__builtin_s390_vstrcbs(__a, __b, __c, 12, __cc);
+static inline __ATTRS_o_ai __vector __bool char
+vec_cmpnrg_cc(__vector unsigned char __a, __vector unsigned char __b,
+              __vector unsigned char __c, int *__cc) {
+  return (__vector __bool char)
+    __builtin_s390_vstrcbs(__a, __b, __c, 12, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_cmpnrg_cc(vector unsigned short __a, vector unsigned short __b,
-              vector unsigned short __c, int *__cc) {
-  return (vector bool short)__builtin_s390_vstrchs(__a, __b, __c, 12, __cc);
+static inline __ATTRS_o_ai __vector __bool short
+vec_cmpnrg_cc(__vector unsigned short __a, __vector unsigned short __b,
+              __vector unsigned short __c, int *__cc) {
+  return (__vector __bool short)
+    __builtin_s390_vstrchs(__a, __b, __c, 12, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_cmpnrg_cc(vector unsigned int __a, vector unsigned int __b,
-              vector unsigned int __c, int *__cc) {
-  return (vector bool int)__builtin_s390_vstrcfs(__a, __b, __c, 12, __cc);
+static inline __ATTRS_o_ai __vector __bool int
+vec_cmpnrg_cc(__vector unsigned int __a, __vector unsigned int __b,
+              __vector unsigned int __c, int *__cc) {
+  return (__vector __bool int)
+    __builtin_s390_vstrcfs(__a, __b, __c, 12, __cc);
 }
 
 /*-- vec_cmpnrg_idx ---------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpnrg_idx(vector unsigned char __a, vector unsigned char __b,
-               vector unsigned char __c) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpnrg_idx(__vector unsigned char __a, __vector unsigned char __b,
+               __vector unsigned char __c) {
   return __builtin_s390_vstrcb(__a, __b, __c, 8);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpnrg_idx(vector unsigned short __a, vector unsigned short __b,
-               vector unsigned short __c) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpnrg_idx(__vector unsigned short __a, __vector unsigned short __b,
+               __vector unsigned short __c) {
   return __builtin_s390_vstrch(__a, __b, __c, 8);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpnrg_idx(vector unsigned int __a, vector unsigned int __b,
-               vector unsigned int __c) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpnrg_idx(__vector unsigned int __a, __vector unsigned int __b,
+               __vector unsigned int __c) {
   return __builtin_s390_vstrcf(__a, __b, __c, 8);
 }
 
 /*-- vec_cmpnrg_idx_cc ------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpnrg_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                  vector unsigned char __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpnrg_idx_cc(__vector unsigned char __a, __vector unsigned char __b,
+                  __vector unsigned char __c, int *__cc) {
   return __builtin_s390_vstrcbs(__a, __b, __c, 8, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpnrg_idx_cc(vector unsigned short __a, vector unsigned short __b,
-                  vector unsigned short __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpnrg_idx_cc(__vector unsigned short __a, __vector unsigned short __b,
+                  __vector unsigned short __c, int *__cc) {
   return __builtin_s390_vstrchs(__a, __b, __c, 8, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpnrg_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                  vector unsigned int __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpnrg_idx_cc(__vector unsigned int __a, __vector unsigned int __b,
+                  __vector unsigned int __c, int *__cc) {
   return __builtin_s390_vstrcfs(__a, __b, __c, 8, __cc);
 }
 
 /*-- vec_cmpnrg_or_0_idx ----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpnrg_or_0_idx(vector unsigned char __a, vector unsigned char __b,
-                    vector unsigned char __c) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpnrg_or_0_idx(__vector unsigned char __a, __vector unsigned char __b,
+                    __vector unsigned char __c) {
   return __builtin_s390_vstrczb(__a, __b, __c, 8);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpnrg_or_0_idx(vector unsigned short __a, vector unsigned short __b,
-                    vector unsigned short __c) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpnrg_or_0_idx(__vector unsigned short __a, __vector unsigned short __b,
+                    __vector unsigned short __c) {
   return __builtin_s390_vstrczh(__a, __b, __c, 8);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpnrg_or_0_idx(vector unsigned int __a, vector unsigned int __b,
-                    vector unsigned int __c) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpnrg_or_0_idx(__vector unsigned int __a, __vector unsigned int __b,
+                    __vector unsigned int __c) {
   return __builtin_s390_vstrczf(__a, __b, __c, 8);
 }
 
 /*-- vec_cmpnrg_or_0_idx_cc -------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_cmpnrg_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                       vector unsigned char __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_cmpnrg_or_0_idx_cc(__vector unsigned char __a,
+                       __vector unsigned char __b,
+                       __vector unsigned char __c, int *__cc) {
   return __builtin_s390_vstrczbs(__a, __b, __c, 8, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_cmpnrg_or_0_idx_cc(vector unsigned short __a, vector unsigned short __b,
-                       vector unsigned short __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_cmpnrg_or_0_idx_cc(__vector unsigned short __a,
+                       __vector unsigned short __b,
+                       __vector unsigned short __c, int *__cc) {
   return __builtin_s390_vstrczhs(__a, __b, __c, 8, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_cmpnrg_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                       vector unsigned int __c, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_cmpnrg_or_0_idx_cc(__vector unsigned int __a,
+                       __vector unsigned int __b,
+                       __vector unsigned int __c, int *__cc) {
  return __builtin_s390_vstrczfs(__a, __b, __c, 8, __cc);
 }
 
 /*-- vec_find_any_eq --------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq(vector signed char __a, vector signed char __b) {
-  return (vector bool char)
-    __builtin_s390_vfaeb((vector unsigned char)__a,
-                         (vector unsigned char)__b, 4);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq(__vector signed char __a, __vector signed char __b) {
+  return (__vector __bool char)
+    __builtin_s390_vfaeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq(vector bool char __a, vector bool char __b) {
-  return (vector bool char)
-    __builtin_s390_vfaeb((vector unsigned char)__a,
-                         (vector unsigned char)__b, 4);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq(__vector __bool char __a, __vector __bool char __b) {
+  return (__vector __bool char)
+    __builtin_s390_vfaeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq(vector unsigned char __a, vector unsigned char __b) {
-  return (vector bool char)__builtin_s390_vfaeb(__a, __b, 4);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq(__vector unsigned char __a, __vector unsigned char __b) {
+  return (__vector __bool char)__builtin_s390_vfaeb(__a, __b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq(vector signed short __a, vector signed short __b) {
-  return (vector bool short)
-    __builtin_s390_vfaeh((vector unsigned short)__a,
-                         (vector unsigned short)__b, 4);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq(__vector signed short __a, __vector signed short __b) {
+  return (__vector __bool short)
+    __builtin_s390_vfaeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq(vector bool short __a, vector bool short __b) {
-  return (vector bool short)
-    __builtin_s390_vfaeh((vector unsigned short)__a,
-                         (vector unsigned short)__b, 4);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq(__vector __bool short __a, __vector __bool short __b) {
+  return (__vector __bool short)
+    __builtin_s390_vfaeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq(vector unsigned short __a, vector unsigned short __b) {
-  return (vector bool short)__builtin_s390_vfaeh(__a, __b, 4);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector __bool short)__builtin_s390_vfaeh(__a, __b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq(vector signed int __a, vector signed int __b) {
-  return (vector bool int)
-    __builtin_s390_vfaef((vector unsigned int)__a,
-                         (vector unsigned int)__b, 4);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq(__vector signed int __a, __vector signed int __b) {
+  return (__vector __bool int)
+    __builtin_s390_vfaef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq(vector bool int __a, vector bool int __b) {
-  return (vector bool int)
-    __builtin_s390_vfaef((vector unsigned int)__a,
-                         (vector unsigned int)__b, 4);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq(__vector __bool int __a, __vector __bool int __b) {
+  return (__vector __bool int)
+    __builtin_s390_vfaef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b, 4);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq(vector unsigned int __a, vector unsigned int __b) {
-  return (vector bool int)__builtin_s390_vfaef(__a, __b, 4);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq(__vector unsigned int __a, __vector unsigned int __b) {
+  return (__vector __bool int)__builtin_s390_vfaef(__a, __b, 4);
 }
 
 /*-- vec_find_any_eq_cc -----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq_cc(vector signed char __a, vector signed char __b, int *__cc) {
-  return (vector bool char)
-    __builtin_s390_vfaebs((vector unsigned char)__a,
-                          (vector unsigned char)__b, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq_cc(__vector signed char __a, __vector signed char __b,
+                   int *__cc) {
+  return (__vector __bool char)
+    __builtin_s390_vfaebs((__vector unsigned char)__a,
+                          (__vector unsigned char)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return (vector bool char)
-    __builtin_s390_vfaebs((vector unsigned char)__a,
-                          (vector unsigned char)__b, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq_cc(__vector __bool char __a, __vector __bool char __b,
+                   int *__cc) {
+  return (__vector __bool char)
+    __builtin_s390_vfaebs((__vector unsigned char)__a,
+                          (__vector unsigned char)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_eq_cc(vector unsigned char __a, vector unsigned char __b,
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_eq_cc(__vector unsigned char __a, __vector unsigned char __b,
                    int *__cc) {
-  return (vector bool char)__builtin_s390_vfaebs(__a, __b, 4, __cc);
+  return (__vector __bool char)__builtin_s390_vfaebs(__a, __b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq_cc(vector signed short __a, vector signed short __b,
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq_cc(__vector signed short __a, __vector signed short __b,
                    int *__cc) {
-  return (vector bool short)
-    __builtin_s390_vfaehs((vector unsigned short)__a,
-                          (vector unsigned short)__b, 4, __cc);
+  return (__vector __bool short)
+    __builtin_s390_vfaehs((__vector unsigned short)__a,
+                          (__vector unsigned short)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq_cc(vector bool short __a, vector bool short __b, int *__cc) {
-  return (vector bool short)
-    __builtin_s390_vfaehs((vector unsigned short)__a,
-                          (vector unsigned short)__b, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq_cc(__vector __bool short __a, __vector __bool short __b,
+                   int *__cc) {
+  return (__vector __bool short)
+    __builtin_s390_vfaehs((__vector unsigned short)__a,
+                          (__vector unsigned short)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_eq_cc(vector unsigned short __a, vector unsigned short __b,
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_eq_cc(__vector unsigned short __a, __vector unsigned short __b,
                    int *__cc) {
-  return (vector bool short)__builtin_s390_vfaehs(__a, __b, 4, __cc);
+  return (__vector __bool short)__builtin_s390_vfaehs(__a, __b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq_cc(vector signed int __a, vector signed int __b, int *__cc) {
-  return (vector bool int)
-    __builtin_s390_vfaefs((vector unsigned int)__a,
-                          (vector unsigned int)__b, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq_cc(__vector signed int __a, __vector signed int __b,
+                   int *__cc) {
+  return (__vector __bool int)
+    __builtin_s390_vfaefs((__vector unsigned int)__a,
+                          (__vector unsigned int)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return (vector bool int)
-    __builtin_s390_vfaefs((vector unsigned int)__a,
-                          (vector unsigned int)__b, 4, __cc);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq_cc(__vector __bool int __a, __vector __bool int __b,
+                   int *__cc) {
+  return (__vector __bool int)
+    __builtin_s390_vfaefs((__vector unsigned int)__a,
+                          (__vector unsigned int)__b, 4, __cc);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_eq_cc(vector unsigned int __a, vector unsigned int __b,
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_eq_cc(__vector unsigned int __a, __vector unsigned int __b,
                    int *__cc) {
-  return (vector bool int)__builtin_s390_vfaefs(__a, __b, 4, __cc);
+  return (__vector __bool int)__builtin_s390_vfaefs(__a, __b, 4, __cc);
 }
 
 /*-- vec_find_any_eq_idx ----------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_find_any_eq_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfaeb((vector unsigned char)__a,
-                         (vector unsigned char)__b, 0);
+static inline __ATTRS_o_ai __vector signed char
+vec_find_any_eq_idx(__vector signed char __a, __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfaeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfaeb((vector unsigned char)__a,
-                              (vector unsigned char)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_idx(__vector __bool char __a, __vector __bool char __b) {
+  return __builtin_s390_vfaeb((__vector unsigned char)__a,
+                              (__vector unsigned char)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_idx(__vector unsigned char __a, __vector unsigned char __b) {
   return __builtin_s390_vfaeb(__a, __b, 0);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_find_any_eq_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfaeh((vector unsigned short)__a,
-                         (vector unsigned short)__b, 0);
+static inline __ATTRS_o_ai __vector signed short
+vec_find_any_eq_idx(__vector signed short __a, __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfaeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfaeh((vector unsigned short)__a,
-                              (vector unsigned short)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_idx(__vector __bool short __a, __vector __bool short __b) {
+  return __builtin_s390_vfaeh((__vector unsigned short)__a,
+                              (__vector unsigned short)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_idx(__vector unsigned short __a, __vector unsigned short __b) {
   return __builtin_s390_vfaeh(__a, __b, 0);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_find_any_eq_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfaef((vector unsigned int)__a,
-                         (vector unsigned int)__b, 0);
+static inline __ATTRS_o_ai __vector signed int
+vec_find_any_eq_idx(__vector signed int __a, __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfaef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfaef((vector unsigned int)__a,
-                              (vector unsigned int)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_idx(__vector __bool int __a, __vector __bool int __b) {
+  return __builtin_s390_vfaef((__vector unsigned int)__a,
+                              (__vector unsigned int)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_idx(__vector unsigned int __a, __vector unsigned int __b) {
   return __builtin_s390_vfaef(__a, __b, 0);
 }
 
 /*-- vec_find_any_eq_idx_cc -------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_find_any_eq_idx_cc(vector signed char __a, vector signed char __b,
-                       int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfaebs((vector unsigned char)__a,
-                          (vector unsigned char)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed char
+vec_find_any_eq_idx_cc(__vector signed char __a,
+                       __vector signed char __b, int *__cc) {
+  return (__vector signed char)
+    __builtin_s390_vfaebs((__vector unsigned char)__a,
+                          (__vector unsigned char)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_idx_cc(vector bool char __a, vector bool char __b, int *__cc) {
-  return __builtin_s390_vfaebs((vector unsigned char)__a,
-                               (vector unsigned char)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_idx_cc(__vector __bool char __a,
+                       __vector __bool char __b, int *__cc) {
  return __builtin_s390_vfaebs((__vector unsigned char)__a,
+                               (__vector unsigned char)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                       int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_idx_cc(__vector unsigned char __a,
+                       __vector unsigned char __b, int *__cc) {
   return __builtin_s390_vfaebs(__a, __b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_find_any_eq_idx_cc(vector signed short __a, vector signed short __b,
-                       int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfaehs((vector unsigned short)__a,
-                          (vector unsigned short)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed short
+vec_find_any_eq_idx_cc(__vector signed short __a,
+                       __vector signed short __b, int *__cc) {
+  return (__vector signed short)
+    __builtin_s390_vfaehs((__vector unsigned short)__a,
+                          (__vector unsigned short)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_idx_cc(vector bool short __a, vector bool short __b,
-                       int *__cc) {
-  return __builtin_s390_vfaehs((vector unsigned short)__a,
-                               (vector unsigned short)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_idx_cc(__vector __bool short __a,
+                       __vector __bool short __b, int *__cc) {
+  return __builtin_s390_vfaehs((__vector unsigned short)__a,
+                               (__vector unsigned short)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_idx_cc(vector unsigned short __a, vector unsigned short __b,
-                       int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_idx_cc(__vector unsigned short __a,
+                       __vector unsigned short __b, int *__cc) {
   return __builtin_s390_vfaehs(__a, __b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_find_any_eq_idx_cc(vector signed int __a, vector signed int __b,
-                       int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfaefs((vector unsigned int)__a,
-                          (vector unsigned int)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_find_any_eq_idx_cc(__vector signed int __a,
+                       __vector signed int __b, int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfaefs((__vector unsigned int)__a,
+                          (__vector unsigned int)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_idx_cc(vector bool int __a, vector bool int __b, int *__cc) {
-  return __builtin_s390_vfaefs((vector unsigned int)__a,
-                               (vector unsigned int)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_idx_cc(__vector __bool int __a,
+                       __vector __bool int __b, int *__cc) {
+  return __builtin_s390_vfaefs((__vector unsigned int)__a,
+                               (__vector unsigned int)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                       int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_idx_cc(__vector unsigned int __a,
+                       __vector unsigned int __b, int *__cc) {
   return __builtin_s390_vfaefs(__a, __b, 0, __cc);
 }
 
 /*-- vec_find_any_eq_or_0_idx -----------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_find_any_eq_or_0_idx(vector signed char __a, vector signed char __b) {
-  return (vector signed char)
-    __builtin_s390_vfaezb((vector unsigned char)__a,
-                          (vector unsigned char)__b, 0);
+static inline __ATTRS_o_ai __vector signed char
+vec_find_any_eq_or_0_idx(__vector signed char __a,
+                         __vector signed char __b) {
+  return (__vector signed char)
+    __builtin_s390_vfaezb((__vector unsigned char)__a,
+                          (__vector unsigned char)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_or_0_idx(vector bool char __a, vector bool char __b) {
-  return __builtin_s390_vfaezb((vector unsigned char)__a,
-                               (vector unsigned char)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_or_0_idx(__vector __bool char __a,
+                         __vector __bool char __b) {
+  return __builtin_s390_vfaezb((__vector unsigned char)__a,
+                               (__vector unsigned char)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_or_0_idx(vector unsigned char __a, vector unsigned char __b) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_or_0_idx(__vector unsigned char __a,
+                         __vector unsigned char __b) {
   return __builtin_s390_vfaezb(__a, __b, 0);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_find_any_eq_or_0_idx(vector signed short __a, vector signed short __b) {
-  return (vector signed short)
-    __builtin_s390_vfaezh((vector unsigned short)__a,
-                          (vector unsigned short)__b, 0);
+static inline __ATTRS_o_ai __vector signed short
+vec_find_any_eq_or_0_idx(__vector signed short __a,
+                         __vector signed short __b) {
+  return (__vector signed short)
+    __builtin_s390_vfaezh((__vector unsigned short)__a,
+                          (__vector unsigned short)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_or_0_idx(vector bool short __a, vector bool short __b) {
-  return __builtin_s390_vfaezh((vector unsigned short)__a,
-                               (vector unsigned short)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_or_0_idx(__vector __bool short __a,
+                         __vector __bool short __b) {
+  return __builtin_s390_vfaezh((__vector unsigned short)__a,
+                               (__vector unsigned short)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_or_0_idx(vector unsigned short __a, vector unsigned short __b) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_or_0_idx(__vector unsigned short __a,
+                         __vector unsigned short __b) {
   return __builtin_s390_vfaezh(__a, __b, 0);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_find_any_eq_or_0_idx(vector signed int __a, vector signed int __b) {
-  return (vector signed int)
-    __builtin_s390_vfaezf((vector unsigned int)__a,
-                          (vector unsigned int)__b, 0);
+static inline __ATTRS_o_ai __vector signed int
+vec_find_any_eq_or_0_idx(__vector signed int __a,
+                         __vector signed int __b) {
+  return (__vector signed int)
+    __builtin_s390_vfaezf((__vector unsigned int)__a,
+                          (__vector unsigned int)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_or_0_idx(vector bool int __a, vector bool int __b) {
-  return __builtin_s390_vfaezf((vector unsigned int)__a,
-                               (vector unsigned int)__b, 0);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_or_0_idx(__vector __bool int __a,
+                         __vector __bool int __b) {
+  return __builtin_s390_vfaezf((__vector unsigned int)__a,
+                               (__vector unsigned int)__b, 0);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_or_0_idx(vector unsigned int __a, vector unsigned int __b) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_or_0_idx(__vector unsigned int __a,
+                         __vector unsigned int __b) {
   return __builtin_s390_vfaezf(__a, __b, 0);
 }
 
 /*-- vec_find_any_eq_or_0_idx_cc --------------------------------------------*/
 
-static inline __ATTRS_o_ai vector signed char
-vec_find_any_eq_or_0_idx_cc(vector signed char __a, vector signed char __b,
-                            int *__cc) {
-  return (vector signed char)
-    __builtin_s390_vfaezbs((vector unsigned char)__a,
-                           (vector unsigned char)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed char
+vec_find_any_eq_or_0_idx_cc(__vector signed char __a,
+                            __vector signed char __b, int *__cc) {
+  return (__vector signed char)
+    __builtin_s390_vfaezbs((__vector unsigned char)__a,
+                           (__vector unsigned char)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_or_0_idx_cc(vector bool char __a, vector bool char __b,
-                            int *__cc) {
-  return __builtin_s390_vfaezbs((vector unsigned char)__a,
-                                (vector unsigned char)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_or_0_idx_cc(__vector __bool char __a,
+                            __vector __bool char __b, int *__cc) {
+  return __builtin_s390_vfaezbs((__vector unsigned char)__a,
+                                (__vector unsigned char)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned char
-vec_find_any_eq_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b,
-                            int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned char
+vec_find_any_eq_or_0_idx_cc(__vector unsigned char __a,
+                            __vector unsigned char __b, int *__cc) {
   return __builtin_s390_vfaezbs(__a, __b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed short
-vec_find_any_eq_or_0_idx_cc(vector signed short __a, vector signed short __b,
-                            int *__cc) {
-  return (vector signed short)
-    __builtin_s390_vfaezhs((vector unsigned short)__a,
-                           (vector unsigned short)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed short
+vec_find_any_eq_or_0_idx_cc(__vector signed short __a,
+                            __vector signed short __b, int *__cc) {
+  return (__vector signed short)
+    __builtin_s390_vfaezhs((__vector unsigned short)__a,
+                           (__vector unsigned short)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_or_0_idx_cc(vector bool short __a, vector bool short __b,
-                            int *__cc) {
-  return __builtin_s390_vfaezhs((vector unsigned short)__a,
-                                (vector unsigned short)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_or_0_idx_cc(__vector __bool short __a,
+                            __vector __bool short __b, int *__cc) {
+  return __builtin_s390_vfaezhs((__vector unsigned short)__a,
+                                (__vector unsigned short)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned short
-vec_find_any_eq_or_0_idx_cc(vector unsigned short __a,
-                            vector unsigned short __b, int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned short
+vec_find_any_eq_or_0_idx_cc(__vector unsigned short __a,
+                            __vector unsigned short __b, int *__cc) {
   return __builtin_s390_vfaezhs(__a, __b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector signed int
-vec_find_any_eq_or_0_idx_cc(vector signed int __a, vector signed int __b,
-                            int *__cc) {
-  return (vector signed int)
-    __builtin_s390_vfaezfs((vector unsigned int)__a,
-                           (vector unsigned int)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector signed int
+vec_find_any_eq_or_0_idx_cc(__vector signed int __a,
+                            __vector signed int __b, int *__cc) {
+  return (__vector signed int)
+    __builtin_s390_vfaezfs((__vector unsigned int)__a,
+                           (__vector unsigned int)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_or_0_idx_cc(vector bool int __a, vector bool int __b,
-                            int *__cc) {
-  return __builtin_s390_vfaezfs((vector unsigned int)__a,
-                                (vector unsigned int)__b, 0, __cc);
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_or_0_idx_cc(__vector __bool int __a,
+                            __vector __bool int __b, int *__cc) {
+  return __builtin_s390_vfaezfs((__vector unsigned int)__a,
+                                (__vector unsigned int)__b, 0, __cc);
 }
 
-static inline __ATTRS_o_ai vector unsigned int
-vec_find_any_eq_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b,
-                            int *__cc) {
+static inline __ATTRS_o_ai __vector unsigned int
+vec_find_any_eq_or_0_idx_cc(__vector unsigned int __a,
+                            __vector unsigned int __b, int *__cc) {
   return __builtin_s390_vfaezfs(__a, __b, 0, __cc);
 }
 
 /*-- vec_find_any_ne --------------------------------------------------------*/
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_ne(vector signed char __a, vector signed char __b) {
-  return (vector bool char)
-    __builtin_s390_vfaeb((vector unsigned char)__a,
-                         (vector unsigned char)__b, 12);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_ne(__vector signed char __a, __vector signed char __b) {
+  return (__vector __bool char)
+    __builtin_s390_vfaeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_ne(vector bool char __a, vector bool char __b) {
-  return (vector bool char)
-    __builtin_s390_vfaeb((vector unsigned char)__a,
-                         (vector unsigned char)__b, 12);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_ne(__vector __bool char __a, __vector __bool char __b) {
+  return (__vector __bool char)
+    __builtin_s390_vfaeb((__vector unsigned char)__a,
+                         (__vector unsigned char)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool char
-vec_find_any_ne(vector unsigned char __a, vector unsigned char __b) {
-  return (vector bool char)__builtin_s390_vfaeb(__a, __b, 12);
+static inline __ATTRS_o_ai __vector __bool char
+vec_find_any_ne(__vector unsigned char __a, __vector unsigned char __b) {
+  return (__vector __bool char)__builtin_s390_vfaeb(__a, __b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_ne(vector signed short __a, vector signed short __b) {
-  return (vector bool short)
-    __builtin_s390_vfaeh((vector unsigned short)__a,
-                         (vector unsigned short)__b, 12);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_ne(__vector signed short __a, __vector signed short __b) {
+  return (__vector __bool short)
+    __builtin_s390_vfaeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_ne(vector bool short __a, vector bool short __b) {
-  return (vector bool short)
-    __builtin_s390_vfaeh((vector unsigned short)__a,
-                         (vector unsigned short)__b, 12);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_ne(__vector __bool short __a, __vector __bool short __b) {
+  return (__vector __bool short)
+    __builtin_s390_vfaeh((__vector unsigned short)__a,
+                         (__vector unsigned short)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool short
-vec_find_any_ne(vector unsigned short __a, vector unsigned short __b) {
-  return (vector bool short)__builtin_s390_vfaeh(__a, __b, 12);
+static inline __ATTRS_o_ai __vector __bool short
+vec_find_any_ne(__vector unsigned short __a, __vector unsigned short __b) {
+  return (__vector __bool short)__builtin_s390_vfaeh(__a, __b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_ne(vector signed int __a, vector signed int __b) {
-  return (vector bool int)
-    __builtin_s390_vfaef((vector unsigned int)__a,
-                         (vector unsigned int)__b, 12);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_ne(__vector signed int __a, __vector signed int __b) {
+  return (__vector __bool int)
+    __builtin_s390_vfaef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_ne(vector bool int __a, vector bool int __b) {
-  return (vector bool int)
-    __builtin_s390_vfaef((vector unsigned int)__a,
-                         (vector unsigned int)__b, 12);
+static inline __ATTRS_o_ai __vector __bool int
+vec_find_any_ne(__vector __bool int __a, __vector __bool int __b) {
+  return (__vector __bool int)
+    __builtin_s390_vfaef((__vector unsigned int)__a,
+                         (__vector unsigned int)__b, 12);
 }
 
-static inline __ATTRS_o_ai vector bool int
-vec_find_any_ne(vector unsigned int __a, vector unsigned int __b) {
-  return (vector bool int)__builtin_s390_vfaef(__a, __b, 12);
+static inline
__ATTRS_o_ai __vector __bool int +vec_find_any_ne(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector __bool int)__builtin_s390_vfaef(__a, __b, 12); } /*-- vec_find_any_ne_cc -----------------------------------------------------*/ -static inline __ATTRS_o_ai vector bool char -vec_find_any_ne_cc(vector signed char __a, vector signed char __b, int *__cc) { - return (vector bool char) - __builtin_s390_vfaebs((vector unsigned char)__a, - (vector unsigned char)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool char +vec_find_any_ne_cc(__vector signed char __a, + __vector signed char __b, int *__cc) { + return (__vector __bool char) + __builtin_s390_vfaebs((__vector unsigned char)__a, + (__vector unsigned char)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool char -vec_find_any_ne_cc(vector bool char __a, vector bool char __b, int *__cc) { - return (vector bool char) - __builtin_s390_vfaebs((vector unsigned char)__a, - (vector unsigned char)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool char +vec_find_any_ne_cc(__vector __bool char __a, + __vector __bool char __b, int *__cc) { + return (__vector __bool char) + __builtin_s390_vfaebs((__vector unsigned char)__a, + (__vector unsigned char)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool char -vec_find_any_ne_cc(vector unsigned char __a, vector unsigned char __b, - int *__cc) { - return (vector bool char)__builtin_s390_vfaebs(__a, __b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool char +vec_find_any_ne_cc(__vector unsigned char __a, + __vector unsigned char __b, int *__cc) { + return (__vector __bool char)__builtin_s390_vfaebs(__a, __b, 12, __cc); } -static inline __ATTRS_o_ai vector bool short -vec_find_any_ne_cc(vector signed short __a, vector signed short __b, - int *__cc) { - return (vector bool short) - __builtin_s390_vfaehs((vector unsigned short)__a, - (vector unsigned short)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool short +vec_find_any_ne_cc(__vector signed short __a, + __vector signed short __b, int *__cc) { + return (__vector __bool short) + __builtin_s390_vfaehs((__vector unsigned short)__a, + (__vector unsigned short)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool short -vec_find_any_ne_cc(vector bool short __a, vector bool short __b, int *__cc) { - return (vector bool short) - __builtin_s390_vfaehs((vector unsigned short)__a, - (vector unsigned short)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool short +vec_find_any_ne_cc(__vector __bool short __a, + __vector __bool short __b, int *__cc) { + return (__vector __bool short) + __builtin_s390_vfaehs((__vector unsigned short)__a, + (__vector unsigned short)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool short -vec_find_any_ne_cc(vector unsigned short __a, vector unsigned short __b, - int *__cc) { - return (vector bool short)__builtin_s390_vfaehs(__a, __b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool short +vec_find_any_ne_cc(__vector unsigned short __a, + __vector unsigned short __b, int *__cc) { + return (__vector __bool short)__builtin_s390_vfaehs(__a, __b, 12, __cc); } -static inline __ATTRS_o_ai vector bool int -vec_find_any_ne_cc(vector signed int __a, vector signed int __b, int *__cc) { - return (vector bool int) - __builtin_s390_vfaefs((vector unsigned int)__a, - (vector unsigned int)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool int +vec_find_any_ne_cc(__vector signed int __a, + __vector signed int __b, int *__cc) { + return (__vector __bool 
int) + __builtin_s390_vfaefs((__vector unsigned int)__a, + (__vector unsigned int)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool int -vec_find_any_ne_cc(vector bool int __a, vector bool int __b, int *__cc) { - return (vector bool int) - __builtin_s390_vfaefs((vector unsigned int)__a, - (vector unsigned int)__b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool int +vec_find_any_ne_cc(__vector __bool int __a, + __vector __bool int __b, int *__cc) { + return (__vector __bool int) + __builtin_s390_vfaefs((__vector unsigned int)__a, + (__vector unsigned int)__b, 12, __cc); } -static inline __ATTRS_o_ai vector bool int -vec_find_any_ne_cc(vector unsigned int __a, vector unsigned int __b, - int *__cc) { - return (vector bool int)__builtin_s390_vfaefs(__a, __b, 12, __cc); +static inline __ATTRS_o_ai __vector __bool int +vec_find_any_ne_cc(__vector unsigned int __a, + __vector unsigned int __b, int *__cc) { + return (__vector __bool int)__builtin_s390_vfaefs(__a, __b, 12, __cc); } /*-- vec_find_any_ne_idx ----------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_find_any_ne_idx(vector signed char __a, vector signed char __b) { - return (vector signed char) - __builtin_s390_vfaeb((vector unsigned char)__a, - (vector unsigned char)__b, 8); +static inline __ATTRS_o_ai __vector signed char +vec_find_any_ne_idx(__vector signed char __a, __vector signed char __b) { + return (__vector signed char) + __builtin_s390_vfaeb((__vector unsigned char)__a, + (__vector unsigned char)__b, 8); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_idx(vector bool char __a, vector bool char __b) { - return __builtin_s390_vfaeb((vector unsigned char)__a, - (vector unsigned char)__b, 8); +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_idx(__vector __bool char __a, __vector __bool char __b) { + return __builtin_s390_vfaeb((__vector unsigned char)__a, + (__vector unsigned char)__b, 8); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_idx(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_idx(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vfaeb(__a, __b, 8); } -static inline __ATTRS_o_ai vector signed short -vec_find_any_ne_idx(vector signed short __a, vector signed short __b) { - return (vector signed short) - __builtin_s390_vfaeh((vector unsigned short)__a, - (vector unsigned short)__b, 8); +static inline __ATTRS_o_ai __vector signed short +vec_find_any_ne_idx(__vector signed short __a, __vector signed short __b) { + return (__vector signed short) + __builtin_s390_vfaeh((__vector unsigned short)__a, + (__vector unsigned short)__b, 8); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_idx(vector bool short __a, vector bool short __b) { - return __builtin_s390_vfaeh((vector unsigned short)__a, - (vector unsigned short)__b, 8); +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_idx(__vector __bool short __a, __vector __bool short __b) { + return __builtin_s390_vfaeh((__vector unsigned short)__a, + (__vector unsigned short)__b, 8); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_idx(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_idx(__vector unsigned short __a, __vector unsigned short __b) { return __builtin_s390_vfaeh(__a, __b, 8); } -static inline __ATTRS_o_ai vector 
signed int -vec_find_any_ne_idx(vector signed int __a, vector signed int __b) { - return (vector signed int) - __builtin_s390_vfaef((vector unsigned int)__a, - (vector unsigned int)__b, 8); +static inline __ATTRS_o_ai __vector signed int +vec_find_any_ne_idx(__vector signed int __a, __vector signed int __b) { + return (__vector signed int) + __builtin_s390_vfaef((__vector unsigned int)__a, + (__vector unsigned int)__b, 8); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_idx(vector bool int __a, vector bool int __b) { - return __builtin_s390_vfaef((vector unsigned int)__a, - (vector unsigned int)__b, 8); +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_idx(__vector __bool int __a, __vector __bool int __b) { + return __builtin_s390_vfaef((__vector unsigned int)__a, + (__vector unsigned int)__b, 8); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_idx(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_idx(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vfaef(__a, __b, 8); } /*-- vec_find_any_ne_idx_cc -------------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_find_any_ne_idx_cc(vector signed char __a, vector signed char __b, - int *__cc) { - return (vector signed char) - __builtin_s390_vfaebs((vector unsigned char)__a, - (vector unsigned char)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed char +vec_find_any_ne_idx_cc(__vector signed char __a, + __vector signed char __b, int *__cc) { + return (__vector signed char) + __builtin_s390_vfaebs((__vector unsigned char)__a, + (__vector unsigned char)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_idx_cc(vector bool char __a, vector bool char __b, int *__cc) { - return __builtin_s390_vfaebs((vector unsigned char)__a, - (vector unsigned char)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_idx_cc(__vector __bool char __a, + __vector __bool char __b, int *__cc) { + return __builtin_s390_vfaebs((__vector unsigned char)__a, + (__vector unsigned char)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_idx_cc(vector unsigned char __a, vector unsigned char __b, +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_idx_cc(__vector unsigned char __a, + __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaebs(__a, __b, 8, __cc); } -static inline __ATTRS_o_ai vector signed short -vec_find_any_ne_idx_cc(vector signed short __a, vector signed short __b, - int *__cc) { - return (vector signed short) - __builtin_s390_vfaehs((vector unsigned short)__a, - (vector unsigned short)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed short +vec_find_any_ne_idx_cc(__vector signed short __a, + __vector signed short __b, int *__cc) { + return (__vector signed short) + __builtin_s390_vfaehs((__vector unsigned short)__a, + (__vector unsigned short)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_idx_cc(vector bool short __a, vector bool short __b, - int *__cc) { - return __builtin_s390_vfaehs((vector unsigned short)__a, - (vector unsigned short)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_idx_cc(__vector __bool short __a, + __vector __bool short __b, int *__cc) { + return __builtin_s390_vfaehs((__vector unsigned short)__a, + (__vector unsigned short)__b, 8, __cc); } 
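Aside, not part of the patch: everything in this header hunk is a mechanical rename, replacing the context-sensitive vector/bool spellings with the always-available __vector/__bool keywords; behavior is unchanged. As a hedged sketch of how the _idx variants just converted are meant to be called (the helper name is invented, and it assumes a z13-or-later target built with the vector facility enabled, e.g. -march=z13 -mzvector):

  #include <vecintrin.h>

  /* Byte index of the first position where a and b differ. As I understand
     the underlying VFAE string instruction, it reports the index in byte
     element 7 of the result vector, or 16 when the two vectors are equal. */
  static inline int first_difference(__vector unsigned char a,
                                     __vector unsigned char b) {
    __vector unsigned char idx = vec_find_any_ne_idx(a, b);
    return idx[7];
  }
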
-static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_idx_cc(vector unsigned short __a, vector unsigned short __b, - int *__cc) { +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_idx_cc(__vector unsigned short __a, + __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaehs(__a, __b, 8, __cc); } -static inline __ATTRS_o_ai vector signed int -vec_find_any_ne_idx_cc(vector signed int __a, vector signed int __b, - int *__cc) { - return (vector signed int) - __builtin_s390_vfaefs((vector unsigned int)__a, - (vector unsigned int)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed int +vec_find_any_ne_idx_cc(__vector signed int __a, + __vector signed int __b, int *__cc) { + return (__vector signed int) + __builtin_s390_vfaefs((__vector unsigned int)__a, + (__vector unsigned int)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_idx_cc(vector bool int __a, vector bool int __b, int *__cc) { - return __builtin_s390_vfaefs((vector unsigned int)__a, - (vector unsigned int)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_idx_cc(__vector __bool int __a, + __vector __bool int __b, int *__cc) { + return __builtin_s390_vfaefs((__vector unsigned int)__a, + (__vector unsigned int)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_idx_cc(vector unsigned int __a, vector unsigned int __b, - int *__cc) { +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_idx_cc(__vector unsigned int __a, + __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaefs(__a, __b, 8, __cc); } /*-- vec_find_any_ne_or_0_idx -----------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_find_any_ne_or_0_idx(vector signed char __a, vector signed char __b) { - return (vector signed char) - __builtin_s390_vfaezb((vector unsigned char)__a, - (vector unsigned char)__b, 8); +static inline __ATTRS_o_ai __vector signed char +vec_find_any_ne_or_0_idx(__vector signed char __a, + __vector signed char __b) { + return (__vector signed char) + __builtin_s390_vfaezb((__vector unsigned char)__a, + (__vector unsigned char)__b, 8); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_or_0_idx(vector bool char __a, vector bool char __b) { - return __builtin_s390_vfaezb((vector unsigned char)__a, - (vector unsigned char)__b, 8); +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_or_0_idx(__vector __bool char __a, + __vector __bool char __b) { + return __builtin_s390_vfaezb((__vector unsigned char)__a, + (__vector unsigned char)__b, 8); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_or_0_idx(vector unsigned char __a, vector unsigned char __b) { +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_or_0_idx(__vector unsigned char __a, + __vector unsigned char __b) { return __builtin_s390_vfaezb(__a, __b, 8); } -static inline __ATTRS_o_ai vector signed short -vec_find_any_ne_or_0_idx(vector signed short __a, vector signed short __b) { - return (vector signed short) - __builtin_s390_vfaezh((vector unsigned short)__a, - (vector unsigned short)__b, 8); +static inline __ATTRS_o_ai __vector signed short +vec_find_any_ne_or_0_idx(__vector signed short __a, + __vector signed short __b) { + return (__vector signed short) + __builtin_s390_vfaezh((__vector unsigned short)__a, + (__vector unsigned short)__b, 8); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_or_0_idx(vector 
bool short __a, vector bool short __b) { - return __builtin_s390_vfaezh((vector unsigned short)__a, - (vector unsigned short)__b, 8); +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_or_0_idx(__vector __bool short __a, + __vector __bool short __b) { + return __builtin_s390_vfaezh((__vector unsigned short)__a, + (__vector unsigned short)__b, 8); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_or_0_idx(vector unsigned short __a, vector unsigned short __b) { +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_or_0_idx(__vector unsigned short __a, + __vector unsigned short __b) { return __builtin_s390_vfaezh(__a, __b, 8); } -static inline __ATTRS_o_ai vector signed int -vec_find_any_ne_or_0_idx(vector signed int __a, vector signed int __b) { - return (vector signed int) - __builtin_s390_vfaezf((vector unsigned int)__a, - (vector unsigned int)__b, 8); +static inline __ATTRS_o_ai __vector signed int +vec_find_any_ne_or_0_idx(__vector signed int __a, + __vector signed int __b) { + return (__vector signed int) + __builtin_s390_vfaezf((__vector unsigned int)__a, + (__vector unsigned int)__b, 8); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_or_0_idx(vector bool int __a, vector bool int __b) { - return __builtin_s390_vfaezf((vector unsigned int)__a, - (vector unsigned int)__b, 8); +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_or_0_idx(__vector __bool int __a, + __vector __bool int __b) { + return __builtin_s390_vfaezf((__vector unsigned int)__a, + (__vector unsigned int)__b, 8); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_or_0_idx(vector unsigned int __a, vector unsigned int __b) { +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_or_0_idx(__vector unsigned int __a, + __vector unsigned int __b) { return __builtin_s390_vfaezf(__a, __b, 8); } /*-- vec_find_any_ne_or_0_idx_cc --------------------------------------------*/ -static inline __ATTRS_o_ai vector signed char -vec_find_any_ne_or_0_idx_cc(vector signed char __a, vector signed char __b, - int *__cc) { - return (vector signed char) - __builtin_s390_vfaezbs((vector unsigned char)__a, - (vector unsigned char)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed char +vec_find_any_ne_or_0_idx_cc(__vector signed char __a, + __vector signed char __b, int *__cc) { + return (__vector signed char) + __builtin_s390_vfaezbs((__vector unsigned char)__a, + (__vector unsigned char)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_or_0_idx_cc(vector bool char __a, vector bool char __b, - int *__cc) { - return __builtin_s390_vfaezbs((vector unsigned char)__a, - (vector unsigned char)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_or_0_idx_cc(__vector __bool char __a, + __vector __bool char __b, int *__cc) { + return __builtin_s390_vfaezbs((__vector unsigned char)__a, + (__vector unsigned char)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_find_any_ne_or_0_idx_cc(vector unsigned char __a, vector unsigned char __b, - int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_find_any_ne_or_0_idx_cc(__vector unsigned char __a, + __vector unsigned char __b, int *__cc) { return __builtin_s390_vfaezbs(__a, __b, 8, __cc); } -static inline __ATTRS_o_ai vector signed short -vec_find_any_ne_or_0_idx_cc(vector signed short __a, vector signed short __b, - int *__cc) { - return (vector signed short) - __builtin_s390_vfaezhs((vector 
unsigned short)__a, - (vector unsigned short)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed short +vec_find_any_ne_or_0_idx_cc(__vector signed short __a, + __vector signed short __b, int *__cc) { + return (__vector signed short) + __builtin_s390_vfaezhs((__vector unsigned short)__a, + (__vector unsigned short)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_or_0_idx_cc(vector bool short __a, vector bool short __b, - int *__cc) { - return __builtin_s390_vfaezhs((vector unsigned short)__a, - (vector unsigned short)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_or_0_idx_cc(__vector __bool short __a, + __vector __bool short __b, int *__cc) { + return __builtin_s390_vfaezhs((__vector unsigned short)__a, + (__vector unsigned short)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned short -vec_find_any_ne_or_0_idx_cc(vector unsigned short __a, - vector unsigned short __b, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned short +vec_find_any_ne_or_0_idx_cc(__vector unsigned short __a, + __vector unsigned short __b, int *__cc) { return __builtin_s390_vfaezhs(__a, __b, 8, __cc); } -static inline __ATTRS_o_ai vector signed int -vec_find_any_ne_or_0_idx_cc(vector signed int __a, vector signed int __b, - int *__cc) { - return (vector signed int) - __builtin_s390_vfaezfs((vector unsigned int)__a, - (vector unsigned int)__b, 8, __cc); +static inline __ATTRS_o_ai __vector signed int +vec_find_any_ne_or_0_idx_cc(__vector signed int __a, + __vector signed int __b, int *__cc) { + return (__vector signed int) + __builtin_s390_vfaezfs((__vector unsigned int)__a, + (__vector unsigned int)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_or_0_idx_cc(vector bool int __a, vector bool int __b, - int *__cc) { - return __builtin_s390_vfaezfs((vector unsigned int)__a, - (vector unsigned int)__b, 8, __cc); +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_or_0_idx_cc(__vector __bool int __a, + __vector __bool int __b, int *__cc) { + return __builtin_s390_vfaezfs((__vector unsigned int)__a, + (__vector unsigned int)__b, 8, __cc); } -static inline __ATTRS_o_ai vector unsigned int -vec_find_any_ne_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, - int *__cc) { +static inline __ATTRS_o_ai __vector unsigned int +vec_find_any_ne_or_0_idx_cc(__vector unsigned int __a, + __vector unsigned int __b, int *__cc) { return __builtin_s390_vfaezfs(__a, __b, 8, __cc); } @@ -10711,63 +10847,63 @@ vec_find_any_ne_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, #if __ARCH__ >= 13 -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector signed char __a, vector signed char __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsb((vector unsigned char)__a, - (vector unsigned char)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector signed char __a, __vector signed char __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsb((__vector unsigned char)__a, + (__vector unsigned char)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector bool char __a, vector bool char __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsb((vector unsigned char)__a, - (vector unsigned char)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector __bool char __a, __vector __bool 
char __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsb((__vector unsigned char)__a, + (__vector unsigned char)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector unsigned char __a, vector unsigned char __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsb(__a, __b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector signed short __a, vector signed short __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsh((vector unsigned short)__a, - (vector unsigned short)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector signed short __a, __vector signed short __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsh((__vector unsigned short)__a, + (__vector unsigned short)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector bool short __a, vector bool short __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsh((vector unsigned short)__a, - (vector unsigned short)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector __bool short __a, __vector __bool short __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsh((__vector unsigned short)__a, + (__vector unsigned short)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector unsigned short __a, vector unsigned short __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsh(__a, __b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector signed int __a, vector signed int __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsf((vector unsigned int)__a, - (vector unsigned int)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector signed int __a, __vector signed int __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsf((__vector unsigned int)__a, + (__vector unsigned int)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector bool int __a, vector bool int __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrsf((vector unsigned int)__a, - (vector unsigned int)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector __bool int __a, __vector __bool int __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsf((__vector unsigned int)__a, + (__vector unsigned int)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_cc(vector unsigned int __a, vector unsigned int __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_cc(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrsf(__a, __b, __c, __cc); } @@ -10777,72 +10913,72 @@ vec_search_string_cc(vector unsigned int __a, vector unsigned int __b, #if __ARCH__ 
>= 13 -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector signed char __a, - vector signed char __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszb((vector unsigned char)__a, - (vector unsigned char)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector signed char __a, + __vector signed char __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszb((__vector unsigned char)__a, + (__vector unsigned char)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector bool char __a, - vector bool char __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszb((vector unsigned char)__a, - (vector unsigned char)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector __bool char __a, + __vector __bool char __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszb((__vector unsigned char)__a, + (__vector unsigned char)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector unsigned char __a, - vector unsigned char __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector unsigned char __a, + __vector unsigned char __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszb(__a, __b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector signed short __a, - vector signed short __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszh((vector unsigned short)__a, - (vector unsigned short)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector signed short __a, + __vector signed short __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszh((__vector unsigned short)__a, + (__vector unsigned short)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector bool short __a, - vector bool short __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszh((vector unsigned short)__a, - (vector unsigned short)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector __bool short __a, + __vector __bool short __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszh((__vector unsigned short)__a, + (__vector unsigned short)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector unsigned short __a, - vector unsigned short __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector unsigned short __a, + __vector unsigned short __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszh(__a, __b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector signed int __a, - vector signed int __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszf((vector unsigned int)__a, - (vector unsigned int)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector signed int __a, + __vector signed int __b, + __vector unsigned char __c, int *__cc) { + return 
__builtin_s390_vstrszf((__vector unsigned int)__a, + (__vector unsigned int)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector bool int __a, - vector bool int __b, - vector unsigned char __c, int *__cc) { - return __builtin_s390_vstrszf((vector unsigned int)__a, - (vector unsigned int)__b, __c, __cc); +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector __bool int __a, + __vector __bool int __b, + __vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszf((__vector unsigned int)__a, + (__vector unsigned int)__b, __c, __cc); } -static inline __ATTRS_o_ai vector unsigned char -vec_search_string_until_zero_cc(vector unsigned int __a, - vector unsigned int __b, - vector unsigned char __c, int *__cc) { +static inline __ATTRS_o_ai __vector unsigned char +vec_search_string_until_zero_cc(__vector unsigned int __a, + __vector unsigned int __b, + __vector unsigned char __c, int *__cc) { return __builtin_s390_vstrszf(__a, __b, __c, __cc); } diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index c2c57cadfdf24..612aec139723e 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -1007,20 +1007,6 @@ wasm_u32x4_trunc_saturate_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)__a); } -#ifdef __wasm_unimplemented_simd128__ - -static __inline__ v128_t __DEFAULT_FN_ATTRS -wasm_i64x2_trunc_saturate_f64x2(v128_t __a) { - return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)__a); -} - -static __inline__ v128_t __DEFAULT_FN_ATTRS -wasm_u64x2_trunc_saturate_f64x2(v128_t __a) { - return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)__a); -} - -#endif // __wasm_unimplemented_simd128__ - static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_i32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__i32x4)__a, __f32x4); @@ -1031,20 +1017,6 @@ wasm_f32x4_convert_u32x4(v128_t __a) { return (v128_t) __builtin_convertvector((__u32x4)__a, __f32x4); } -#ifdef __wasm_unimplemented_simd128__ - -static __inline__ v128_t __DEFAULT_FN_ATTRS -wasm_f64x2_convert_i64x2(v128_t __a) { - return (v128_t) __builtin_convertvector((__i64x2)__a, __f64x2); -} - -static __inline__ v128_t __DEFAULT_FN_ATTRS -wasm_f64x2_convert_u64x2(v128_t __a) { - return (v128_t) __builtin_convertvector((__u64x2)__a, __f64x2); -} - -#endif // __wasm_unimplemented_simd128__ - #define wasm_v8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \ __c7, __c8, __c9, __c10, __c11, __c12, __c13, \ __c14, __c15) \ diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h index a8b36622d4101..768d0e56ab057 100644 --- a/clang/lib/Headers/x86intrin.h +++ b/clang/lib/Headers/x86intrin.h @@ -14,39 +14,48 @@ #include <immintrin.h> -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__3dNOW__) #include <mm3dnow.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__PRFCHW__) #include <prfchwintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__SSE4A__) #include <ammintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__) +#if !(defined(_MSC_VER) || defined(__SCE__)) ||
__has_feature(modules) || \ + defined(__FMA4__) #include <fma4intrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__XOP__) #include <xopintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__TBM__) #include <tbmintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__LWP__) #include <lwpintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__MWAITX__) #include <mwaitxintrin.h> #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__CLZERO__) #include <clzerointrin.h> #endif diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 1199f75f8a3c9..3ac1df1740c81 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -1219,9 +1219,11 @@ HeaderSearch::getExistingFileInfo(const FileEntry *FE, } bool HeaderSearch::isFileMultipleIncludeGuarded(const FileEntry *File) { - // Check if we've ever seen this file as a header. + // Check if we've entered this file and found an include guard or #pragma + // once. Note that we don't check for #import, because that's not a property + // of the file itself. if (auto *HFI = getExistingFileInfo(File)) - return HFI->isPragmaOnce || HFI->isImport || HFI->ControllingMacro || + return HFI->isPragmaOnce || HFI->ControllingMacro || HFI->ControllingMacroID; return false; } @@ -1399,6 +1401,16 @@ HeaderSearch::findModuleForHeader(const FileEntry *File, return ModMap.findModuleForHeader(File, AllowTextual); } +ArrayRef<ModuleMap::KnownHeader> +HeaderSearch::findAllModulesForHeader(const FileEntry *File) const { + if (ExternalSource) { + // Make sure the external source has handled header info about this file, + // which includes whether the file is part of a module. + (void)getExistingFileInfo(File); + } + return ModMap.findAllModulesForHeader(File); +} + static bool suggestModule(HeaderSearch &HS, const FileEntry *File, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) { diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 4f7d5ab137e64..85bf93ac99491 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -662,7 +662,20 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) { } ArrayRef<ModuleMap::KnownHeader> -ModuleMap::findAllModulesForHeader(const FileEntry *File) const { +ModuleMap::findAllModulesForHeader(const FileEntry *File) { + HeadersMap::iterator Known = findKnownHeader(File); + if (Known != Headers.end()) + return Known->second; + + if (findOrCreateModuleForHeaderInUmbrellaDir(File)) + return Headers.find(File)->second; + + return None; +} + +ArrayRef<ModuleMap::KnownHeader> +ModuleMap::findResolvedModulesForHeader(const FileEntry *File) const { + // FIXME: Is this necessary?
resolveHeaderDirectives(File); auto It = Headers.find(File); if (It == Headers.end()) diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 38e5777a0dd3c..aab9038d25e00 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -646,24 +646,8 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) { } const FileEntry * -Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, - Module *M, - SourceLocation Loc) { - assert(M && "no module to include"); - - // If the context is the global module fragment of some module, we never - // want to return that file; instead, we want the innermost include-guarded - // header that it included. - bool InGlobalModuleFragment = M->Kind == Module::GlobalModuleFragment; - - // If we have a module import syntax, we shouldn't include a header to - // make a particular module visible. - if ((getLangOpts().ObjC || getLangOpts().CPlusPlusModules || - getLangOpts().ModulesTS) && - !InGlobalModuleFragment) - return nullptr; - - Module *TopM = M->getTopLevelModule(); +Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, + SourceLocation Loc) { Module *IncM = getModuleForLocation(IncLoc); // Walk up through the include stack, looking through textual headers of M @@ -677,37 +661,50 @@ Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc, if (!FE) break; - if (InGlobalModuleFragment) { - if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) - return FE; - Loc = SM.getIncludeLoc(ID); - continue; - } - - bool InTextualHeader = false; - for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) { - if (!Header.getModule()->isSubModuleOf(TopM)) - continue; - - if (!(Header.getRole() & ModuleMap::TextualHeader)) { - // If this is an accessible, non-textual header of M's top-level module - // that transitively includes the given location and makes the - // corresponding module visible, this is the thing to #include. - if (Header.isAccessibleFrom(IncM)) - return FE; + // We want to find all possible modules that might contain this header, so + // search all enclosing directories for module maps and load them. + HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr, + SourceMgr.isInSystemHeader(Loc)); + bool InPrivateHeader = false; + for (auto Header : HeaderInfo.findAllModulesForHeader(FE)) { + if (!Header.isAccessibleFrom(IncM)) { // It's in a private header; we can't #include it. // FIXME: If there's a public header in some module that re-exports it, // then we could suggest including that, but it's not clear that's the // expected way to make this entity visible. + InPrivateHeader = true; continue; } - InTextualHeader = true; + // We'll suggest including textual headers below if they're + // include-guarded. + if (Header.getRole() & ModuleMap::TextualHeader) + continue; + + // If we have a module import syntax, we shouldn't include a header to + // make a particular module visible. Let the caller know they should + // suggest an import instead. + if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules || + getLangOpts().ModulesTS) + return nullptr; + + // If this is an accessible, non-textual header of M's top-level module + // that transitively includes the given location and makes the + // corresponding module visible, this is the thing to #include. + return FE; } - if (!InTextualHeader) - break; + // FIXME: If we're bailing out due to a private header, we shouldn't suggest + // an import either. 
+ if (InPrivateHeader) + return nullptr; + + // If the header is includable and has an include guard, assume the + // intended way to expose its contents is by #include, not by importing a + // module that transitively includes it. + if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + return FE; Loc = SM.getIncludeLoc(ID); } @@ -1918,14 +1915,18 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( SourceLocation FilenameLoc = FilenameTok.getLocation(); StringRef LookupFilename = Filename; -#ifndef _WIN32 +#ifdef _WIN32 + llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::windows; +#else // Normalize slashes when compiling with -fms-extensions on non-Windows. This // is unnecessary on Windows since the filesystem there handles backslashes. SmallString<128> NormalizedPath; + llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::posix; if (LangOpts.MicrosoftExt) { NormalizedPath = Filename.str(); llvm::sys::path::native(NormalizedPath); LookupFilename = NormalizedPath; + BackslashStyle = llvm::sys::path::Style::windows; } #endif @@ -2110,21 +2111,44 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( SmallString<128> Path; Path.reserve(Name.size()+2); Path.push_back(isAngled ? '<' : '"'); - bool isLeadingSeparator = llvm::sys::path::is_absolute(Name); + + const auto IsSep = [BackslashStyle](char c) { + return llvm::sys::path::is_separator(c, BackslashStyle); + }; + for (auto Component : Components) { - if (isLeadingSeparator) - isLeadingSeparator = false; - else + // On POSIX, Components will contain a single '/' as first element + // exactly if Name is an absolute path. + // On Windows, it will contain "C:" followed by '\' for absolute paths. + // The drive letter is optional for absolute paths on Windows, but + // clang currently cannot process absolute paths in #include lines that + // don't have a drive. + // If the first entry in Components is a directory separator, + // then the code at the bottom of this loop that keeps the original + // directory separator style copies it. If the second entry is + // a directory separator (the C:\ case), then that separator already + // got copied when the C: was processed and we want to skip that entry. + if (!(Component.size() == 1 && IsSep(Component[0]))) Path.append(Component); - // Append the separator the user used, or the close quote - Path.push_back( - Path.size() <= Filename.size() ? Filename[Path.size()-1] : - (isAngled ? '>' : '"')); + else if (!Path.empty()) + continue; + + // Append the separator(s) the user used, or the close quote + if (Path.size() > Filename.size()) { + Path.push_back(isAngled ? '>' : '"'); + continue; + } + assert(IsSep(Filename[Path.size()-1])); + do + Path.push_back(Filename[Path.size()-1]); + while (Path.size() <= Filename.size() && IsSep(Filename[Path.size()-1])); } - // For user files and known standard headers, by default we issue a diagnostic. - // For other system headers, we don't. They can be controlled separately. - auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ? - diag::pp_nonportable_path : diag::pp_nonportable_system_path; + // For user files and known standard headers, issue a diagnostic. + // For other system headers, don't. They can be controlled separately. + auto DiagId = + (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) + ? 
diag::pp_nonportable_path + : diag::pp_nonportable_system_path; Diag(FilenameTok, DiagId) << Path << FixItHint::CreateReplacement(FilenameRange, Path); } @@ -2804,7 +2828,9 @@ void Preprocessor::HandleDefineDirective( // warn-because-unused-macro set. If it gets used it will be removed from set. if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) && !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) && - !MacroExpansionInDirectivesOverride) { + !MacroExpansionInDirectivesOverride && + getSourceManager().getFileID(MI->getDefinitionLoc()) != + getPredefinesFileID()) { MI->setIsWarnIfUnused(true); WarnUnusedMacroLocs.insert(MI->getDefinitionLoc()); } diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index 57a95815488eb..8d93eb3d6cdc4 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -71,31 +71,36 @@ void EmptyPragmaHandler::HandlePragma(Preprocessor &PP, // PragmaNamespace Implementation. //===----------------------------------------------------------------------===// -PragmaNamespace::~PragmaNamespace() { - llvm::DeleteContainerSeconds(Handlers); -} - /// FindHandler - Check to see if there is already a handler for the /// specified name. If not, return the handler for the null identifier if it /// exists, otherwise return null. If IgnoreNull is true (the default) then /// the null handler isn't returned on failure to match. PragmaHandler *PragmaNamespace::FindHandler(StringRef Name, bool IgnoreNull) const { - if (PragmaHandler *Handler = Handlers.lookup(Name)) - return Handler; - return IgnoreNull ? nullptr : Handlers.lookup(StringRef()); + auto I = Handlers.find(Name); + if (I != Handlers.end()) + return I->getValue().get(); + if (IgnoreNull) + return nullptr; + I = Handlers.find(StringRef()); + if (I != Handlers.end()) + return I->getValue().get(); + return nullptr; } void PragmaNamespace::AddPragma(PragmaHandler *Handler) { - assert(!Handlers.lookup(Handler->getName()) && + assert(!Handlers.count(Handler->getName()) && "A handler with this name is already registered in this namespace"); - Handlers[Handler->getName()] = Handler; + Handlers[Handler->getName()].reset(Handler); } void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) { - assert(Handlers.lookup(Handler->getName()) && + auto I = Handlers.find(Handler->getName()); + assert(I != Handlers.end() && "Handler not registered in this namespace"); - Handlers.erase(Handler->getName()); + // Release ownership back to the caller. + I->getValue().release(); + Handlers.erase(I); } void PragmaNamespace::HandlePragma(Preprocessor &PP, diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 961b55c9387d6..105aa6683c8b5 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -774,6 +774,10 @@ static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, #define CXX20_KEYWORD(NAME, FLAGS) \ .Case(#NAME, diag::warn_cxx20_keyword) #include "clang/Basic/TokenKinds.def" + // char8_t is not modeled as a CXX20_KEYWORD because it's not + // unconditionally enabled in C++20 mode. (It can be disabled + // by -fno-char8_t.) + .Case("char8_t", diag::warn_cxx20_keyword) ; llvm_unreachable( diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index b5a00e559bffe..66fc7623bde9f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2743,7 +2743,7 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, default: // This is probably supposed to be a type. 
This includes cases like: // int f(itn); - // struct S { unsinged : 4; }; + // struct S { unsigned : 4; }; break; } } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 0c0ef154805c9..19acfe1158fbd 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2436,6 +2436,50 @@ bool Parser::ParseOpenMPSimpleVarList( return !IsCorrect; } +OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { + SourceLocation Loc = Tok.getLocation(); + ConsumeAnyToken(); + + // Parse '('. + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.expectAndConsume(diag::err_expected_lparen_after, "uses_allocator")) + return nullptr; + SmallVector Data; + do { + ExprResult Allocator = ParseCXXIdExpression(); + if (Allocator.isInvalid()) { + SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end, + StopBeforeMatch); + break; + } + Sema::UsesAllocatorsData &D = Data.emplace_back(); + D.Allocator = Allocator.get(); + if (Tok.is(tok::l_paren)) { + BalancedDelimiterTracker T(*this, tok::l_paren, + tok::annot_pragma_openmp_end); + T.consumeOpen(); + ExprResult AllocatorTraits = ParseCXXIdExpression(); + T.consumeClose(); + if (AllocatorTraits.isInvalid()) { + SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end, + StopBeforeMatch); + break; + } + D.AllocatorTraits = AllocatorTraits.get(); + D.LParenLoc = T.getOpenLocation(); + D.RParenLoc = T.getCloseLocation(); + } + if (Tok.isNot(tok::comma) && Tok.isNot(tok::r_paren)) + Diag(Tok, diag::err_omp_expected_punc) << "uses_allocators" << 0; + // Parse ',' + if (Tok.is(tok::comma)) + ConsumeAnyToken(); + } while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)); + T.consumeClose(); + return Actions.ActOnOpenMPUsesAllocatorClause(Loc, T.getOpenLocation(), + T.getCloseLocation(), Data); +} + /// Parsing of OpenMP clauses. 
/// /// clause: @@ -2453,7 +2497,7 @@ bool Parser::ParseOpenMPSimpleVarList( /// in_reduction-clause | allocator-clause | allocate-clause | /// acq_rel-clause | acquire-clause | release-clause | relaxed-clause | /// depobj-clause | destroy-clause | detach-clause | inclusive-clause | -/// exclusive-clause +/// exclusive-clause | uses_allocators-clause /// OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, bool FirstClause) { @@ -2626,6 +2670,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_exclusive: Clause = ParseOpenMPVarListClause(DKind, CKind, WrongDirective); break; + case OMPC_uses_allocators: + Clause = ParseOpenMPUsesAllocatorClause(DKind); + break; case OMPC_device_type: case OMPC_unknown: skipUntilPragmaOpenMPEnd(DKind); diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index f3e5f3cc632e6..36e67d2c9cb71 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -10,6 +10,7 @@ if (MSVC) set_source_files_properties(SemaSYCL.cpp PROPERTIES COMPILE_FLAGS /bigobj) set_source_files_properties(SemaTemplate.cpp PROPERTIES COMPILE_FLAGS /bigobj) set_source_files_properties(SemaTemplateDeduction.cpp PROPERTIES COMPILE_FLAGS /bigobj) + set_source_files_properties(SemaOpenMP.cpp PROPERTIES COMPILE_FLAGS /bigobj) endif() clang_tablegen(OpenCLBuiltins.inc -gen-clang-opencl-builtins diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index faab250e58ffd..73d190891b0fe 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -426,6 +426,10 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { if (CD->getParent()->isDynamicClass()) return false; + // A union ctor does not call the ctors of its data members. + if (CD->getParent()->isUnion()) + return true; + // The only form of initializer allowed is an empty constructor. // This will recursively check all base classes and member initializers if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { @@ -465,6 +469,11 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { if (ClassDecl->isDynamicClass()) return false; + // A union has no base classes, and a union dtor does not call the dtors of + // its data members. + if (DD->getParent()->isUnion()) + return true; + // Only empty destructors are allowed. This will recursively check // destructors for all base classes... if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 23059bbd329ac..878a25e227b58 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -1998,7 +1998,8 @@ static void handleCmseNSEntryAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - if (cast<FunctionDecl>(D)->getStorageClass() == SC_Static) { + const auto *FD = cast<FunctionDecl>(D); + if (!FD->isExternallyVisible()) { S.Diag(AL.getLoc(), diag::warn_attribute_cmse_entry_static); return; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 84cc6f0885ed3..58455d5019b57 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -2821,13 +2821,13 @@ void Sema::BuildBasePathArray(const CXXBasePaths &Paths, /// if there is an error, and Range is the source range to highlight /// if there is an error.
/// -/// If either InaccessibleBaseID or AmbigiousBaseConvID are 0, then the +/// If either InaccessibleBaseID or AmbiguousBaseConvID are 0, then the /// diagnostic for the respective type of error will be suppressed, but the /// check for ill-formed code will still be performed. bool Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base, unsigned InaccessibleBaseID, - unsigned AmbigiousBaseConvID, + unsigned AmbiguousBaseConvID, SourceLocation Loc, SourceRange Range, DeclarationName Name, CXXCastPath *BasePath, @@ -2853,7 +2853,7 @@ Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base, for (const CXXBasePath &PossiblePath : Paths) { if (PossiblePath.size() == 1) { Path = &PossiblePath; - if (AmbigiousBaseConvID) + if (AmbiguousBaseConvID) Diag(Loc, diag::ext_ms_ambiguous_direct_base) << Base << Derived << Range; break; @@ -2881,7 +2881,7 @@ Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base, return false; } - if (AmbigiousBaseConvID) { + if (AmbiguousBaseConvID) { // We know that the derived-to-base conversion is ambiguous, and // we're going to produce a diagnostic. Perform the derived-to-base // search just one more time to compute all of the possible paths so @@ -2900,7 +2900,7 @@ Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base, // to each base class subobject. std::string PathDisplayStr = getAmbiguousPathsDisplayString(Paths); - Diag(Loc, AmbigiousBaseConvID) + Diag(Loc, AmbiguousBaseConvID) << Derived << Base << PathDisplayStr << Range << Name; } return true; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5f0976515065f..49f3e70ab14dd 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1501,7 +1501,7 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, return LHSType; // ExtInt types aren't subject to conversions between them or normal integers, - // so this fails. + // so this fails. if(LHSType->isExtIntType() || RHSType->isExtIntType()) return QualType(); @@ -6601,6 +6601,18 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, if (NDecl) DiagnoseSentinelCalls(NDecl, LParenLoc, Args); + // Warn for unions passing across security boundary (CMSE). + if (FuncT != nullptr && FuncT->getCmseNSCallAttr()) { + for (unsigned i = 0, e = Args.size(); i != e; i++) { + if (const auto *RT = + dyn_cast(Args[i]->getType().getCanonicalType())) { + if (RT->getDecl()->isOrContainsUnion()) + Diag(Args[i]->getBeginLoc(), diag::warn_cmse_nonsecure_union) + << 0 << i; + } + } + } + // Do special checking on direct calls to functions. if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) @@ -9627,7 +9639,8 @@ static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar, ScalarCast = CK_IntegralToFloating; } else return true; - } + } else if (ScalarTy->isEnumeralType()) + return true; // Adjust scalar if desired. 
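A sketch of what the new CMSE check in BuildResolvedCallExpr above diagnoses, assuming a target compiled with -mcmse; the function names are illustrative. Passing a union, directly or inside a struct, through a cmse_nonsecure_call function pointer now warns, because padding and inactive members can leak data across the security boundary:

  union Data { int Word; char Bytes[4]; };
  typedef void (*NonSecureFn)(union Data) __attribute__((cmse_nonsecure_call));
  void Forward(NonSecureFn Fn, union Data D) {
    Fn(D); // warns: union passed across the security boundary (argument index 0)
  }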
if (Scalar) { diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 63f1442bc589c..f7e77d9661791 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -5130,20 +5130,19 @@ static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, if (RD && RD->isAbstract()) return false; - SmallVector OpaqueArgExprs; + llvm::BumpPtrAllocator OpaqueExprAllocator; SmallVector ArgExprs; ArgExprs.reserve(Args.size() - 1); for (unsigned I = 1, N = Args.size(); I != N; ++I) { QualType ArgTy = Args[I]->getType(); if (ArgTy->isObjectType() || ArgTy->isFunctionType()) ArgTy = S.Context.getRValueReferenceType(ArgTy); - OpaqueArgExprs.push_back( - OpaqueValueExpr(Args[I]->getTypeLoc().getBeginLoc(), - ArgTy.getNonLValueExprType(S.Context), - Expr::getValueKindForType(ArgTy))); + ArgExprs.push_back( + new (OpaqueExprAllocator.Allocate()) + OpaqueValueExpr(Args[I]->getTypeLoc().getBeginLoc(), + ArgTy.getNonLValueExprType(S.Context), + Expr::getValueKindForType(ArgTy))); } - for (Expr &E : OpaqueArgExprs) - ArgExprs.push_back(&E); // Perform the initialization in an unevaluated context within a SFINAE // trap at translation unit scope. diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 10a7cf3e22d0e..c327aa41c896b 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -894,6 +894,62 @@ ExprResult Sema::BuildObjCArrayLiteral(SourceRange SR, MultiExprArg Elements) { ArrayWithObjectsMethod, SR)); } +/// Check for duplicate keys in an ObjC dictionary literal. For instance: +/// NSDictionary *nd = @{ @"foo" : @"bar", @"foo" : @"baz" }; +static void +CheckObjCDictionaryLiteralDuplicateKeys(Sema &S, + ObjCDictionaryLiteral *Literal) { + if (Literal->isValueDependent() || Literal->isTypeDependent()) + return; + + // NSNumber has quite relaxed equality semantics (for instance, @YES is + // considered equal to @1.0). For now, ignore floating points and just do a + // bit-width and sign agnostic integer compare. + struct APSIntCompare { + bool operator()(const llvm::APSInt &LHS, const llvm::APSInt &RHS) const { + return llvm::APSInt::compareValues(LHS, RHS) < 0; + } + }; + + llvm::DenseMap StringKeys; + std::map IntegralKeys; + + auto checkOneKey = [&](auto &Map, const auto &Key, SourceLocation Loc) { + auto Pair = Map.insert({Key, Loc}); + if (!Pair.second) { + S.Diag(Loc, diag::warn_nsdictionary_duplicate_key); + S.Diag(Pair.first->second, diag::note_nsdictionary_duplicate_key_here); + } + }; + + for (unsigned Idx = 0, End = Literal->getNumElements(); Idx != End; ++Idx) { + Expr *Key = Literal->getKeyValueElement(Idx).Key->IgnoreParenImpCasts(); + + if (auto *StrLit = dyn_cast(Key)) { + StringRef Bytes = StrLit->getString()->getBytes(); + SourceLocation Loc = StrLit->getExprLoc(); + checkOneKey(StringKeys, Bytes, Loc); + } + + if (auto *BE = dyn_cast(Key)) { + Expr *Boxed = BE->getSubExpr(); + SourceLocation Loc = BE->getExprLoc(); + + // Check for @("foo"). 
+ if (auto *Str = dyn_cast(Boxed->IgnoreParenImpCasts())) { + checkOneKey(StringKeys, Str->getBytes(), Loc); + continue; + } + + Expr::EvalResult Result; + if (Boxed->EvaluateAsInt(Result, S.getASTContext(), + Expr::SE_AllowSideEffects)) { + checkOneKey(IntegralKeys, Result.Val.getInt(), Loc); + } + } + } +} + ExprResult Sema::BuildObjCDictionaryLiteral(SourceRange SR, MutableArrayRef Elements) { SourceLocation Loc = SR.getBegin(); @@ -1061,12 +1117,14 @@ ExprResult Sema::BuildObjCDictionaryLiteral(SourceRange SR, HasPackExpansions = true; } - QualType Ty - = Context.getObjCObjectPointerType( - Context.getObjCInterfaceType(NSDictionaryDecl)); - return MaybeBindToTemporary(ObjCDictionaryLiteral::Create( - Context, Elements, HasPackExpansions, Ty, - DictionaryWithObjectsMethod, SR)); + QualType Ty = Context.getObjCObjectPointerType( + Context.getObjCInterfaceType(NSDictionaryDecl)); + + auto *Literal = + ObjCDictionaryLiteral::Create(Context, Elements, HasPackExpansions, Ty, + DictionaryWithObjectsMethod, SR); + CheckObjCDictionaryLiteralDuplicateKeys(*this, Literal); + return MaybeBindToTemporary(Literal); } ExprResult Sema::BuildObjCEncodeExpression(SourceLocation AtLoc, diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 1aec1f7baab91..9e3fe6e87bc8e 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -5379,9 +5379,8 @@ void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl, /// Get a "quoted.h" or include path to use in a diagnostic /// suggesting the addition of a #include of the specified file. -static std::string getIncludeStringForHeader(Preprocessor &PP, - const FileEntry *E, - llvm::StringRef IncludingFile) { +static std::string getHeaderNameForHeader(Preprocessor &PP, const FileEntry *E, + llvm::StringRef IncludingFile) { bool IsSystem = false; auto Path = PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics( E, IncludingFile, &IsSystem); @@ -5395,25 +5394,10 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl, assert(!Modules.empty()); auto NotePrevious = [&] { - unsigned DiagID; - switch (MIK) { - case MissingImportKind::Declaration: - DiagID = diag::note_previous_declaration; - break; - case MissingImportKind::Definition: - DiagID = diag::note_previous_definition; - break; - case MissingImportKind::DefaultArgument: - DiagID = diag::note_default_argument_declared_here; - break; - case MissingImportKind::ExplicitSpecialization: - DiagID = diag::note_explicit_specialization_declared_here; - break; - case MissingImportKind::PartialSpecialization: - DiagID = diag::note_partial_specialization_declared_here; - break; - } - Diag(DeclLoc, DiagID); + // FIXME: Suppress the note backtrace even under + // -fdiagnostics-show-note-include-stack. We don't care how this + // declaration was previously reached. + Diag(DeclLoc, diag::note_unreachable_entity) << (int)MIK; }; // Weed out duplicates from module list. @@ -5426,26 +5410,24 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl, UniqueModules.push_back(M); } - llvm::StringRef IncludingFile; - if (const FileEntry *FE = - SourceMgr.getFileEntryForID(SourceMgr.getFileID(UseLoc))) - IncludingFile = FE->tryGetRealPathName(); + // Try to find a suitable header-name to #include. 
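The duplicate-key detection above hinges on one C++ idiom: map insert() reports through its bool result whether the key was already present and, on failure, hands back the iterator to the first occurrence, which supplies the "previous key is here" note. A self-contained sketch of the same pattern with std::map standing in for the checker's maps:

  #include <cstdio>
  #include <map>
  #include <string>

  int main() {
    std::map<std::string, int> Seen; // key -> line of first occurrence
    auto CheckOneKey = [&](auto &Map, const auto &Key, int Line) {
      auto Inserted = Map.insert({Key, Line});
      if (!Inserted.second)
        std::printf("line %d: duplicate key (first used at line %d)\n", Line,
                    Inserted.first->second);
    };
    CheckOneKey(Seen, "foo", 1);
    CheckOneKey(Seen, "foo", 2); // reports the duplicate
  }

The generic lambda is what lets one helper serve both the string-keyed and the integral-keyed maps.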
+ std::string HeaderName; + if (const FileEntry *Header = + PP.getHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) { + if (const FileEntry *FE = + SourceMgr.getFileEntryForID(SourceMgr.getFileID(UseLoc))) + HeaderName = getHeaderNameForHeader(PP, Header, FE->tryGetRealPathName()); + } - if (UniqueModules.empty()) { - // All candidates were global module fragments. Try to suggest a #include. - const FileEntry *E = - PP.getModuleHeaderToIncludeForDiagnostics(UseLoc, Modules[0], DeclLoc); + // If we have a #include we should suggest, or if all definition locations + // were in global module fragments, don't suggest an import. + if (!HeaderName.empty() || UniqueModules.empty()) { // FIXME: Find a smart place to suggest inserting a #include, and add // a FixItHint there. - Diag(UseLoc, diag::err_module_unimported_use_global_module_fragment) - << (int)MIK << Decl << !!E - << (E ? getIncludeStringForHeader(PP, E, IncludingFile) : ""); - // Produce a "previous" note if it will point to a header rather than some - // random global module fragment. - // FIXME: Suppress the note backtrace even under - // -fdiagnostics-show-note-include-stack. - if (E) - NotePrevious(); + Diag(UseLoc, diag::err_module_unimported_use_header) + << (int)MIK << Decl << !HeaderName.empty() << HeaderName; + // Produce a note showing where the entity was declared. + NotePrevious(); if (Recover) createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]); return; @@ -5467,16 +5449,6 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl, Diag(UseLoc, diag::err_module_unimported_use_multiple) << (int)MIK << Decl << ModuleList; - } else if (const FileEntry *E = PP.getModuleHeaderToIncludeForDiagnostics( - UseLoc, Modules[0], DeclLoc)) { - // The right way to make the declaration visible is to include a header; - // suggest doing so. - // - // FIXME: Find a smart place to suggest inserting a #include, and add - // a FixItHint there. - Diag(UseLoc, diag::err_module_unimported_use_header) - << (int)MIK << Decl << Modules[0]->getFullModuleName() - << getIncludeStringForHeader(PP, E, IncludingFile); } else { // FIXME: Add a FixItHint that imports the corresponding module. Diag(UseLoc, diag::err_module_unimported_use) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index c675672f4f29e..82027b0e8d8ae 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -170,6 +170,7 @@ class DSAStackTy { llvm::SmallVector DeclareTargetLinkVarDecls; /// List of decls used in inclusive/exclusive clauses of the scan directive. llvm::DenseSet> UsedInScanDirective; + llvm::DenseSet> UsesAllocatorsDecls; SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name, Scope *CurScope, SourceLocation Loc) : Directive(DKind), DirectiveName(Name), CurScope(CurScope), @@ -279,6 +280,8 @@ class DSAStackTy { QualType OMPDependT; /// omp_event_handle_t type. QualType OMPEventHandleT; + /// omp_alloctrait_t type. + QualType OMPAlloctraitT; /// Expression for the predefined allocators. Expr *OMPPredefinedAllocators[OMPAllocateDeclAttr::OMPUserDefinedMemAlloc] = { nullptr}; @@ -293,6 +296,10 @@ class DSAStackTy { void setOMPAllocatorHandleT(QualType Ty) { OMPAllocatorHandleT = Ty; } /// Gets omp_allocator_handle_t type. QualType getOMPAllocatorHandleT() const { return OMPAllocatorHandleT; } + /// Sets omp_alloctrait_t type. + void setOMPAlloctraitT(QualType Ty) { OMPAlloctraitT = Ty; } + /// Gets omp_alloctrait_t type. 
+ QualType getOMPAlloctraitT() const { return OMPAlloctraitT; } /// Sets the given default allocator. void setAllocator(OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind, Expr *Allocator) { @@ -512,11 +519,15 @@ class DSAStackTy { getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR, const Expr *&ReductionRef, Expr *&TaskgroupDescriptor) const; - /// Return reduction reference expression for the current taskgroup. + /// Return reduction reference expression for the current taskgroup or + /// parallel/worksharing directives with task reductions. Expr *getTaskgroupReductionRef() const { - assert(getTopOfStack().Directive == OMPD_taskgroup && - "taskgroup reference expression requested for non taskgroup " - "directive."); + assert((getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && + "taskgroup reference expression requested for non taskgroup or " + "parallel/worksharing directive."); return getTopOfStack().TaskgroupReductionRef; } /// Checks if the given \p VD declaration is actually a taskgroup reduction @@ -1006,6 +1017,19 @@ class DSAStackTy { bool isImplicitTaskFirstprivate(Decl *D) const { return getTopOfStack().ImplicitTaskFirstprivates.count(D) > 0; } + + /// Marks decl as used in uses_allocators clause as the allocator. + void addUsesAllocatorsDecl(const Decl *D) { + getTopOfStack().UsesAllocatorsDecls.insert(D); + } + /// Checks if specified decl is used in uses allocator clause as the + /// allocator. + bool isUsesAllocatorsDecl(unsigned Level, const Decl *D) const { + return getStackElemAtLevel(Level).UsesAllocatorsDecls.count(D) > 0; + } + bool isUsesAllocatorsDecl(const Decl *D) const { + return getTopOfStack().UsesAllocatorsDecls.count(D) > 0; + } }; bool isImplicitTaskingRegion(OpenMPDirectiveKind DKind) { @@ -1331,7 +1355,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR, "Additional reduction info may be specified only for reduction items."); ReductionData &ReductionData = getTopOfStack().ReductionMap[D]; assert(ReductionData.ReductionRange.isInvalid() && - getTopOfStack().Directive == OMPD_taskgroup && + (getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && "Additional reduction info may be specified only once for reduction " "items."); ReductionData.set(BOK, SR); @@ -1354,7 +1381,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR, "Additional reduction info may be specified only for reduction items."); ReductionData &ReductionData = getTopOfStack().ReductionMap[D]; assert(ReductionData.ReductionRange.isInvalid() && - getTopOfStack().Directive == OMPD_taskgroup && + (getTopOfStack().Directive == OMPD_taskgroup || + ((isOpenMPParallelDirective(getTopOfStack().Directive) || + isOpenMPWorksharingDirective(getTopOfStack().Directive)) && + !isOpenMPSimdDirective(getTopOfStack().Directive))) && "Additional reduction info may be specified only once for reduction " "items."); ReductionData.set(ReductionRef, SR); @@ -1375,7 +1405,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); for (const_iterator I = begin() + 1, E = end(); I != E; ++I) { const DSAInfo &Data 
= I->SharingMap.lookup(D); - if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup) + if (Data.Attributes != OMPC_reduction || + Data.Modifier != OMPC_REDUCTION_task) continue; const ReductionData &ReductionData = I->ReductionMap.lookup(D); if (!ReductionData.ReductionOp || @@ -1387,8 +1418,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "expression for the descriptor is not " "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; - return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0); + return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); } return DSAVarData(); } @@ -1400,7 +1431,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( assert(!isStackEmpty() && "Data-sharing attributes stack is empty."); for (const_iterator I = begin() + 1, E = end(); I != E; ++I) { const DSAInfo &Data = I->SharingMap.lookup(D); - if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup) + if (Data.Attributes != OMPC_reduction || + Data.Modifier != OMPC_REDUCTION_task) continue; const ReductionData &ReductionData = I->ReductionMap.lookup(D); if (!ReductionData.ReductionOp || @@ -1412,8 +1444,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "expression for the descriptor is not " "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; - return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0); + return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); } return DSAVarData(); } @@ -1990,10 +2022,13 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, ((IsVariableUsedInMapClause && DSAStack->getCaptureRegion(Level, OpenMPCaptureLevel) == OMPD_target) || - !DSAStack->hasExplicitDSA( - D, - [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; }, - Level, /*NotLastprivate=*/true)) && + !(DSAStack->hasExplicitDSA( + D, + [](OpenMPClauseKind K) -> bool { + return K == OMPC_firstprivate; + }, + Level, /*NotLastprivate=*/true) || + DSAStack->isUsesAllocatorsDecl(Level, D))) && // If the variable is artificial and must be captured by value - try to // capture by value. !(isa(D) && !D->hasAttr() && @@ -2206,7 +2241,12 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, // Consider taskgroup reduction descriptor variable a private // to avoid possible capture in the region. (DSAStack->hasExplicitDirective( - [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; }, + [](OpenMPDirectiveKind K) { + return K == OMPD_taskgroup || + ((isOpenMPParallelDirective(K) || + isOpenMPWorksharingDirective(K)) && + !isOpenMPSimdDirective(K)); + }, Level) && DSAStack->isTaskgroupReductionRef(D, Level))) ? OMPC_private @@ -2366,7 +2406,7 @@ getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, /// Check consistency of the reduction clauses. static void checkReductionClauses(Sema &S, DSAStackTy *Stack, - ArrayRef Clauses) { + ArrayRef Clauses) { bool InscanFound = false; SourceLocation InscanLoc; // OpenMP 5.0, 2.19.5.4 reduction Clause, Restrictions. 
@@ -2380,7 +2420,21 @@ static void checkReductionClauses(Sema &S, DSAStackTy *Stack, if (RC->getModifier() == OMPC_REDUCTION_inscan) { InscanFound = true; InscanLoc = RC->getModifierLoc(); - break; + continue; + } + if (RC->getModifier() == OMPC_REDUCTION_task) { + // OpenMP 5.0, 2.19.5.4 reduction Clause. + // A reduction clause with the task reduction-modifier may only appear on + // a parallel construct, a worksharing construct or a combined or + // composite construct for which any of the aforementioned constructs is a + // constituent construct and simd or loop are not constituent constructs. + OpenMPDirectiveKind CurDir = Stack->getCurrentDirective(); + if (!(isOpenMPParallelDirective(CurDir) || + isOpenMPWorksharingDirective(CurDir)) || + isOpenMPSimdDirective(CurDir)) + S.Diag(RC->getModifierLoc(), + diag::err_omp_reduction_task_not_parallel_or_worksharing); + continue; } } if (InscanFound) { @@ -2421,6 +2475,11 @@ static void checkAllocateClauses(Sema &S, DSAStackTy *Stack, static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr, bool WithInit); +static void reportOriginalDsa(Sema &SemaRef, const DSAStackTy *Stack, + const ValueDecl *D, + const DSAStackTy::DSAVarData &DVar, + bool IsLoopIterVar = false); + void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { // OpenMP [2.14.3.5, Restrictions, C/C++, p.1] // A variable of class type (or array thereof) that appears in a lastprivate @@ -2490,6 +2549,51 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) { Clause->setPrivateRefs(PrivateRefs); continue; } + if (auto *Clause = dyn_cast(C)) { + for (unsigned I = 0, E = Clause->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = Clause->getAllocatorData(I); + auto *DRE = dyn_cast(D.Allocator->IgnoreParenImpCasts()); + if (!DRE) + continue; + ValueDecl *VD = DRE->getDecl(); + if (!VD) + continue; + DSAStackTy::DSAVarData DVar = + DSAStack->getTopDSA(VD, /*FromParent=*/false); + // OpenMP [2.12.5, target Construct] + // Memory allocators that appear in a uses_allocators clause cannot + // appear in other data-sharing attribute clauses or data-mapping + // attribute clauses in the same construct. + Expr *MapExpr = nullptr; + if (DVar.RefExpr || + DSAStack->checkMappableExprComponentListsForDecl( + VD, /*CurrentRegionOnly=*/true, + [VD, &MapExpr]( + OMPClauseMappableExprCommon::MappableExprComponentListRef + MapExprComponents, + OpenMPClauseKind C) { + auto MI = MapExprComponents.rbegin(); + auto ME = MapExprComponents.rend(); + if (MI != ME && + MI->getAssociatedDeclaration()->getCanonicalDecl() == + VD->getCanonicalDecl()) { + MapExpr = MI->getAssociatedExpression(); + return true; + } + return false; + })) { + Diag(D.Allocator->getExprLoc(), + diag::err_omp_allocator_used_in_clauses) + << D.Allocator->getSourceRange(); + if (DVar.RefExpr) + reportOriginalDsa(*this, DSAStack, VD, DVar); + else + Diag(MapExpr->getExprLoc(), diag::note_used_here) + << MapExpr->getSourceRange(); + } + } + continue; + } } // Check allocate clauses. 
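A sketch of the OpenMP 5.0 restriction the new diagnostic above enforces: the task reduction-modifier is only valid on parallel and worksharing constructs, or combined constructs built from them, when simd is not a constituent:

  int Sum = 0;
  void Work() {
  #pragma omp parallel reduction(task, + : Sum) // OK
    { Sum += 1; }
  #pragma omp simd reduction(task, + : Sum) // error: not a parallel or worksharing construct
    for (int I = 0; I < 8; ++I)
      Sum += I;
  }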
if (!CurContext->isDependentContext()) @@ -3018,7 +3122,7 @@ OMPRequiresDecl *Sema::CheckOMPRequiresDecl(SourceLocation Loc, static void reportOriginalDsa(Sema &SemaRef, const DSAStackTy *Stack, const ValueDecl *D, const DSAStackTy::DSAVarData &DVar, - bool IsLoopIterVar = false) { + bool IsLoopIterVar) { if (DVar.RefExpr) { SemaRef.Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_explicit_dsa) << getOpenMPClauseName(DVar.CKind); @@ -3173,6 +3277,9 @@ class DSAAttrChecker final : public StmtVisitor { if (VD->hasLocalStorage() && CS && !CS->capturesVariable(VD) && !Stack->isImplicitTaskFirstprivate(VD)) return; + // Skip allocators in uses_allocators clauses. + if (Stack->isUsesAllocatorsDecl(VD)) + return; DSAStackTy::DSAVarData DVar = Stack->getTopDSA(VD, /*FromParent=*/false); // Check if the variable has explicit DSA set and stop analysis if it so. @@ -4103,7 +4210,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, SmallVector PICs; // This is required for proper codegen. for (OMPClause *Clause : Clauses) { - if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && + if (!LangOpts.OpenMPSimd && + isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) && Clause->getClauseKind() == OMPC_in_reduction) { // Capture taskgroup task_reduction descriptors inside the tasking regions // with the corresponding in_reduction items. @@ -4626,6 +4734,27 @@ static std::pair getPrivateItem(Sema &S, Expr *&RefExpr, getCanonicalDecl(DE ? DE->getDecl() : ME->getMemberDecl()), false); } +namespace { +/// Checks if the allocator is used in uses_allocators clause to be allowed in +/// target regions. +class AllocatorChecker final : public ConstStmtVisitor { + DSAStackTy *S = nullptr; + +public: + bool VisitDeclRefExpr(const DeclRefExpr *E) { + return !S->isUsesAllocatorsDecl(E->getDecl()); + } + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (Child && Visit(Child)) + return true; + } + return false; + } + explicit AllocatorChecker(DSAStackTy *S) : S(S) {} +}; +} // namespace + static void checkAllocateClauses(Sema &S, DSAStackTy *Stack, ArrayRef Clauses) { assert(!S.CurContext->isDependentContext() && @@ -4694,6 +4823,22 @@ static void checkAllocateClauses(Sema &S, DSAStackTy *Stack, } for (OMPClause *C : AllocateRange) { auto *AC = cast(C); + if (S.getLangOpts().OpenMP >= 50 && + !Stack->hasRequiresDeclWithClause() && + isOpenMPTargetExecutionDirective(Stack->getCurrentDirective()) && + AC->getAllocator()) { + Expr *Allocator = AC->getAllocator(); + // OpenMP, 2.12.5 target Construct + // Memory allocators that do not appear in a uses_allocators clause cannot + // appear as an allocator in an allocate clause or be used in the target + // region unless a requires directive with the dynamic_allocators clause + // is present in the same compilation unit. + AllocatorChecker Checker(Stack); + if (Checker.Visit(Allocator)) + S.Diag(Allocator->getExprLoc(), + diag::err_omp_allocator_not_in_uses_allocators) + << Allocator->getSourceRange(); + } OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind = getAllocatorKind(S, Stack, AC->getAllocator()); // OpenMP, 2.11.4 allocate Clause, Restrictions. 
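A sketch of the rule the new AllocatorChecker enforces (OpenMP 5.0, 2.12.5): on a target construct, an allocator used in an allocate clause must appear in a uses_allocators clause, unless a requires directive with dynamic_allocators is visible in the compilation unit:

  int X = 0;
  void Alloc() {
  #pragma omp target firstprivate(X) allocate(omp_default_mem_alloc : X) // error: allocator not in uses_allocators
    { X += 1; }
  #pragma omp target uses_allocators(omp_default_mem_alloc) firstprivate(X) \
      allocate(omp_default_mem_alloc : X) // OK
    { X += 1; }
  }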
@@ -5225,6 +5370,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_destroy: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: continue; case OMPC_allocator: case OMPC_flush: @@ -6009,6 +6155,7 @@ StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef Clauses, setFunctionHasBranchProtectedScope(); return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -8492,8 +8639,9 @@ Sema::ActOnOpenMPForDirective(ArrayRef Clauses, Stmt *AStmt, } setFunctionHasBranchProtectedScope(); - return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount, - Clauses, AStmt, B, DSAStack->isCancelRegion()); + return OMPForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPForSimdDirective( @@ -8570,6 +8718,7 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef Clauses, setFunctionHasBranchProtectedScope(); return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } @@ -8730,9 +8879,9 @@ StmtResult Sema::ActOnOpenMPParallelForDirective( } setFunctionHasBranchProtectedScope(); - return OMPParallelForDirective::Create(Context, StartLoc, EndLoc, - NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + return OMPParallelForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPParallelForSimdDirective( @@ -8796,8 +8945,9 @@ Sema::ActOnOpenMPParallelMasterDirective(ArrayRef Clauses, setFunctionHasBranchProtectedScope(); - return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPParallelMasterDirective::Create( + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef()); } StmtResult @@ -8836,7 +8986,8 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef Clauses, setFunctionHasBranchProtectedScope(); return OMPParallelSectionsDirective::Create( - Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion()); + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } /// detach and mergeable clauses are mutially exclusive, check for it. @@ -9888,8 +10039,9 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef Clauses, setFunctionHasBranchProtectedScope(); - return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, - AStmt); + return OMPTargetParallelDirective::Create( + Context, StartLoc, EndLoc, Clauses, AStmt, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPTargetParallelForDirective( @@ -9941,9 +10093,9 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective( } setFunctionHasBranchProtectedScope(); - return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc, - NestedLoopCount, Clauses, AStmt, - B, DSAStack->isCancelRegion()); + return OMPTargetParallelForDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } /// Check for existence of a map clause in the list of clauses. 
@@ -10555,7 +10707,7 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForDirective( setFunctionHasBranchProtectedScope(); return OMPDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective( @@ -10996,7 +11148,7 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective( return OMPTeamsDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef Clauses, @@ -11125,7 +11277,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective( setFunctionHasBranchProtectedScope(); return OMPTargetTeamsDistributeParallelForDirective::Create( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B, - DSAStack->isCancelRegion()); + DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion()); } StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( @@ -11351,6 +11503,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_destroy: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed."); } return Res; @@ -12104,6 +12257,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Unexpected OpenMP clause."); } return CaptureRegion; @@ -12543,6 +12697,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed."); } return Res; @@ -12768,6 +12923,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed."); } return Res; @@ -13000,6 +13156,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_detach: case OMPC_inclusive: case OMPC_exclusive: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed."); } return Res; @@ -13265,6 +13422,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( case OMPC_order: case OMPC_destroy: case OMPC_detach: + case OMPC_uses_allocators: llvm_unreachable("Clause is not allowed."); } return Res; @@ -14969,9 +15127,17 @@ static bool actOnOMPReductionKindClause( } // All reduction items are still marked as reduction (to do not increase // code base size). - Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, - RD.RedModifier); - if (CurrDir == OMPD_taskgroup) { + unsigned Modifier = RD.RedModifier; + // Consider task_reductions as reductions with task modifier. Required for + // correct analysis of in_reduction clauses. 
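A sketch of what threading the task-reduction descriptor through all these Create() calls enables: an in_reduction clause on a task can now bind to reduction(task, ...) on an enclosing parallel or worksharing construct, not only to a taskgroup's task_reduction:

  int Sum = 0;
  void Produce() {
  #pragma omp parallel reduction(task, + : Sum)
    {
  #pragma omp task in_reduction(+ : Sum)
      Sum += 1;
    }
  }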
+ if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction) + Modifier = OMPC_REDUCTION_task; + Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier); + if (Modifier == OMPC_REDUCTION_task && + (CurrDir == OMPD_taskgroup || + ((isOpenMPParallelDirective(CurrDir) || + isOpenMPWorksharingDirective(CurrDir)) && + !isOpenMPSimdDirective(CurrDir)))) { if (DeclareReductionRef.isUsable()) Stack->addTaskgroupReductionData(D, ReductionIdRange, DeclareReductionRef.get()); @@ -17705,21 +17871,22 @@ OMPClause *Sema::ActOnOpenMPDetachClause(Expr *Evt, SourceLocation StartLoc, // event-handle is a variable of the omp_event_handle_t type. auto *Ref = dyn_cast(Evt->IgnoreParenImpCasts()); if (!Ref) { - Diag(Evt->getExprLoc(), diag::err_omp_event_var_expected) - << 0 << Evt->getSourceRange(); + Diag(Evt->getExprLoc(), diag::err_omp_var_expected) + << "omp_event_handle_t" << 0 << Evt->getSourceRange(); return nullptr; } auto *VD = dyn_cast_or_null(Ref->getDecl()); if (!VD) { - Diag(Evt->getExprLoc(), diag::err_omp_event_var_expected) - << 0 << Evt->getSourceRange(); + Diag(Evt->getExprLoc(), diag::err_omp_var_expected) + << "omp_event_handle_t" << 0 << Evt->getSourceRange(); return nullptr; } if (!Context.hasSameUnqualifiedType(DSAStack->getOMPEventHandleT(), VD->getType()) || VD->getType().isConstant(Context)) { - Diag(Evt->getExprLoc(), diag::err_omp_event_var_expected) - << 1 << VD->getType() << Evt->getSourceRange(); + Diag(Evt->getExprLoc(), diag::err_omp_var_expected) + << "omp_event_handle_t" << 1 << VD->getType() + << Evt->getSourceRange(); return nullptr; } // OpenMP 5.0, 2.10.1 task Construct @@ -18436,3 +18603,134 @@ OMPClause *Sema::ActOnOpenMPExclusiveClause(ArrayRef VarList, return OMPExclusiveClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars); } + +/// Tries to find omp_alloctrait_t type. +static bool findOMPAlloctraitT(Sema &S, SourceLocation Loc, DSAStackTy *Stack) { + QualType OMPAlloctraitT = Stack->getOMPAlloctraitT(); + if (!OMPAlloctraitT.isNull()) + return true; + IdentifierInfo &II = S.PP.getIdentifierTable().get("omp_alloctrait_t"); + ParsedType PT = S.getTypeName(II, Loc, S.getCurScope()); + if (!PT.getAsOpaquePtr() || PT.get().isNull()) { + S.Diag(Loc, diag::err_omp_implied_type_not_found) << "omp_alloctrait_t"; + return false; + } + Stack->setOMPAlloctraitT(PT.get()); + return true; +} + +OMPClause *Sema::ActOnOpenMPUsesAllocatorClause( + SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc, + ArrayRef Data) { + // OpenMP [2.12.5, target Construct] + // allocator is an identifier of omp_allocator_handle_t type. + if (!findOMPAllocatorHandleT(*this, StartLoc, DSAStack)) + return nullptr; + // OpenMP [2.12.5, target Construct] + // allocator-traits-array is an identifier of const omp_alloctrait_t * type. 
+ if (llvm::any_of( + Data, + [](const UsesAllocatorsData &D) { return D.AllocatorTraits; }) && + !findOMPAlloctraitT(*this, StartLoc, DSAStack)) + return nullptr; + llvm::SmallSet, 4> PredefinedAllocators; + for (int I = OMPAllocateDeclAttr::OMPDefaultMemAlloc; + I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) { + auto AllocatorKind = static_cast(I); + StringRef Allocator = + OMPAllocateDeclAttr::ConvertAllocatorTypeTyToStr(AllocatorKind); + DeclarationName AllocatorName = &Context.Idents.get(Allocator); + PredefinedAllocators.insert(LookupSingleName( + TUScope, AllocatorName, StartLoc, Sema::LookupAnyName)); + } + + SmallVector NewData; + for (const UsesAllocatorsData &D : Data) { + Expr *AllocatorExpr = nullptr; + // Check allocator expression. + if (D.Allocator->isTypeDependent()) { + AllocatorExpr = D.Allocator; + } else { + // Traits were specified - need to assign new allocator to the specified + // allocator, so it must be an lvalue. + AllocatorExpr = D.Allocator->IgnoreParenImpCasts(); + auto *DRE = dyn_cast(AllocatorExpr); + bool IsPredefinedAllocator = false; + if (DRE) + IsPredefinedAllocator = PredefinedAllocators.count(DRE->getDecl()); + if (!DRE || + !(Context.hasSameUnqualifiedType( + AllocatorExpr->getType(), DSAStack->getOMPAllocatorHandleT()) || + Context.typesAreCompatible(AllocatorExpr->getType(), + DSAStack->getOMPAllocatorHandleT(), + /*CompareUnqualified=*/true)) || + (!IsPredefinedAllocator && + (AllocatorExpr->getType().isConstant(Context) || + !AllocatorExpr->isLValue()))) { + Diag(D.Allocator->getExprLoc(), diag::err_omp_var_expected) + << "omp_allocator_handle_t" << (DRE ? 1 : 0) + << AllocatorExpr->getType() << D.Allocator->getSourceRange(); + continue; + } + // OpenMP [2.12.5, target Construct] + // Predefined allocators appearing in a uses_allocators clause cannot have + // traits specified. + if (IsPredefinedAllocator && D.AllocatorTraits) { + Diag(D.AllocatorTraits->getExprLoc(), + diag::err_omp_predefined_allocator_with_traits) + << D.AllocatorTraits->getSourceRange(); + Diag(D.Allocator->getExprLoc(), diag::note_omp_predefined_allocator) + << cast(DRE->getDecl())->getName() + << D.Allocator->getSourceRange(); + continue; + } + // OpenMP [2.12.5, target Construct] + // Non-predefined allocators appearing in a uses_allocators clause must + // have traits specified. + if (!IsPredefinedAllocator && !D.AllocatorTraits) { + Diag(D.Allocator->getExprLoc(), + diag::err_omp_nonpredefined_allocator_without_traits); + continue; + } + // No allocator traits - just convert it to rvalue. + if (!D.AllocatorTraits) + AllocatorExpr = DefaultLvalueConversion(AllocatorExpr).get(); + DSAStack->addUsesAllocatorsDecl(DRE->getDecl()); + } + Expr *AllocatorTraitsExpr = nullptr; + if (D.AllocatorTraits) { + if (D.AllocatorTraits->isTypeDependent()) { + AllocatorTraitsExpr = D.AllocatorTraits; + } else { + // OpenMP [2.12.5, target Construct] + // Arrays that contain allocator traits that appear in a uses_allocators + // clause must be constant arrays, have constant values and be defined + // in the same scope as the construct in which the clause appears. + AllocatorTraitsExpr = D.AllocatorTraits->IgnoreParenImpCasts(); + // Check that traits expr is a constant array. 
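The allocator rules checked above, in example form (declarations assumed from <omp.h>): predefined allocators may not carry traits, user-defined allocators must, and the traits expression must be a constant array of omp_alloctrait_t:

  omp_alloctrait_t Traits[] = {{omp_atk_pool_size, 1024}};
  omp_allocator_handle_t MyAlloc;
  void Check() {
  #pragma omp target uses_allocators(omp_default_mem_alloc(Traits)) // error: predefined allocator with traits
    {
    }
  #pragma omp target uses_allocators(MyAlloc) // error: user-defined allocator requires traits
    {
    }
  #pragma omp target uses_allocators(MyAlloc(Traits)) // OK
    {
    }
  }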
+ QualType TraitTy; + if (const ArrayType *Ty = + AllocatorTraitsExpr->getType()->getAsArrayTypeUnsafe()) + if (const auto *ConstArrayTy = dyn_cast(Ty)) + TraitTy = ConstArrayTy->getElementType(); + if (TraitTy.isNull() || + !(Context.hasSameUnqualifiedType(TraitTy, + DSAStack->getOMPAlloctraitT()) || + Context.typesAreCompatible(TraitTy, DSAStack->getOMPAlloctraitT(), + /*CompareUnqualified=*/true))) { + Diag(D.AllocatorTraits->getExprLoc(), + diag::err_omp_expected_array_alloctraits) + << AllocatorTraitsExpr->getType(); + continue; + } + } + } + OMPUsesAllocatorsClause::Data &NewD = NewData.emplace_back(); + NewD.Allocator = AllocatorExpr; + NewD.AllocatorTraits = AllocatorTraitsExpr; + NewD.LParenLoc = D.LParenLoc; + NewD.RParenLoc = D.RParenLoc; + } + return OMPUsesAllocatorsClause::Create(Context, StartLoc, LParenLoc, EndLoc, + NewData); +} diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index ecc4e7ee19fbc..fb5eff006fd34 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -3003,13 +3003,13 @@ bool Sema::CheckPointerConversion(Expr *From, QualType ToType, // We must have a derived-to-base conversion. Check an // ambiguous or inaccessible conversion. unsigned InaccessibleID = 0; - unsigned AmbigiousID = 0; + unsigned AmbiguousID = 0; if (Diagnose) { InaccessibleID = diag::err_upcast_to_inaccessible_base; - AmbigiousID = diag::err_ambiguous_derived_to_base_conv; + AmbiguousID = diag::err_ambiguous_derived_to_base_conv; } if (CheckDerivedToBaseConversion( - FromPointeeType, ToPointeeType, InaccessibleID, AmbigiousID, + FromPointeeType, ToPointeeType, InaccessibleID, AmbiguousID, From->getExprLoc(), From->getSourceRange(), DeclarationName(), &BasePath, IgnoreBaseAccess)) return true; @@ -9779,10 +9779,9 @@ bool clang::isBetterOverloadCandidate( // If other rules cannot determine which is better, CUDA preference is used // to determine which is better. 
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) { - if (FunctionDecl *Caller = dyn_cast(S.CurContext)) { - return S.IdentifyCUDAPreference(Caller, Cand1.Function) > - S.IdentifyCUDAPreference(Caller, Cand2.Function); - } + FunctionDecl *Caller = dyn_cast(S.CurContext); + return S.IdentifyCUDAPreference(Caller, Cand1.Function) > + S.IdentifyCUDAPreference(Caller, Cand2.Function); } return false; diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index aaab0d61193f1..457781d6e1dd5 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3631,6 +3631,12 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { if (isa(RetValExp)) Diag(ReturnLoc, diag::warn_main_returns_bool_literal) << RetValExp->getSourceRange(); + if (FD->hasAttr() && RetValExp) { + if (const auto *RT = dyn_cast(FnRetType.getCanonicalType())) { + if (RT->getDecl()->isOrContainsUnion()) + Diag(RetValExp->getBeginLoc(), diag::warn_cmse_nonsecure_union) << 1; + } + } } else if (ObjCMethodDecl *MD = getCurMethodDecl()) { FnRetType = MD->getReturnType(); isObjCMethod = true; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index ecb052be9fc9e..d8e0efd8ba752 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1449,6 +1449,9 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { break; } case DeclSpec::TST_extint: { + if (!S.Context.getTargetInfo().hasExtIntType()) + S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) + << "_ExtInt"; Result = S.BuildExtIntType(DS.getTypeSpecSign() == TSS_unsigned, DS.getRepAsExpr(), DS.getBeginLoc()); if (Result.isNull()) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index fc5d369889cf4..972a2435df829 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -2090,6 +2090,17 @@ class TreeTransform { EndLoc); } + /// Build a new OpenMP 'uses_allocators' clause. + /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPUsesAllocatorsClause( + ArrayRef Data, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) { + return getSema().ActOnOpenMPUsesAllocatorClause(StartLoc, LParenLoc, EndLoc, + Data); + } + /// Build a new OpenMP 'order' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
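The SemaStmt change above is the return-path counterpart of the CMSE argument check: returning a union from a cmse_nonsecure_entry function emits the same warn_cmse_nonsecure_union diagnostic, with the trailing << 1 selecting the return-value wording. A sketch, again assuming -mcmse:

  union Data { int Word; float Value; };
  __attribute__((cmse_nonsecure_entry)) union Data GetData(void) {
    union Data D = {0};
    return D; // warns: union returned across the security boundary
  }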
@@ -9680,8 +9691,33 @@ TreeTransform::TransformOMPExclusiveClause(OMPExclusiveClause *C) { } template -OMPClause * -TreeTransform::TransformOMPOrderClause(OMPOrderClause *C) { +OMPClause *TreeTransform::TransformOMPUsesAllocatorsClause( + OMPUsesAllocatorsClause *C) { + SmallVector Data; + Data.reserve(C->getNumberOfAllocators()); + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + ExprResult Allocator = getDerived().TransformExpr(D.Allocator); + if (Allocator.isInvalid()) + continue; + ExprResult AllocatorTraits; + if (Expr *AT = D.AllocatorTraits) { + AllocatorTraits = getDerived().TransformExpr(AT); + if (AllocatorTraits.isInvalid()) + continue; + } + Sema::UsesAllocatorsData &NewD = Data.emplace_back(); + NewD.Allocator = Allocator.get(); + NewD.AllocatorTraits = AllocatorTraits.get(); + NewD.LParenLoc = D.LParenLoc; + NewD.RParenLoc = D.RParenLoc; + } + return getDerived().RebuildOMPUsesAllocatorsClause( + Data, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); +} + +template +OMPClause *TreeTransform::TransformOMPOrderClause(OMPOrderClause *C) { return getDerived().RebuildOMPOrderClause(C->getKind(), C->getKindKwLoc(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc()); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 873115835e5c5..3ead713b5e064 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11882,6 +11882,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_detach: C = new (Context) OMPDetachClause(); break; + case llvm::omp::OMPC_uses_allocators: + C = OMPUsesAllocatorsClause::CreateEmpty(Context, Record.readInt()); + break; #define OMP_CLAUSE_NO_CLASS(Enum, Str) \ case llvm::omp::Enum: \ break; @@ -12710,6 +12713,21 @@ void OMPClauseReader::VisitOMPExclusiveClause(OMPExclusiveClause *C) { C->setVarRefs(Vars); } +void OMPClauseReader::VisitOMPUsesAllocatorsClause(OMPUsesAllocatorsClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + unsigned NumOfAllocators = C->getNumberOfAllocators(); + SmallVector Data; + Data.reserve(NumOfAllocators); + for (unsigned I = 0; I != NumOfAllocators; ++I) { + OMPUsesAllocatorsClause::Data &D = Data.emplace_back(); + D.Allocator = Record.readSubExpr(); + D.AllocatorTraits = Record.readSubExpr(); + D.LParenLoc = Record.readSourceLocation(); + D.RParenLoc = Record.readSourceLocation(); + } + C->setAllocatorsData(Data); +} + void OMPClauseReader::VisitOMPOrderClause(OMPOrderClause *C) { C->setKind(Record.readEnum()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index b53653852f02a..0e92c86e3a844 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2879,7 +2879,7 @@ ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset); } -uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint32_t LocalOffset) { +uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) { return LocalOffset + M.GlobalBitOffset; } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 71d862c82d7c0..ea21d5e33c2e8 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2308,6 +2308,7 @@ void 
ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) { // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2317,6 +2318,7 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) { void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2329,6 +2331,7 @@ void ASTStmtReader::VisitOMPSectionsDirective(OMPSectionsDirective *D) { // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2360,6 +2363,7 @@ void ASTStmtReader::VisitOMPCriticalDirective(OMPCriticalDirective *D) { void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2374,6 +2378,7 @@ void ASTStmtReader::VisitOMPParallelMasterDirective( // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); } void ASTStmtReader::VisitOMPParallelSectionsDirective( @@ -2382,6 +2387,7 @@ void ASTStmtReader::VisitOMPParallelSectionsDirective( // The NumClauses field was read in ReadStmtFromStream. Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2489,11 +2495,14 @@ void ASTStmtReader::VisitOMPTargetParallelDirective( VisitStmt(D); Record.skipInts(1); VisitOMPExecutableDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); + D->setHasCancel(Record.readBool()); } void ASTStmtReader::VisitOMPTargetParallelForDirective( OMPTargetParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2563,6 +2572,7 @@ void ASTStmtReader::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) { void ASTStmtReader::VisitOMPDistributeParallelForDirective( OMPDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2603,6 +2613,7 @@ void ASTStmtReader::VisitOMPTeamsDistributeParallelForSimdDirective( void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective( OMPTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } @@ -2621,6 +2632,7 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeDirective( void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective( OMPTargetTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + D->setTaskReductionRefExpr(Record.readSubExpr()); D->setHasCancel(Record.readInt()); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 31d004f6c946c..462f27578c0f3 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1812,7 +1812,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { Filename, File->getSize(), getTimestampForOutput(File) }; HeaderFileInfoTrait::data_type Data = { - *HFI, HS.getModuleMap().findAllModulesForHeader(File), {} + *HFI, 
HS.getModuleMap().findResolvedModulesForHeader(File), {} }; Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; @@ -2898,8 +2898,10 @@ void ASTWriter::WriteTypeDeclOffsets() { void ASTWriter::WriteFileDeclIDsMap() { using namespace llvm; - SmallVector, 64> SortedFileDeclIDs( - FileDeclIDs.begin(), FileDeclIDs.end()); + SmallVector, 64> SortedFileDeclIDs; + SortedFileDeclIDs.reserve(FileDeclIDs.size()); + for (const auto &P : FileDeclIDs) + SortedFileDeclIDs.push_back(std::make_pair(P.first, P.second.get())); llvm::sort(SortedFileDeclIDs, llvm::less_first()); // Join the vectors of DeclIDs from all files. @@ -4297,9 +4299,7 @@ ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream, } } -ASTWriter::~ASTWriter() { - llvm::DeleteContainerSeconds(FileDeclIDs); -} +ASTWriter::~ASTWriter() = default; const LangOptions &ASTWriter::getLangOpts() const { assert(WritingAST && "can't determine lang opts when not writing AST"); @@ -5366,9 +5366,9 @@ void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) { return; assert(SM.getSLocEntry(FID).isFile()); - DeclIDInFileInfo *&Info = FileDeclIDs[FID]; + std::unique_ptr &Info = FileDeclIDs[FID]; if (!Info) - Info = new DeclIDInFileInfo(); + Info = std::make_unique(); std::pair LocDecl(Offset, ID); LocDeclIDsTy &Decls = Info->DeclIDs; @@ -6651,6 +6651,18 @@ void OMPClauseWriter::VisitOMPOrderClause(OMPOrderClause *C) { Record.AddSourceLocation(C->getKindKwLoc()); } +void OMPClauseWriter::VisitOMPUsesAllocatorsClause(OMPUsesAllocatorsClause *C) { + Record.push_back(C->getNumberOfAllocators()); + Record.AddSourceLocation(C->getLParenLoc()); + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data Data = C->getAllocatorData(I); + Record.AddStmt(Data.Allocator); + Record.AddStmt(Data.AllocatorTraits); + Record.AddSourceLocation(Data.LParenLoc); + Record.AddSourceLocation(Data.RParenLoc); + } +} + void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) { writeUInt32(TI->Sets.size()); for (const auto &Set : TI->Sets) { diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index b7e46bc40c770..7c450bccc709a 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2195,6 +2195,7 @@ void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) { VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE; } @@ -2206,6 +2207,7 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) { void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_FOR_DIRECTIVE; } @@ -2219,6 +2221,7 @@ void ASTStmtWriter::VisitOMPSectionsDirective(OMPSectionsDirective *D) { VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE; } @@ -2253,6 +2256,7 @@ void ASTStmtWriter::VisitOMPCriticalDirective(OMPCriticalDirective *D) { void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 
1 : 0); Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE; } @@ -2268,6 +2272,7 @@ void ASTStmtWriter::VisitOMPParallelMasterDirective( VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE; } @@ -2276,6 +2281,7 @@ void ASTStmtWriter::VisitOMPParallelSectionsDirective( VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE; } @@ -2336,12 +2342,15 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective( VisitStmt(D); Record.push_back(D->getNumClauses()); VisitOMPExecutableDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); + Record.writeBool(D->hasCancel()); Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE; } void ASTStmtWriter::VisitOMPTargetParallelForDirective( OMPTargetParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE; } @@ -2475,6 +2484,7 @@ void ASTStmtWriter::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) { void ASTStmtWriter::VisitOMPDistributeParallelForDirective( OMPDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } @@ -2523,6 +2533,7 @@ void ASTStmtWriter::VisitOMPTeamsDistributeParallelForSimdDirective( void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective( OMPTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } @@ -2543,6 +2554,7 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeDirective( void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective( OMPTargetTeamsDistributeParallelForDirective *D) { VisitOMPLoopDirective(D); + Record.AddStmt(D->getTaskReductionRefExpr()); Record.push_back(D->hasCancel() ? 1 : 0); Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp index 85e2b71e53ea1..1ef70b650414e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp @@ -30,7 +30,7 @@ using namespace clang; using namespace ento; namespace { -class CastValueChecker : public Checker { +class CastValueChecker : public Checker { enum class CallKind { Function, Method, InstanceOf }; using CastCheck = @@ -51,6 +51,7 @@ class CastValueChecker : public Checker { // 1) isa: The parameter is non-null, returns boolean. // 2) isa_and_nonnull: The parameter is null or non-null, returns boolean. bool evalCall(const CallEvent &Call, CheckerContext &C) const; + void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; private: // These are known in the LLVM project. 
The pairs are in the following form: @@ -432,6 +433,11 @@ bool CastValueChecker::evalCall(const CallEvent &Call, return true; } +void CastValueChecker::checkDeadSymbols(SymbolReaper &SR, + CheckerContext &C) const { + C.addTransition(removeDeadCasts(C.getState(), SR)); +} + void ento::registerCastValueChecker(CheckerManager &Mgr) { Mgr.registerChecker(); } diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index 71681594a0a14..1502c0f9d6567 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -109,11 +109,127 @@ class DynamicTypePropagation: /// This value is set to true, when the Generics checker is turned on. DefaultBool CheckGenerics; }; + +bool isObjCClassType(QualType Type) { + if (const auto *PointerType = dyn_cast(Type)) { + return PointerType->getObjectType()->isObjCClass(); + } + return false; +} + +struct RuntimeType { + const ObjCObjectType *Type = nullptr; + bool Precise = false; + + operator bool() const { return Type != nullptr; } +}; + +RuntimeType inferReceiverType(const ObjCMethodCall &Message, + CheckerContext &C) { + const ObjCMessageExpr *MessageExpr = Message.getOriginExpr(); + + // Check if we can statically infer the actual type precisely. + // + // 1. Class is written directly in the message: + // \code + // [ActualClass classMethod]; + // \endcode + if (MessageExpr->getReceiverKind() == ObjCMessageExpr::Class) { + return {MessageExpr->getClassReceiver()->getAs(), + /*Precise=*/true}; + } + + // 2. Receiver is 'super' from a class method (a.k.a 'super' is a + // class object). + // \code + // [super classMethod]; + // \endcode + if (MessageExpr->getReceiverKind() == ObjCMessageExpr::SuperClass) { + return {MessageExpr->getSuperType()->getAs(), + /*Precise=*/true}; + } + + // 3. Receiver is 'super' from an instance method (a.k.a 'super' is an + // instance of a super class). + // \code + // [super instanceMethod]; + // \encode + if (MessageExpr->getReceiverKind() == ObjCMessageExpr::SuperInstance) { + if (const auto *ObjTy = + MessageExpr->getSuperType()->getAs()) + return {ObjTy->getObjectType(), /*Precise=*/true}; + } + + const Expr *RecE = MessageExpr->getInstanceReceiver(); + + if (!RecE) + return {}; + + // Otherwise, let's try to get type information from our estimations of + // runtime types. + QualType InferredType; + SVal ReceiverSVal = C.getSVal(RecE); + ProgramStateRef State = C.getState(); + + if (const MemRegion *ReceiverRegion = ReceiverSVal.getAsRegion()) { + if (DynamicTypeInfo DTI = getDynamicTypeInfo(State, ReceiverRegion)) { + InferredType = DTI.getType().getCanonicalType(); + } + } + + if (SymbolRef ReceiverSymbol = ReceiverSVal.getAsSymbol()) { + if (InferredType.isNull()) { + InferredType = ReceiverSymbol->getType(); + } + + // If receiver is a Class object, we want to figure out the type it + // represents. + if (isObjCClassType(InferredType)) { + // We actually might have some info on what type is contained in there. + if (DynamicTypeInfo DTI = + getClassObjectDynamicTypeInfo(State, ReceiverSymbol)) { + + // Types in Class objects can be ONLY Objective-C types + return {cast(DTI.getType()), !DTI.canBeASubClass()}; + } + + SVal SelfSVal = State->getSelfSVal(C.getLocationContext()); + + // Another way we can guess what is in Class object, is when it is a + // 'self' variable of the current class method. 
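The small RuntimeType struct above relies on its operator bool so inferReceiverType can signal "no usable type information" as a falsy return. A stripped-down, compilable illustration of the same idiom; the names and the const char * stand-in for const ObjCObjectType * are invented for the demo:

  #include <cstdio>

  struct RuntimeType {
    const char *Type = nullptr; // stands in for const ObjCObjectType *
    bool Precise = false;
    operator bool() const { return Type != nullptr; }
  };

  RuntimeType infer(bool Known) {
    RuntimeType RT;
    if (Known) {
      RT.Type = "NSObject";
      RT.Precise = true;
    }
    return RT;
  }

  int main() {
    if (RuntimeType RT = infer(true)) // falsy when no type was inferred
      std::printf("receiver: %s (precise=%d)\n", RT.Type, RT.Precise);
  }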
+ if (ReceiverSVal == SelfSVal) { + // In this case, we should return the type of the enclosing class + // declaration. + if (const ObjCMethodDecl *MD = + dyn_cast(C.getStackFrame()->getDecl())) + if (const ObjCObjectType *ObjTy = dyn_cast( + MD->getClassInterface()->getTypeForDecl())) + return {ObjTy}; + } + } + } + + // Unfortunately, it seems like we have no idea what that type is. + if (InferredType.isNull()) { + return {}; + } + + // We can end up here if we got some dynamic type info and the + // receiver is not one of the known Class objects. + if (const auto *ReceiverInferredType = + dyn_cast(InferredType)) { + return {ReceiverInferredType->getObjectType()}; + } + + // Any other type (like 'Class') is not really useful at this point. + return {}; +} } // end anonymous namespace void DynamicTypePropagation::checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const { ProgramStateRef State = removeDeadTypes(C.getState(), SR); + State = removeDeadClassObjectTypes(State, SR); MostSpecializedTypeArgsMapTy TyArgMap = State->get(); @@ -209,12 +325,21 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call, case OMF_alloc: case OMF_new: { // Get the type of object that will get created. - const ObjCMessageExpr *MsgE = Msg->getOriginExpr(); - const ObjCObjectType *ObjTy = getObjectTypeForAllocAndNew(MsgE, C); + RuntimeType ObjTy = inferReceiverType(*Msg, C); + if (!ObjTy) return; + QualType DynResTy = - C.getASTContext().getObjCObjectPointerType(QualType(ObjTy, 0)); + C.getASTContext().getObjCObjectPointerType(QualType(ObjTy.Type, 0)); + // We used to assume that whatever type we got from inferring the + // type is actually precise (and it is not exactly correct). + // A big portion of the existing behavior depends on that assumption + // (e.g. certain inlining won't take place). For this reason, we don't + // use ObjTy.Precise flag here. + // + // TODO: We should mitigate this problem some time in the future + // and replace hardcoded 'false' with '!ObjTy.Precise'. C.addTransition(setDynamicTypeInfo(State, RetReg, DynResTy, false)); break; } @@ -303,40 +428,6 @@ void DynamicTypePropagation::checkPostStmt(const CXXNewExpr *NewE, /*CanBeSubClassed=*/false)); } -const ObjCObjectType * -DynamicTypePropagation::getObjectTypeForAllocAndNew(const ObjCMessageExpr *MsgE, - CheckerContext &C) const { - if (MsgE->getReceiverKind() == ObjCMessageExpr::Class) { - if (const ObjCObjectType *ObjTy - = MsgE->getClassReceiver()->getAs()) - return ObjTy; - } - - if (MsgE->getReceiverKind() == ObjCMessageExpr::SuperClass) { - if (const ObjCObjectType *ObjTy - = MsgE->getSuperType()->getAs()) - return ObjTy; - } - - const Expr *RecE = MsgE->getInstanceReceiver(); - if (!RecE) - return nullptr; - - RecE= RecE->IgnoreParenImpCasts(); - if (const DeclRefExpr *DRE = dyn_cast(RecE)) { - const StackFrameContext *SFCtx = C.getStackFrame(); - // Are we calling [self alloc]? If this is self, get the type of the - // enclosing ObjC class. - if (DRE->getDecl() == SFCtx->getSelfDecl()) { - if (const ObjCMethodDecl *MD = dyn_cast(SFCtx->getDecl())) - if (const ObjCObjectType *ObjTy = - dyn_cast(MD->getClassInterface()->getTypeForDecl())) - return ObjTy; - } - } - return nullptr; -} - // Return a better dynamic type if one can be derived from the cast. // Compare the current dynamic type of the region and the new type to which we // are casting. If the new type is lower in the inheritance hierarchy, pick it. 
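A small C++ idiom in the new inference code above is worth spelling out: RuntimeType bundles the inferred type with a Precise flag and defines operator bool, so a caller can test and consume the result in one step, which is exactly what checkPostCall does with its "if (!ObjTy) return;". Below is a minimal, self-contained sketch of that idiom; all names here are illustrative and not part of the patch.

#include <cassert>

// Illustrative stand-ins for the patch's ObjCObjectType / RuntimeType pair.
struct Type {
  bool IsClass;
};

struct InferredType {
  const Type *Ty = nullptr; // null means "inference failed"
  bool Precise = false;

  // Truthiness mirrors RuntimeType::operator bool above: a
  // default-constructed result, i.e. `return {};`, reads as "no answer".
  operator bool() const { return Ty != nullptr; }
};

static InferredType infer(const Type *Candidate) {
  if (!Candidate)
    return {}; // the "we have no idea what that type is" path
  return {Candidate, /*Precise=*/true};
}

int main() {
  Type T{true};
  // Declare, test, and consume the result in a single condition.
  if (InferredType RT = infer(&T))
    assert(RT.Precise && RT.Ty->IsClass);
  if (InferredType RT = infer(nullptr))
    assert(false && "not reached: an empty result is falsy");
  return 0;
}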
@@ -821,25 +912,56 @@ void DynamicTypePropagation::checkPostObjCMessage(const ObjCMethodCall &M, Selector Sel = MessageExpr->getSelector(); ProgramStateRef State = C.getState(); - // Inference for class variables. - // We are only interested in cases where the class method is invoked on a - // class. This method is provided by the runtime and available on all classes. - if (MessageExpr->getReceiverKind() == ObjCMessageExpr::Class && - Sel.getAsString() == "class") { - QualType ReceiverType = MessageExpr->getClassReceiver(); - const auto *ReceiverClassType = ReceiverType->castAs<ObjCObjectType>(); - if (!ReceiverClassType->isSpecialized()) - return; - QualType ReceiverClassPointerType = - C.getASTContext().getObjCObjectPointerType( - QualType(ReceiverClassType, 0)); - const auto *InferredType = - ReceiverClassPointerType->castAs<ObjCObjectPointerType>(); + // Here we try to propagate information on Class objects. + if (Sel.getAsString() == "class") { + // We try to figure out the type from the receiver of the 'class' message. + if (RuntimeType ReceiverRuntimeType = inferReceiverType(M, C)) { + + QualType ReceiverClassType(ReceiverRuntimeType.Type, 0); + + // We want to consider only precise information on generics. + if (ReceiverRuntimeType.Type->isSpecialized() && + ReceiverRuntimeType.Precise) { + QualType ReceiverClassPointerType = + C.getASTContext().getObjCObjectPointerType(ReceiverClassType); + const auto *InferredType = + ReceiverClassPointerType->castAs<ObjCObjectPointerType>(); + State = State->set<MostSpecializedTypeArgsMap>(RetSym, InferredType); + } + + // Constrain the resulting class object to the inferred type. + State = setClassObjectDynamicTypeInfo(State, RetSym, ReceiverClassType, + !ReceiverRuntimeType.Precise); - State = State->set<MostSpecializedTypeArgsMap>(RetSym, InferredType); - C.addTransition(State); - return; + C.addTransition(State); + return; + } + } + + if (Sel.getAsString() == "superclass") { + // We try to figure out the type from the receiver of the 'superclass' + // message. + if (RuntimeType ReceiverRuntimeType = inferReceiverType(M, C)) { + + // The result type would be the superclass of the receiver's type. + QualType ReceiversSuperClass = + ReceiverRuntimeType.Type->getSuperClassType(); + + // Check if it really has a superclass. + // + // TODO: we can probably pay closer attention to cases when the class + // object can be 'nil' as the result of such a message. + if (!ReceiversSuperClass.isNull()) { + // Constrain the resulting class object to the inferred type. + State = setClassObjectDynamicTypeInfo( + State, RetSym, ReceiversSuperClass, !ReceiverRuntimeType.Precise); + + C.addTransition(State); + } + return; + } } // Tracking for return types.
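The 'class' and 'superclass' handling above stores its verdicts through setClassObjectDynamicTypeInfo, which, as the DynamicType.cpp hunks further down show, is backed by a REGISTER_MAP_WITH_PROGRAMSTATE map keyed by SymbolRef. For readers new to that mechanism, here is a rough sketch of the pattern with a made-up trait name; it is not code from this patch.

// Sketch only: a hypothetical symbol-keyed map in the analyzer's state.
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"

using namespace clang;
using namespace ento;

// Registers an immutable map trait in the generic data map (GDM).
REGISTER_MAP_WITH_PROGRAMSTATE(ExampleSymbolTagMap, SymbolRef, unsigned)

static ProgramStateRef recordTag(ProgramStateRef State, SymbolRef Sym,
                                 unsigned Tag) {
  // set() produces a new state; program states are immutable.
  return State->set<ExampleSymbolTagMap>(Sym, Tag);
}

static const unsigned *lookupTag(ProgramStateRef State, SymbolRef Sym) {
  // get() returns null when there is no entry, which is why
  // getClassObjectDynamicTypeInfo below falls back to DynamicTypeInfo{}.
  return State->get<ExampleSymbolTagMap>(Sym);
}

static ProgramStateRef dropDead(ProgramStateRef State, SymbolReaper &SR) {
  // Mirrors removeDeadClassObjectTypes: forget entries whose key symbol
  // died, so the map does not grow without bound.
  for (const auto &Entry : State->get<ExampleSymbolTagMap>())
    if (!SR.isLive(Entry.first))
      State = State->remove<ExampleSymbolTagMap>(Entry.first);
  return State;
}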
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp index 39ffbca8d393a..3547b7bb61a24 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp @@ -116,13 +116,14 @@ void ObjCSuperDeallocChecker::checkPostObjCMessage(const ObjCMethodCall &M, return; ProgramStateRef State = C.getState(); - SymbolRef ReceiverSymbol = M.getSelfSVal().getAsSymbol(); - assert(ReceiverSymbol && "No receiver symbol at call to [super dealloc]?"); + const LocationContext *LC = C.getLocationContext(); + SymbolRef SelfSymbol = State->getSelfSVal(LC).getAsSymbol(); + assert(SelfSymbol && "No receiver symbol at call to [super dealloc]?"); // We add this transition in checkPostObjCMessage to avoid warning when // we inline a call to [super dealloc] where the inlined call itself // calls [super dealloc]. - State = State->add(ReceiverSymbol); + State = State->add(SelfSymbol); C.addTransition(State); } diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 5e36938b613d0..b9719a086668b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -266,21 +266,15 @@ class StdLibraryFunctionsChecker return T; } - /// Try our best to figure out if the call expression is the call of + /// Try our best to figure out if the summary's signature matches /// *the* library function to which this specification applies. - bool matchesCall(const FunctionDecl *FD) const; + bool matchesSignature(const FunctionDecl *FD) const; }; - // The same function (as in, function identifier) may have different - // summaries assigned to it, with different argument and return value types. - // We call these "variants" of the function. This can be useful for handling - // C++ function overloads, and also it can be used when the same function - // may have different definitions on different platforms. - typedef std::vector Summaries; - // The map of all functions supported by the checker. It is initialized // lazily, and it doesn't change after initialization. - mutable llvm::StringMap FunctionSummaryMap; + using FunctionSummaryMapType = llvm::DenseMap; + mutable FunctionSummaryMapType FunctionSummaryMap; mutable std::unique_ptr BT_InvalidArg; @@ -289,14 +283,6 @@ class StdLibraryFunctionsChecker static QualType getArgType(const Summary &Summary, ArgNo ArgN) { return Summary.getArgType(ArgN); } - static QualType getArgType(const CallEvent &Call, ArgNo ArgN) { - return ArgN == Ret ? Call.getResultType().getCanonicalType() - : Call.getArgExpr(ArgN)->getType().getCanonicalType(); - } - static QualType getArgType(const CallExpr *CE, ArgNo ArgN) { - return ArgN == Ret ? CE->getType().getCanonicalType() - : CE->getArg(ArgN)->getType().getCanonicalType(); - } static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); } @@ -440,7 +426,7 @@ ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( BinaryOperator::Opcode Op = getOpcode(); ArgNo OtherArg = getOtherArgNo(); SVal OtherV = getArgSVal(Call, OtherArg); - QualType OtherT = getArgType(Call, OtherArg); + QualType OtherT = getArgType(Summary, OtherArg); // Note: we avoid integral promotion for comparison. 
OtherV = SVB.evalCast(OtherV, T, OtherT); if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) @@ -530,7 +516,7 @@ bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, llvm_unreachable("Unknown invalidation kind!"); } -bool StdLibraryFunctionsChecker::Summary::matchesCall( +bool StdLibraryFunctionsChecker::Summary::matchesSignature( const FunctionDecl *FD) const { // Check number of arguments: if (FD->param_size() != ArgTys.size()) @@ -565,28 +551,10 @@ StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, initFunctionSummaries(C); - IdentifierInfo *II = FD->getIdentifier(); - if (!II) - return None; - StringRef Name = II->getName(); - if (Name.empty() || !C.isCLibraryFunction(FD, Name)) - return None; - - auto FSMI = FunctionSummaryMap.find(Name); + auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl()); if (FSMI == FunctionSummaryMap.end()) return None; - - // Verify that function signature matches the spec in advance. - // Otherwise we might be modeling the wrong function. - // Strict checking is important because we will be conducting - // very integral-type-sensitive operations on arguments and - // return values. - const Summaries &SpecVariants = FSMI->second; - for (const Summary &Spec : SpecVariants) - if (Spec.matchesCall(FD)) - return Spec; - - return None; + return FSMI->second; } Optional<StdLibraryFunctionsChecker::Summary> @@ -598,6 +566,21 @@ StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, return findFunctionSummary(FD, C); } +llvm::Optional<const FunctionDecl *> +lookupGlobalCFunction(StringRef Name, const ASTContext &ACtx) { + IdentifierInfo &II = ACtx.Idents.get(Name); + auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); + if (LookupRes.size() == 0) + return None; + + assert(LookupRes.size() == 1 && "In C, identifiers should be unique"); + Decl *D = LookupRes.front()->getCanonicalDecl(); + auto *FD = dyn_cast<FunctionDecl>(D); + if (!FD) + return None; + return FD->getCanonicalDecl(); +} + void StdLibraryFunctionsChecker::initFunctionSummaries( CheckerContext &C) const { if (!FunctionSummaryMap.empty()) @@ -652,6 +635,38 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( return -1; }(); + // Auxiliary class to aid adding summaries to the summary map. + struct AddToFunctionSummaryMap { + const ASTContext &ACtx; + FunctionSummaryMapType &Map; + AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM) + : ACtx(ACtx), Map(FSM) {} + // Add a summary to a FunctionDecl found by lookup. The lookup is performed + // by the given Name in the global scope. The summary will be attached + // to the found FunctionDecl only if the signatures match. + void operator()(StringRef Name, const Summary &S) { + IdentifierInfo &II = ACtx.Idents.get(Name); + auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); + if (LookupRes.size() == 0) + return; + for (Decl *D : LookupRes) { + if (auto *FD = dyn_cast<FunctionDecl>(D)) { + if (S.matchesSignature(FD)) { + auto Res = Map.insert({FD->getCanonicalDecl(), S}); + assert(Res.second && "Function already has a summary set!"); + (void)Res; + return; + } + } + } + } + // Add several summaries for the given name. + void operator()(StringRef Name, const std::vector<Summary> &Summaries) { + for (const Summary &S : Summaries) + operator()(Name, S); + } + } addToFunctionSummaryMap(ACtx, FunctionSummaryMap); + // We are finally ready to define specifications for all supported functions. // // The signature needs to have the correct number of arguments.
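Taken together, the functor above changes the registration model: instead of parking several Summary variants under a string key and filtering at call time, each summary is validated once against the looked-up declaration and stored under the canonical FunctionDecl. Here is a compressed sketch of that flow, assuming only some Summary type with a matchesSignature(const FunctionDecl *) predicate; it is an illustration, not a drop-in replacement.

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/DenseMap.h"

using namespace clang;

template <typename SummaryT>
static void addSummary(ASTContext &ACtx,
                       llvm::DenseMap<const FunctionDecl *, SummaryT> &Map,
                       llvm::StringRef Name, const SummaryT &S) {
  // Find global declarations spelled `Name` in this translation unit.
  IdentifierInfo &II = ACtx.Idents.get(Name);
  for (Decl *D : ACtx.getTranslationUnitDecl()->lookup(&II)) {
    const auto *FD = dyn_cast<FunctionDecl>(D);
    // Skip non-functions and mismatched signatures; this is what keeps the
    // checker from modeling an unrelated user-defined 'isalnum'.
    if (!FD || !S.matchesSignature(FD))
      continue;
    // Keying on the canonical declaration means every redeclaration of the
    // same function resolves to the same summary at call time.
    Map.insert({FD->getCanonicalDecl(), S});
    return;
  }
}

The call-time half is then a plain map lookup, as the new findFunctionSummary body above shows.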
@@ -677,7 +692,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( // return value, however the correct range is [-1, 10]. // // Please update the list of functions in the header after editing! - // // Below are helpers functions to create the summaries. auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, @@ -739,238 +753,186 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); }; - FunctionSummaryMap = { - // The isascii() family of functions. - // The behavior is undefined if the value of the argument is not - // representable as unsigned char or is not equal to EOF. See e.g. C99 - // 7.4.1.2 The isalpha function (p: 181-182). - { - "isalnum", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - // Boils down to isupper() or islower() or isdigit(). - .Case( - {ArgumentCondition(0U, WithinRange, - {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - // The locale-specific range. - // No post-condition. We are completely unaware of - // locale-specific return values. - .Case({ArgumentCondition(0U, WithinRange, - {{128, UCharRangeMax}})}) - .Case({ArgumentCondition(0U, OutOfRange, - {{'0', '9'}, - {'A', 'Z'}, - {'a', 'z'}, - {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))}) - .ArgConstraint(ArgumentCondition( - 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))}, - }, - { - "isalpha", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, - {{'A', 'Z'}, {'a', 'z'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - // The locale-specific range. - .Case({ArgumentCondition(0U, WithinRange, - {{128, UCharRangeMax}})}) - .Case({ArgumentCondition( - 0U, OutOfRange, - {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isascii", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isblank", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, - {{'\t', '\t'}, {' ', ' '}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, - {{'\t', '\t'}, {' ', ' '}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "iscntrl", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, - {{0, 32}, {127, 127}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case( - {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isdigit", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isgraph", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), - 
ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "islower", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - // Is certainly lowercase. - .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - // Is ascii but not lowercase. - .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), - ArgumentCondition(0U, OutOfRange, Range('a', 'z')), - ReturnValueCondition(WithinRange, SingleValue(0))}) - // The locale-specific range. - .Case({ArgumentCondition(0U, WithinRange, - {{128, UCharRangeMax}})}) - // Is not an unsigned char. - .Case({ArgumentCondition(0U, OutOfRange, - Range(0, UCharRangeMax)), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isprint", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "ispunct", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case({ArgumentCondition( - 0U, WithinRange, - {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case({ArgumentCondition( - 0U, OutOfRange, - {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isspace", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - // Space, '\f', '\n', '\r', '\t', '\v'. - .Case({ArgumentCondition(0U, WithinRange, - {{9, 13}, {' ', ' '}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - // The locale-specific range. - .Case({ArgumentCondition(0U, WithinRange, - {{128, UCharRangeMax}})}) - .Case({ArgumentCondition( - 0U, OutOfRange, - {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isupper", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - // Is certainly uppercase. - .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - // The locale-specific range. - .Case({ArgumentCondition(0U, WithinRange, - {{128, UCharRangeMax}})}) - // Other. - .Case({ArgumentCondition(0U, OutOfRange, - {{'A', 'Z'}, {128, UCharRangeMax}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - { - "isxdigit", - Summaries{ - Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .Case( - {ArgumentCondition(0U, WithinRange, - {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), - ReturnValueCondition(OutOfRange, SingleValue(0))}) - .Case( - {ArgumentCondition(0U, OutOfRange, - {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), - ReturnValueCondition(WithinRange, SingleValue(0))})}, - }, - - // The getc() family of functions that returns either a char or an EOF. - {"getc", Summaries{Getc()}}, - {"fgetc", Summaries{Getc()}}, - {"getchar", - Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) + // The isascii() family of functions. + // The behavior is undefined if the value of the argument is not + // representable as unsigned char or is not equal to EOF. See e.g. C99 + // 7.4.1.2 The isalpha function (p: 181-182). + addToFunctionSummaryMap( + "isalnum", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + // Boils down to isupper() or islower() or isdigit(). 
+ .Case({ArgumentCondition(0U, WithinRange, + {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + // The locale-specific range. + // No post-condition. We are completely unaware of + // locale-specific return values. + .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) + .Case( + {ArgumentCondition( + 0U, OutOfRange, + {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), + ReturnValueCondition(WithinRange, SingleValue(0))}) + .ArgConstraint(ArgumentCondition( + 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); + addToFunctionSummaryMap( + "isalpha", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + // The locale-specific range. + .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) + .Case({ArgumentCondition( + 0U, OutOfRange, + {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isascii", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isblank", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "iscntrl", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isdigit", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isgraph", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "islower", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + // Is certainly lowercase. + .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + // Is ascii but not lowercase. + .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), + ArgumentCondition(0U, OutOfRange, Range('a', 'z')), + ReturnValueCondition(WithinRange, SingleValue(0))}) + // The locale-specific range. + .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) + // Is not an unsigned char. 
+ .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isprint", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "ispunct", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition( + 0U, WithinRange, + {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition( + 0U, OutOfRange, + {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isspace", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + // Space, '\f', '\n', '\r', '\t', '\v'. + .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + // The locale-specific range. + .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) + .Case({ArgumentCondition(0U, OutOfRange, + {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isupper", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + // Is certainly uppercase. + .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + // The locale-specific range. + .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) + // Other. + .Case({ArgumentCondition(0U, OutOfRange, + {{'A', 'Z'}, {128, UCharRangeMax}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + addToFunctionSummaryMap( + "isxdigit", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .Case({ArgumentCondition(0U, WithinRange, + {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), + ReturnValueCondition(OutOfRange, SingleValue(0))}) + .Case({ArgumentCondition(0U, OutOfRange, + {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), + ReturnValueCondition(WithinRange, SingleValue(0))})); + + // The getc() family of functions that returns either a char or an EOF. + addToFunctionSummaryMap("getc", Getc()); + addToFunctionSummaryMap("fgetc", Getc()); + addToFunctionSummaryMap( + "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) .Case({ReturnValueCondition( - WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}}, - - // read()-like functions that never return more than buffer size. - // We are not sure how ssize_t is defined on every platform, so we - // provide three variants that should cover common cases. - {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), - Read(LongLongTy, LongLongMax)}}, - {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), - Read(LongLongTy, LongLongMax)}}, - {"fread", Summaries{Fread()}}, - {"fwrite", Summaries{Fwrite()}}, - // getline()-like functions either fail or read at least the delimiter. - {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), - Getline(LongLongTy, LongLongMax)}}, - {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), - Getline(LongLongTy, LongLongMax)}}, - }; + WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})); + + // read()-like functions that never return more than buffer size. 
+ // We are not sure how ssize_t is defined on every platform, so we + // provide three variants that should cover common cases. + addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax), + Read(LongLongTy, LongLongMax)}); + addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax), + Read(LongLongTy, LongLongMax)}); + addToFunctionSummaryMap("fread", Fread()); + addToFunctionSummaryMap("fwrite", Fwrite()); + // getline()-like functions either fail or read at least the delimiter. + addToFunctionSummaryMap("getline", + {Getline(IntTy, IntMax), Getline(LongTy, LongMax), + Getline(LongLongTy, LongLongMax)}); + addToFunctionSummaryMap("getdelim", + {Getline(IntTy, IntMax), Getline(LongTy, LongMax), + Getline(LongLongTy, LongLongMax)}); // Functions for testing. if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { - llvm::StringMap TestFunctionSummaryMap = { - {"__two_constrained_args", - Summaries{ - Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) - .ArgConstraint( - ArgumentCondition(0U, WithinRange, SingleValue(1))) - .ArgConstraint( - ArgumentCondition(1U, WithinRange, SingleValue(1)))}}, - {"__arg_constrained_twice", - Summaries{Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) - .ArgConstraint( - ArgumentCondition(0U, OutOfRange, SingleValue(1))) - .ArgConstraint( - ArgumentCondition(0U, OutOfRange, SingleValue(2)))}}, - {"__defaultparam", Summaries{Summary(ArgTypes{Irrelevant, IntTy}, - RetType{IntTy}, EvalCallAsPure) - .ArgConstraint(NotNull(ArgNo(0)))}}, - {"__variadic", Summaries{Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy}, - RetType{IntTy}, EvalCallAsPure) - .ArgConstraint(NotNull(ArgNo(0))) - .ArgConstraint(NotNull(ArgNo(1)))}}}; - for (auto &E : TestFunctionSummaryMap) { - auto InsertRes = - FunctionSummaryMap.insert({std::string(E.getKey()), E.getValue()}); - assert(InsertRes.second && - "Test functions must not clash with modeled functions"); - (void)InsertRes; - } + addToFunctionSummaryMap( + "__two_constrained_args", + Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1))) + .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1)))); + addToFunctionSummaryMap( + "__arg_constrained_twice", + Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1))) + .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2)))); + addToFunctionSummaryMap( + "__defaultparam", + Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(NotNull(ArgNo(0)))); + addToFunctionSummaryMap("__variadic", + Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy}, + RetType{IntTy}, EvalCallAsPure) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint(NotNull(ArgNo(1)))); } } diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index 0848d6a6ec61f..efae511da83b4 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2390,6 +2390,7 @@ ProgramStateManager &PathSensitiveBugReporter::getStateManager() const { return Eng.getStateManager(); } +BugReporter::BugReporter(BugReporterData &d) : D(d) {} BugReporter::~BugReporter() { // Make sure reports are flushed. assert(StrBugTypes.empty() && @@ -2410,7 +2411,7 @@ void BugReporter::FlushReports() { // EmitBasicReport. 
// FIXME: There are leaks from checkers that assume that the BugTypes they // create will be destroyed by the BugReporter. - llvm::DeleteContainerSeconds(StrBugTypes); + StrBugTypes.clear(); } //===----------------------------------------------------------------------===// @@ -3263,8 +3264,8 @@ BugType *BugReporter::getBugTypeForName(CheckerNameRef CheckName, SmallString<136> fullDesc; llvm::raw_svector_ostream(fullDesc) << CheckName.getName() << ":" << name << ":" << category; - BugType *&BT = StrBugTypes[fullDesc]; + std::unique_ptr &BT = StrBugTypes[fullDesc]; if (!BT) - BT = new BugType(CheckName, name, category); - return BT; + BT = std::make_unique(CheckName, name, category); + return BT.get(); } diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index ea366d0994b56..cd15cd872d9de 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -972,14 +972,6 @@ void ObjCMethodCall::getExtraInvalidatedValues( Values.push_back(getReceiverSVal()); } -SVal ObjCMethodCall::getSelfSVal() const { - const LocationContext *LCtx = getLocationContext(); - const ImplicitParamDecl *SelfDecl = LCtx->getSelfDecl(); - if (!SelfDecl) - return SVal(); - return getState()->getSVal(getState()->getRegion(SelfDecl, LCtx)); -} - SVal ObjCMethodCall::getReceiverSVal() const { // FIXME: Is this the best way to handle class receivers? if (!isInstanceMessage()) @@ -991,7 +983,7 @@ SVal ObjCMethodCall::getReceiverSVal() const { // An instance message with no expression means we are sending to super. // In this case the object reference is the same as 'self'. assert(getOriginExpr()->getReceiverKind() == ObjCMessageExpr::SuperInstance); - SVal SelfVal = getSelfSVal(); + SVal SelfVal = getState()->getSelfSVal(getLocationContext()); assert(SelfVal.isValid() && "Calling super but not in ObjC method"); return SelfVal; } @@ -1005,8 +997,9 @@ bool ObjCMethodCall::isReceiverSelfOrSuper() const { return false; SVal RecVal = getSVal(getOriginExpr()->getInstanceReceiver()); + SVal SelfVal = getState()->getSelfSVal(getLocationContext()); - return (RecVal == getSelfSVal()); + return (RecVal == SelfVal); } SourceRange ObjCMethodCall::getSourceRange() const { @@ -1173,23 +1166,77 @@ static const ObjCMethodDecl *findDefiningRedecl(const ObjCMethodDecl *MD) { return MD; } -static bool isCallToSelfClass(const ObjCMessageExpr *ME) { - const Expr* InstRec = ME->getInstanceReceiver(); - if (!InstRec) - return false; - const auto *InstRecIg = dyn_cast(InstRec->IgnoreParenImpCasts()); +struct PrivateMethodKey { + const ObjCInterfaceDecl *Interface; + Selector LookupSelector; + bool IsClassMethod; +}; - // Check that receiver is called 'self'. - if (!InstRecIg || !InstRecIg->getFoundDecl() || - !InstRecIg->getFoundDecl()->getName().equals("self")) - return false; +namespace llvm { +template <> struct DenseMapInfo { + using InterfaceInfo = DenseMapInfo; + using SelectorInfo = DenseMapInfo; - // Check that the method name is 'class'. 
- if (ME->getSelector().getNumArgs() != 0 || - !ME->getSelector().getNameForSlot(0).equals("class")) - return false; + static inline PrivateMethodKey getEmptyKey() { + return {InterfaceInfo::getEmptyKey(), SelectorInfo::getEmptyKey(), false}; + } - return true; + static inline PrivateMethodKey getTombstoneKey() { + return {InterfaceInfo::getTombstoneKey(), SelectorInfo::getTombstoneKey(), + true}; + } + + static unsigned getHashValue(const PrivateMethodKey &Key) { + return llvm::hash_combine( + llvm::hash_code(InterfaceInfo::getHashValue(Key.Interface)), + llvm::hash_code(SelectorInfo::getHashValue(Key.LookupSelector)), + Key.IsClassMethod); + } + + static bool isEqual(const PrivateMethodKey &LHS, + const PrivateMethodKey &RHS) { + return InterfaceInfo::isEqual(LHS.Interface, RHS.Interface) && + SelectorInfo::isEqual(LHS.LookupSelector, RHS.LookupSelector) && + LHS.IsClassMethod == RHS.IsClassMethod; + } +}; +} // end namespace llvm + +const ObjCMethodDecl * +lookupRuntimeDefinition(const ObjCInterfaceDecl *Interface, + Selector LookupSelector, bool InstanceMethod) { + // Repeatedly calling lookupPrivateMethod() is expensive, especially + // when in many cases it returns null. We cache the results so + // that repeated queries on the same ObjCInterfaceDecl and Selector + // don't incur the same cost. On some test cases, we can see the + // same query being issued thousands of times. + // + // NOTE: This cache is essentially a "global" variable, but it + // only gets lazily created when we get here. The value of the + // cache probably comes from it being global across ExprEngines, + // where the same queries may get issued. If we are worried about + // concurrency, or possibly loading/unloading ASTs, etc., we may + // need to revisit this someday. In terms of memory, this table + // stays around until clang quits, which also may be bad if we + // need to release memory. + using PrivateMethodCache = + llvm::DenseMap<PrivateMethodKey, Optional<const ObjCMethodDecl *>>; + + static PrivateMethodCache PMC; + Optional<const ObjCMethodDecl *> &Val = + PMC[{Interface, LookupSelector, InstanceMethod}]; + + // Query lookupPrivateMethod() if the cache does not hit. + if (!Val.hasValue()) { + Val = Interface->lookupPrivateMethod(LookupSelector, InstanceMethod); + + if (!*Val) { + // Query 'lookupMethod' as a backup. + Val = Interface->lookupMethod(LookupSelector, InstanceMethod); + } + } + + return Val.getValue(); } RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const { @@ -1199,8 +1246,9 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const { if (E->isInstanceMessage()) { // Find the receiver type. - const ObjCObjectPointerType *ReceiverT = nullptr; + const ObjCObjectType *ReceiverT = nullptr; bool CanBeSubClassed = false; + bool LookingForInstanceMethod = true; QualType SupersType = E->getSuperType(); const MemRegion *Receiver = nullptr; @@ -1208,7 +1256,7 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const { // The receiver is guaranteed to be 'super' in this case. // Super always means the type of the immediate predecessor to the method // where the call occurs.
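(The getRuntimeDefinition walkthrough continues just below.) One prerequisite of the cache above deserves a note: DenseMap can only key on PrivateMethodKey because of the DenseMapInfo specialization, which must supply two reserved sentinel keys plus hashing and equality. The shape of such a specialization for a generic two-field key, as a sketch with invented names:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"

struct ExampleKey {
  const int *Ptr;
  unsigned Kind;
};

namespace llvm {
template <> struct DenseMapInfo<ExampleKey> {
  using PtrInfo = DenseMapInfo<const int *>;

  // Two reserved values DenseMap uses for empty and erased buckets;
  // real keys must never compare equal to either of them.
  static inline ExampleKey getEmptyKey() {
    return {PtrInfo::getEmptyKey(), 0};
  }
  static inline ExampleKey getTombstoneKey() {
    return {PtrInfo::getTombstoneKey(), 0};
  }

  static unsigned getHashValue(const ExampleKey &K) {
    return hash_combine(PtrInfo::getHashValue(K.Ptr), K.Kind);
  }

  static bool isEqual(const ExampleKey &L, const ExampleKey &R) {
    return PtrInfo::isEqual(L.Ptr, R.Ptr) && L.Kind == R.Kind;
  }
};
} // end namespace llvm

// With the specialization in scope, llvm::DenseMap<ExampleKey, int> works
// out of the box, which is exactly what PrivateMethodCache relies on.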
- ReceiverT = cast<ObjCObjectPointerType>(SupersType); + ReceiverT = cast<ObjCObjectPointerType>(SupersType)->getObjectType(); } else { Receiver = getReceiverSVal().getAsRegion(); if (!Receiver) @@ -1223,100 +1271,59 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const { QualType DynType = DTI.getType(); CanBeSubClassed = DTI.canBeASubClass(); - ReceiverT = dyn_cast<ObjCObjectPointerType>(DynType.getCanonicalType()); - if (ReceiverT && CanBeSubClassed) - if (ObjCInterfaceDecl *IDecl = ReceiverT->getInterfaceDecl()) - if (!canBeOverridenInSubclass(IDecl, Sel)) - CanBeSubClassed = false; - } + const auto *ReceiverDynT = + dyn_cast<ObjCObjectPointerType>(DynType.getCanonicalType()); + + if (ReceiverDynT) { + ReceiverT = ReceiverDynT->getObjectType(); + + // It can actually be a class method called with a Class object as the + // receiver. This type of message is treated by the compiler as an + // instance (not class) message. + if (ReceiverT->isObjCClass()) { - // Handle special cases of '[self classMethod]' and - // '[[self class] classMethod]', which are treated by the compiler as - // instance (not class) messages. We will statically dispatch to those. - if (auto *PT = dyn_cast_or_null<ObjCObjectPointerType>(ReceiverT)) { - // For [self classMethod], return the compiler visible declaration. - if (PT->getObjectType()->isObjCClass() && - Receiver == getSelfSVal().getAsRegion()) - return RuntimeDefinition(findDefiningRedecl(E->getMethodDecl())); - - // Similarly, handle [[self class] classMethod]. - // TODO: We are currently doing a syntactic match for this pattern with is - // limiting as the test cases in Analysis/inlining/InlineObjCClassMethod.m - // shows. A better way would be to associate the meta type with the symbol - // using the dynamic type info tracking and use it here. We can add a new - // SVal for ObjC 'Class' values that know what interface declaration they - // come from. Then 'self' in a class method would be filled in with - // something meaningful in ObjCMethodCall::getReceiverSVal() and we could - // do proper dynamic dispatch for class methods just like we do for - // instance methods now. - if (E->getInstanceReceiver()) - if (const auto *M = dyn_cast<ObjCMessageExpr>(E->getInstanceReceiver())) - if (isCallToSelfClass(M)) + SVal SelfVal = getState()->getSelfSVal(getLocationContext()); + // For [self classMethod], return the compiler-visible declaration. + if (Receiver == SelfVal.getAsRegion()) { return RuntimeDefinition(findDefiningRedecl(E->getMethodDecl())); + } + + // Otherwise, let's check if we know something about the type + // inside of this class object. + if (SymbolRef ReceiverSym = getReceiverSVal().getAsSymbol()) { + DynamicTypeInfo DTI = + getClassObjectDynamicTypeInfo(getState(), ReceiverSym); + if (DTI.isValid()) { + // Let's use this type for lookup. + ReceiverT = + cast<ObjCObjectType>(DTI.getType().getCanonicalType()); + + CanBeSubClassed = DTI.canBeASubClass(); + // And it should be a class method instead. + LookingForInstanceMethod = false; + } + } + } + + if (CanBeSubClassed) + if (ObjCInterfaceDecl *IDecl = ReceiverT->getInterface()) + // Even if `DynamicTypeInfo` told us that the receiver may be + // not this exact type but one of its descendants, we still want + // to check whether this selector can actually be overridden. + CanBeSubClassed = canBeOverridenInSubclass(IDecl, Sel); + } } // Lookup the instance method implementation. if (ReceiverT) - if (ObjCInterfaceDecl *IDecl = ReceiverT->getInterfaceDecl()) { - // Repeatedly calling lookupPrivateMethod() is expensive, especially - // when in many cases it returns null.
We cache the results so - // that repeated queries on the same ObjCIntefaceDecl and Selector - // don't incur the same cost. On some test cases, we can see the - // same query being issued thousands of times. - // - // NOTE: This cache is essentially a "global" variable, but it - // only gets lazily created when we get here. The value of the - // cache probably comes from it being global across ExprEngines, - // where the same queries may get issued. If we are worried about - // concurrency, or possibly loading/unloading ASTs, etc., we may - // need to revisit this someday. In terms of memory, this table - // stays around until clang quits, which also may be bad if we - // need to release memory. - using PrivateMethodKey = std::pair; - using PrivateMethodCache = - llvm::DenseMap>; - - static PrivateMethodCache PMC; - Optional &Val = PMC[std::make_pair(IDecl, Sel)]; - - // Query lookupPrivateMethod() if the cache does not hit. - if (!Val.hasValue()) { - Val = IDecl->lookupPrivateMethod(Sel); - - // If the method is a property accessor, we should try to "inline" it - // even if we don't actually have an implementation. - if (!*Val) - if (const ObjCMethodDecl *CompileTimeMD = E->getMethodDecl()) - if (CompileTimeMD->isPropertyAccessor()) { - if (!CompileTimeMD->getSelfDecl() && - isa(CompileTimeMD->getDeclContext())) { - // If the method is an accessor in a category, and it doesn't - // have a self declaration, first - // try to find the method in a class extension. This - // works around a bug in Sema where multiple accessors - // are synthesized for properties in class - // extensions that are redeclared in a category and the - // the implicit parameters are not filled in for - // the method on the category. - // This ensures we find the accessor in the extension, which - // has the implicit parameters filled in. - auto *ID = CompileTimeMD->getClassInterface(); - for (auto *CatDecl : ID->visible_extensions()) { - Val = CatDecl->getMethod(Sel, - CompileTimeMD->isInstanceMethod()); - if (*Val) - break; - } - } - if (!*Val) - Val = IDecl->lookupInstanceMethod(Sel); - } - } + if (ObjCInterfaceDecl *IDecl = ReceiverT->getInterface()) { + const ObjCMethodDecl *MD = + lookupRuntimeDefinition(IDecl, Sel, LookingForInstanceMethod); - const ObjCMethodDecl *MD = Val.getValue(); if (MD && !MD->hasBody()) MD = MD->getCanonicalDecl(); + if (CanBeSubClassed) return RuntimeDefinition(MD, Receiver); else diff --git a/clang/lib/StaticAnalyzer/Core/DynamicType.cpp b/clang/lib/StaticAnalyzer/Core/DynamicType.cpp index a78e0e05e903b..e9b64fd79614d 100644 --- a/clang/lib/StaticAnalyzer/Core/DynamicType.cpp +++ b/clang/lib/StaticAnalyzer/Core/DynamicType.cpp @@ -34,6 +34,10 @@ REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(CastSet, clang::ento::DynamicCastInfo) REGISTER_MAP_WITH_PROGRAMSTATE(DynamicCastMap, const clang::ento::MemRegion *, CastSet) +// A map from Class object symbols to the most likely pointed-to type. +REGISTER_MAP_WITH_PROGRAMSTATE(DynamicClassObjectMap, clang::ento::SymbolRef, + clang::ento::DynamicTypeInfo) + namespace clang { namespace ento { @@ -76,6 +80,12 @@ const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State, return nullptr; } +DynamicTypeInfo getClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym) { + const DynamicTypeInfo *DTI = State->get(Sym); + return DTI ? 
*DTI : DynamicTypeInfo{}; +} + ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR, DynamicTypeInfo NewTy) { State = State->set(MR->StripCasts(), NewTy); @@ -118,111 +128,165 @@ ProgramStateRef setDynamicTypeAndCastInfo(ProgramStateRef State, return State; } +ProgramStateRef setClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym, + DynamicTypeInfo NewTy) { + State = State->set(Sym, NewTy); + return State; +} + +ProgramStateRef setClassObjectDynamicTypeInfo(ProgramStateRef State, + SymbolRef Sym, QualType NewTy, + bool CanBeSubClassed) { + return setClassObjectDynamicTypeInfo(State, Sym, + DynamicTypeInfo(NewTy, CanBeSubClassed)); +} + +static bool isLive(SymbolReaper &SR, const MemRegion *MR) { + return SR.isLiveRegion(MR); +} + +static bool isLive(SymbolReaper &SR, SymbolRef Sym) { return SR.isLive(Sym); } + template -ProgramStateRef removeDead(ProgramStateRef State, const MapTy &Map, - SymbolReaper &SR) { +static ProgramStateRef removeDeadImpl(ProgramStateRef State, SymbolReaper &SR) { + const auto &Map = State->get(); + for (const auto &Elem : Map) - if (!SR.isLiveRegion(Elem.first)) - State = State->remove(Elem.first); + if (!isLive(SR, Elem.first)) + State = State->remove(Elem.first); return State; } ProgramStateRef removeDeadTypes(ProgramStateRef State, SymbolReaper &SR) { - return removeDead(State, State->get(), SR); + return removeDeadImpl(State, SR); } ProgramStateRef removeDeadCasts(ProgramStateRef State, SymbolReaper &SR) { - return removeDead(State, State->get(), SR); + return removeDeadImpl(State, SR); } -static void printDynamicTypesJson(raw_ostream &Out, ProgramStateRef State, - const char *NL, unsigned int Space, - bool IsDot) { - Indent(Out, Space, IsDot) << "\"dynamic_types\": "; +ProgramStateRef removeDeadClassObjectTypes(ProgramStateRef State, + SymbolReaper &SR) { + return removeDeadImpl(State, SR); +} - const DynamicTypeMapTy &Map = State->get(); - if (Map.isEmpty()) { - Out << "null," << NL; - return; - } +//===----------------------------------------------------------------------===// +// Implementation of the 'printer-to-JSON' function +//===----------------------------------------------------------------------===// - ++Space; - Out << '[' << NL; - for (DynamicTypeMapTy::iterator I = Map.begin(); I != Map.end(); ++I) { - const MemRegion *MR = I->first; - const DynamicTypeInfo &DTI = I->second; - Indent(Out, Space, IsDot) - << "{ \"region\": \"" << MR << "\", \"dyn_type\": "; - if (!DTI.isValid()) { - Out << "null"; - } else { - Out << '\"' << DTI.getType()->getPointeeType().getAsString() - << "\", \"sub_classable\": " - << (DTI.canBeASubClass() ? "true" : "false"); - } - Out << " }"; - - if (std::next(I) != Map.end()) - Out << ','; - Out << NL; +static raw_ostream &printJson(const MemRegion *Region, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + return Out << "\"region\": \"" << Region << "\""; +} + +static raw_ostream &printJson(const SymExpr *Symbol, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + return Out << "\"symbol\": \"" << Symbol << "\""; +} + +static raw_ostream &printJson(const DynamicTypeInfo &DTI, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + Out << "\"dyn_type\": "; + if (!DTI.isValid()) { + Out << "null"; + } else { + QualType ToPrint = DTI.getType(); + if (ToPrint->isAnyPointerType()) + ToPrint = ToPrint->getPointeeType(); + + Out << '\"' << ToPrint.getAsString() << "\", \"sub_classable\": " + << (DTI.canBeASubClass() ? 
"true" : "false"); } + return Out; +} - --Space; - Indent(Out, Space, IsDot) << "]," << NL; +static raw_ostream &printJson(const DynamicCastInfo &DCI, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + return Out << "\"from\": \"" << DCI.from().getAsString() << "\", \"to\": \"" + << DCI.to().getAsString() << "\", \"kind\": \"" + << (DCI.succeeds() ? "success" : "fail") << "\""; } -static void printDynamicCastsJson(raw_ostream &Out, ProgramStateRef State, - const char *NL, unsigned int Space, - bool IsDot) { - Indent(Out, Space, IsDot) << "\"dynamic_casts\": "; +template +static raw_ostream &printJson(const std::pair &Pair, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + printJson(Pair.first, Out, NL, Space, IsDot) << ", "; + return printJson(Pair.second, Out, NL, Space, IsDot); +} - const DynamicCastMapTy &Map = State->get(); - if (Map.isEmpty()) { - Out << "null," << NL; - return; +template +static raw_ostream &printJsonContainer(const ContainerTy &Container, + raw_ostream &Out, const char *NL, + unsigned int Space, bool IsDot) { + if (Container.isEmpty()) { + return Out << "null"; } ++Space; Out << '[' << NL; - for (DynamicCastMapTy::iterator I = Map.begin(); I != Map.end(); ++I) { - const MemRegion *MR = I->first; - const CastSet &Set = I->second; - - Indent(Out, Space, IsDot) << "{ \"region\": \"" << MR << "\", \"casts\": "; - if (Set.isEmpty()) { - Out << "null "; - } else { - ++Space; - Out << '[' << NL; - for (CastSet::iterator SI = Set.begin(); SI != Set.end(); ++SI) { - Indent(Out, Space, IsDot) - << "{ \"from\": \"" << SI->from().getAsString() << "\", \"to\": \"" - << SI->to().getAsString() << "\", \"kind\": \"" - << (SI->succeeds() ? "success" : "fail") << "\" }"; - - if (std::next(SI) != Set.end()) - Out << ','; - Out << NL; - } - --Space; - Indent(Out, Space, IsDot) << ']'; - } - Out << '}'; - - if (std::next(I) != Map.end()) + for (auto I = Container.begin(); I != Container.end(); ++I) { + const auto &Element = *I; + + Indent(Out, Space, IsDot) << "{ "; + printJson(Element, Out, NL, Space, IsDot) << " }"; + + if (std::next(I) != Container.end()) Out << ','; Out << NL; } --Space; - Indent(Out, Space, IsDot) << "]," << NL; + return Indent(Out, Space, IsDot) << "]"; +} + +static raw_ostream &printJson(const CastSet &Set, raw_ostream &Out, + const char *NL, unsigned int Space, bool IsDot) { + Out << "\"casts\": "; + return printJsonContainer(Set, Out, NL, Space, IsDot); +} + +template +static void printJsonImpl(raw_ostream &Out, ProgramStateRef State, + const char *Name, const char *NL, unsigned int Space, + bool IsDot, bool PrintEvenIfEmpty = true) { + const auto &Map = State->get(); + if (Map.isEmpty() && !PrintEvenIfEmpty) + return; + + Indent(Out, Space, IsDot) << "\"" << Name << "\": "; + printJsonContainer(Map, Out, NL, Space, IsDot) << "," << NL; +} + +static void printDynamicTypesJson(raw_ostream &Out, ProgramStateRef State, + const char *NL, unsigned int Space, + bool IsDot) { + printJsonImpl(Out, State, "dynamic_types", NL, Space, IsDot); +} + +static void printDynamicCastsJson(raw_ostream &Out, ProgramStateRef State, + const char *NL, unsigned int Space, + bool IsDot) { + printJsonImpl(Out, State, "dynamic_casts", NL, Space, IsDot); +} + +static void printClassObjectDynamicTypesJson(raw_ostream &Out, + ProgramStateRef State, + const char *NL, unsigned int Space, + bool IsDot) { + // Let's print Class object type information only if we have something + // meaningful to print. 
+ printJsonImpl(Out, State, "class_object_types", NL, + Space, IsDot, + /*PrintEvenIfEmpty=*/false); } void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State, const char *NL, unsigned int Space, bool IsDot) { printDynamicTypesJson(Out, State, NL, Space, IsDot); printDynamicCastsJson(Out, State, NL, Space, IsDot); + printClassObjectDynamicTypesJson(Out, State, NL, Space, IsDot); } } // namespace ento diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 14006f79fd0f6..3ecee758c676c 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -240,6 +240,13 @@ ProgramState::enterStackFrame(const CallEvent &Call, return makeWithStore(NewStore); } +SVal ProgramState::getSelfSVal(const LocationContext *LCtx) const { + const ImplicitParamDecl *SelfDecl = LCtx->getSelfDecl(); + if (!SelfDecl) + return SVal(); + return getSVal(getRegion(SelfDecl, LCtx)); +} + SVal ProgramState::getSValAsScalarOrLoc(const MemRegion *R) const { // We only want to do fetches from regions that we can actually bind // values. For example, SymbolicRegions of type 'id<...>' cannot diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index e655aae45832d..ff6cd21b09d63 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -341,10 +341,6 @@ QualType SymbolRegionValue::getType() const { return R->getValueType(); } -SymbolManager::~SymbolManager() { - llvm::DeleteContainerSeconds(SymbolDependencies); -} - bool SymbolManager::canSymbolicate(QualType T) { T = T.getCanonicalType(); @@ -362,13 +358,9 @@ bool SymbolManager::canSymbolicate(QualType T) { void SymbolManager::addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent) { - SymbolDependTy::iterator I = SymbolDependencies.find(Primary); - SymbolRefSmallVectorTy *dependencies = nullptr; - if (I == SymbolDependencies.end()) { - dependencies = new SymbolRefSmallVectorTy(); - SymbolDependencies[Primary] = dependencies; - } else { - dependencies = I->second; + auto &dependencies = SymbolDependencies[Primary]; + if (!dependencies) { + dependencies = std::make_unique(); } dependencies->push_back(Dependent); } @@ -378,7 +370,7 @@ const SymbolRefSmallVectorTy *SymbolManager::getDependentSymbols( SymbolDependTy::const_iterator I = SymbolDependencies.find(Primary); if (I == SymbolDependencies.end()) return nullptr; - return I->second; + return I->second.get(); } void SymbolReaper::markDependentsLive(SymbolRef sym) { diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index d8cd11efedd28..a857b57fbf7bc 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -98,7 +98,8 @@ ArgumentsAdjuster getClangStripDependencyFileAdjuster() { StringRef Arg = Args[i]; // All dependency-file options begin with -M. These include -MM, // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD. 
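(The hunk just below extends that dependency-file filter to the -showIncludes spelling as well.) For context, an ArgumentsAdjuster in the Tooling library is simply a function from one command line to another, so one-off filters like this can also be written inline. The following is a sketch against the public API; the helper name is invented.

#include "clang/Tooling/ArgumentsAdjusters.h"
#include <string>
#include <utility>

using namespace clang::tooling;

// Sketch: an adjuster dropping every argument that starts with Prefix.
// The real getClangStripDependencyFileAdjuster below is more careful: it
// also skips the separate value of flags like -MF that consume one.
static ArgumentsAdjuster makeStripPrefixAdjuster(std::string Prefix) {
  return [Prefix = std::move(Prefix)](const CommandLineArguments &Args,
                                      llvm::StringRef /*Filename*/) {
    CommandLineArguments Adjusted;
    for (const std::string &Arg : Args)
      if (!llvm::StringRef(Arg).startswith(Prefix))
        Adjusted.push_back(Arg);
    return Adjusted;
  };
}

Such an adjuster is typically installed with ClangTool::appendArgumentsAdjuster, and combineAdjusters chains two of them.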
- if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes")) { + if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes") && + !Arg.startswith("-showIncludes")) { AdjustedArgs.push_back(Args[i]); continue; } diff --git a/clang/test/AST/ast-print-enum-decl.c b/clang/test/AST/ast-print-enum-decl.c index fba9313442850..0e2a8ed7d8e57 100644 --- a/clang/test/AST/ast-print-enum-decl.c +++ b/clang/test/AST/ast-print-enum-decl.c @@ -103,3 +103,8 @@ struct DeclGroupInMemberList { enum T2 { T20 } *p1, *p2; // PRINT-NEXT: }; }; + +enum fixedEnum : int { fixedEnumerator }; +// PRINT-LABEL: enum fixedEnum : int { +// PRINT-NEXT: fixedEnumerator +// PRINT-NEXT: }; diff --git a/clang/test/AST/spurious-regparm.c b/clang/test/AST/spurious-regparm.c new file mode 100644 index 0000000000000..4ae23f0172413 --- /dev/null +++ b/clang/test/AST/spurious-regparm.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple armv8.1m.main-eabi -mcmse -fsyntax-only %s -ast-dump | FileCheck %s +// REQUIRES: arm-registered-target +typedef int (*fn_t)(int) __attribute__((cmse_nonsecure_call)); +// CHECK-NOT: regparm 0 diff --git a/clang/test/Analysis/a_flaky_crash.cpp b/clang/test/Analysis/a_flaky_crash.cpp index 04bd57883fce8..f350c1e1280cf 100644 --- a/clang/test/Analysis/a_flaky_crash.cpp +++ b/clang/test/Analysis/a_flaky_crash.cpp @@ -1,6 +1,6 @@ // This code used to crash but unpredictably and rarely. -// Even with the current set of run-lines, if a buildbot tells you that -// you broke this test there's a chance that someone else broke it +// Even with the current set of run-lines, if a buildbot tells you +// that you broke this test there's a chance that someone else broke it // a few commits ago. struct S { diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp index 9abdaae0d4592..3dffb78767cf4 100644 --- a/clang/test/Analysis/cast-value-state-dump.cpp +++ b/clang/test/Analysis/cast-value-state-dump.cpp @@ -37,7 +37,7 @@ void evalNonNullParamNonNullReturn(const Shape *S) { // CHECK: { "region": "SymRegion{reg_$0}", "casts": [ // CHECK-NEXT: { "from": "const struct clang::Shape *", "to": "const class clang::Circle *", "kind": "success" }, // CHECK-NEXT: { "from": "const struct clang::Shape *", "to": "const class clang::Square *", "kind": "fail" } - // CHECK-NEXT: ]} + // CHECK-NEXT: ] } (void)(1 / !C); // expected-note@-1 {{'C' is non-null}} diff --git a/clang/test/Analysis/class-object-state-dump.m b/clang/test/Analysis/class-object-state-dump.m new file mode 100644 index 0000000000000..66519b82adb1d --- /dev/null +++ b/clang/test/Analysis/class-object-state-dump.m @@ -0,0 +1,38 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \ +// RUN: -verify %s 2>&1 | FileCheck %s + +// expected-no-diagnostics + +void clang_analyzer_printState(); + +@interface NSObject { +} ++ (id)alloc; ++ (Class)class; +- (id)init; +- (Class)class; +@end + +@interface Parent : NSObject +@end +@interface Child : Parent +@end + +void foo(id A, id B); + +@implementation Child ++ (void)test { + id ClassAsID = [self class]; + id Object = [[ClassAsID alloc] init]; + Class TheSameClass = [Object class]; + + clang_analyzer_printState(); + // CHECK: "class_object_types": [ + // CHECK-NEXT: { "symbol": "conj_$[[#]]{Class, LC[[#]], S[[#]], #[[#]]}", "dyn_type": "Child", "sub_classable": true }, + // CHECK-NEXT: { "symbol": "conj_$[[#]]{Class, LC[[#]], S[[#]], #[[#]]}", "dyn_type": "Child", "sub_classable": true } + // CHECK-NEXT: ] + + // Let's make sure that the information is 
not GC'd away. + foo(ClassAsID, TheSameClass); +} +@end diff --git a/clang/test/Analysis/inlining/InlineObjCClassMethod.m b/clang/test/Analysis/inlining/InlineObjCClassMethod.m index 11b2d63afa592..48f5ac211657c 100644 --- a/clang/test/Analysis/inlining/InlineObjCClassMethod.m +++ b/clang/test/Analysis/inlining/InlineObjCClassMethod.m @@ -6,18 +6,25 @@ // Test inlining of ObjC class methods. typedef signed char BOOL; +#define YES ((BOOL)1) +#define NO ((BOOL)0) typedef struct objc_class *Class; typedef struct objc_object { - Class isa; -} *id; -@protocol NSObject - (BOOL)isEqual:(id)object; @end + Class isa; +} * id; +@protocol NSObject +- (BOOL)isEqual:(id)object; +@end @interface NSObject {} -+(id)alloc; --(id)init; --(id)autorelease; --(id)copy; ++ (id)alloc; ++ (Class)class; ++ (Class)superclass; +- (id)init; +- (id)autorelease; +- (id)copy; - (Class)class; --(id)retain; +- (instancetype)self; +- (id)retain; @end // Vanila: ObjC class method is called by name. @@ -133,10 +140,7 @@ + (int)getInt { } @end - -// False negative. // ObjC class method call through a decl with a known type. -// We should be able to track the type of currentClass and inline this call. // Note, [self class] could be a subclass. Do we still want to inline here? @interface MyClassKT : NSObject @end @@ -152,7 +156,7 @@ @implementation MyClassKT - (int)testClassMethodByKnownVarDecl { Class currentClass = [self class]; int y = [currentClass getInt]; - return 5/y; // Would be great to get a warning here. + return 5 / y; // expected-warning{{Division by zero}} } @end @@ -240,37 +244,124 @@ +(unsigned)returns20; +(unsigned)returns30; @end -@implementation SelfClassTestParent --(unsigned)returns10 { return 100; } -+(unsigned)returns20 { return 100; } -+(unsigned)returns30 { return 100; } +@interface SelfClassTest : SelfClassTestParent +- (unsigned)returns10; ++ (unsigned)returns20; ++ (unsigned)returns30; @end -@interface SelfClassTest : SelfClassTestParent --(unsigned)returns10; -+(unsigned)returns20; -+(unsigned)returns30; +@implementation SelfClassTestParent +- (unsigned)returns10 { + return 100; +} ++ (unsigned)returns20 { + return 100; +} ++ (unsigned)returns30 { + return 100; +} + +- (void)testSelfReassignment { + // Check that we didn't hardcode type for self. 
+ self = [[[SelfClassTest class] alloc] init]; + Class actuallyChildClass = [self class]; + unsigned result = [actuallyChildClass returns30]; + clang_analyzer_eval(result == 30); // expected-warning{{TRUE}} +} @end @implementation SelfClassTest --(unsigned)returns10 { return 10; } -+(unsigned)returns20 { return 20; } -+(unsigned)returns30 { return 30; } -+(void)classMethod { +- (unsigned)returns10 { + return 10; +} ++ (unsigned)returns20 { + return 20; +} ++ (unsigned)returns30 { + return 30; +} ++ (BOOL)isClass { + return YES; +} +- (BOOL)isClass { + return NO; +} ++ (SelfClassTest *)create { + return [[self alloc] init]; +} ++ (void)classMethod { unsigned result1 = [self returns20]; clang_analyzer_eval(result1 == 20); // expected-warning{{TRUE}} + unsigned result2 = [[self class] returns30]; clang_analyzer_eval(result2 == 30); // expected-warning{{TRUE}} + unsigned result3 = [[super class] returns30]; - clang_analyzer_eval(result3 == 100); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(result3 == 100); // expected-warning{{TRUE}} + + // Check that class info is propagated with data + Class class41 = [self class]; + Class class42 = class41; + unsigned result4 = [class42 returns30]; + clang_analyzer_eval(result4 == 30); // expected-warning{{TRUE}} + + Class class51 = [super class]; + Class class52 = class51; + unsigned result5 = [class52 returns30]; + clang_analyzer_eval(result5 == 100); // expected-warning{{TRUE}} } --(void)instanceMethod { +- (void)instanceMethod { unsigned result0 = [self returns10]; clang_analyzer_eval(result0 == 10); // expected-warning{{TRUE}} + unsigned result2 = [[self class] returns30]; clang_analyzer_eval(result2 == 30); // expected-warning{{TRUE}} + unsigned result3 = [[super class] returns30]; - clang_analyzer_eval(result3 == 100); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(result3 == 100); // expected-warning{{TRUE}} + + // Check that class info is propagated with data + Class class41 = [self class]; + Class class42 = class41; + unsigned result4 = [class42 returns30]; + clang_analyzer_eval(result4 == 30); // expected-warning{{TRUE}} + + Class class51 = [super class]; + Class class52 = class51; + unsigned result5 = [class52 returns30]; + clang_analyzer_eval(result5 == 100); // expected-warning{{TRUE}} + + // Check that we inline class methods when class object is a receiver + Class class6 = [self class]; + BOOL calledClassMethod = [class6 isClass]; + clang_analyzer_eval(calledClassMethod == YES); // expected-warning{{TRUE}} + + // Check that class info is propagated through the 'self' method + Class class71 = [self class]; + Class class72 = [class71 self]; + unsigned result7 = [class72 returns30]; + clang_analyzer_eval(result7 == 30); // expected-warning{{TRUE}} + + // Check that 'class' and 'super' info from direct invocation of the + // corresponding class methods is propagated with data + Class class8 = [SelfClassTest class]; + unsigned result8 = [class8 returns30]; + clang_analyzer_eval(result8 == 30); // expected-warning{{TRUE}} + + Class class9 = [SelfClassTest superclass]; + unsigned result9 = [class9 returns30]; + clang_analyzer_eval(result9 == 100); // expected-warning{{TRUE}} + + // Check that we get class from a propagated type + SelfClassTestParent *selfAsParent10 = [[SelfClassTest alloc] init]; + Class class10 = [selfAsParent10 class]; + unsigned result10 = [class10 returns30]; + clang_analyzer_eval(result10 == 30); // expected-warning{{TRUE}} + + SelfClassTestParent *selfAsParent11 = [[[self class] alloc] init]; + Class class11 = 
[selfAsParent11 class]; + unsigned result11 = [class11 returns30]; + clang_analyzer_eval(result11 == 30); // expected-warning{{TRUE}} } @end diff --git a/clang/test/Analysis/inlining/ObjCDynTypePopagation.m b/clang/test/Analysis/inlining/ObjCDynTypePopagation.m index 0c1d4f2a31cbc..b5e70229c7c89 100644 --- a/clang/test/Analysis/inlining/ObjCDynTypePopagation.m +++ b/clang/test/Analysis/inlining/ObjCDynTypePopagation.m @@ -7,68 +7,67 @@ PublicSubClass2 *getObj(); @implementation PublicParent -- (int) getZeroOverridden { - return 1; +- (int)getZeroOverridden { + return 1; } -- (int) getZero { - return 0; +- (int)getZero { + return 0; } @end @implementation PublicSubClass2 -- (int) getZeroOverridden { - return 0; +- (int)getZeroOverridden { + return 0; } /* Test that we get the right type from call to alloc. */ -+ (void) testAllocSelf { ++ (void)testAllocSelf { id a = [self alloc]; clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{TRUE}} } - -+ (void) testAllocClass { ++ (void)testAllocClass { id a = [PublicSubClass2 alloc]; clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{TRUE}} } -+ (void) testAllocSuperOverriden { ++ (void)testAllocSuperOverriden { id a = [super alloc]; // Evaluates to 1 in the parent. - clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{FALSE}} + clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{FALSE}} } -+ (void) testAllocSuper { ++ (void)testAllocSuper { id a = [super alloc]; clang_analyzer_eval([a getZero] == 0); // expected-warning{{TRUE}} } -+ (void) testAllocInit { ++ (void)testAllocInit { id a = [[self alloc] init]; clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{TRUE}} } -+ (void) testNewSelf { ++ (void)testNewSelf { id a = [self new]; clang_analyzer_eval([a getZeroOverridden] == 0); // expected-warning{{TRUE}} } -// Casting to parent should not pessimize the dynamic type. -+ (void) testCastToParent { - id a = [[self alloc] init]; - PublicParent *p = a; +// Casting to parent should not pessimize the dynamic type. ++ (void)testCastToParent { + id a = [[self alloc] init]; + PublicParent *p = a; clang_analyzer_eval([p getZeroOverridden] == 0); // expected-warning{{TRUE}} } // The type of parameter gets used. -+ (void)testTypeFromParam:(PublicParent*) p { ++ (void)testTypeFromParam:(PublicParent *)p { clang_analyzer_eval([p getZero] == 0); // expected-warning{{TRUE}} } // Test implicit cast. // Note, in this case, p could also be a subclass of MyParent. -+ (void) testCastFromId:(id) a { - PublicParent *p = a; ++ (void)testCastFromId:(id)a { + PublicParent *p = a; clang_analyzer_eval([p getZero] == 0); // expected-warning{{TRUE}} } @end @@ -76,25 +75,27 @@ + (void) testCastFromId:(id) a { // TODO: Would be nice to handle the case of dynamically obtained class info // as well. We need a MemRegion for class types for this. int testDynamicClass(BOOL coin) { - Class AllocClass = (coin ? [NSObject class] : [PublicSubClass2 class]); - id x = [[AllocClass alloc] init]; - if (coin) - return [x getZero]; - return 1; + Class AllocClass = (coin ? 
[NSObject class] : [PublicSubClass2 class]); + id x = [[AllocClass alloc] init]; + if (coin) + return [x getZero]; + return 1; } @interface UserClass : NSObject -- (PublicSubClass2 *) _newPublicSubClass2; -- (int) getZero; -- (void) callNew; +- (PublicSubClass2 *)_newPublicSubClass2; +- (int)getZero; +- (void)callNew; @end @implementation UserClass -- (PublicSubClass2 *) _newPublicSubClass2 { +- (PublicSubClass2 *)_newPublicSubClass2 { return [[PublicSubClass2 alloc] init]; } -- (int) getZero { return 5; } -- (void) callNew { +- (int)getZero { + return 5; +} +- (void)callNew { PublicSubClass2 *x = [self _newPublicSubClass2]; clang_analyzer_eval([x getZero] == 0); //expected-warning{{TRUE}} } diff --git a/clang/test/Analysis/retain-release-inline.m b/clang/test/Analysis/retain-release-inline.m index 4fe6bca4a44a4..ea2acbd7f602e 100644 --- a/clang/test/Analysis/retain-release-inline.m +++ b/clang/test/Analysis/retain-release-inline.m @@ -13,6 +13,7 @@ // It includes the basic definitions for the test cases below. //===----------------------------------------------------------------------===// #define NULL 0 +#define nil ((id)0) typedef unsigned int __darwin_natural_t; typedef unsigned long uintptr_t; typedef unsigned int uint32_t; @@ -21,14 +22,14 @@ typedef signed long CFIndex; typedef CFIndex CFByteOrder; typedef struct { - CFIndex location; - CFIndex length; + CFIndex location; + CFIndex length; } CFRange; static __inline__ __attribute__((always_inline)) CFRange CFRangeMake(CFIndex loc, CFIndex len) { - CFRange range; - range.location = loc; - range.length = len; - return range; + CFRange range; + range.location = loc; + range.length = len; + return range; } typedef const void * CFTypeRef; typedef const struct __CFString * CFStringRef; @@ -91,6 +92,7 @@ @protocol NSObject - (BOOL)isEqual:(id)object; - (id)retain; - (oneway void)release; +- (Class)class; - (id)autorelease; - (id)init; @end @protocol NSCopying - (id)copyWithZone:(NSZone *)zone; @@ -100,6 +102,7 @@ - (id)init; @interface NSObject {} + (id)allocWithZone:(NSZone *)zone; + (id)alloc; ++ (Class)class; - (void)dealloc; @end @interface NSObject (NSCoderMethods) @@ -481,3 +484,33 @@ - (void)inline_test_reanalyze_as_top_level { [self test_inline_tiny_when_reanalyzing]; } @end + +// Original problem: rdar://problem/50739539 +@interface MyClassThatLeaksDuringInit : NSObject + ++ (MyClassThatLeaksDuringInit *)getAnInstance1; ++ (MyClassThatLeaksDuringInit *)getAnInstance2; + +@end + +@implementation MyClassThatLeaksDuringInit + ++ (MyClassThatLeaksDuringInit *)getAnInstance1 { + return [[[MyClassThatLeaksDuringInit alloc] init] autorelease]; // expected-warning{{leak}} +} + ++ (MyClassThatLeaksDuringInit *)getAnInstance2 { + return [[[[self class] alloc] init] autorelease]; // expected-warning{{leak}} +} + +- (instancetype)init { + if (1) { + return nil; + } + + if (nil != (self = [super init])) { + } + return self; +} + +@end diff --git a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp index 55c5ce6c4cbec..79529c6c0fc0a 100644 --- a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp +++ b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp @@ -11,6 +11,8 @@ #ifdef INTERFACE module; #include "foo.h" +// FIXME: The following need to be moved to a header file. The global module +// fragment is only permitted to contain preprocessor directives. 
int global_module_fragment; export module A; export int exported; @@ -28,12 +30,13 @@ module; void test_early() { in_header = 1; // expected-error {{missing '#include "foo.h"'; 'in_header' must be declared before it is used}} - // expected-note@*{{previous}} + // expected-note@*{{not visible}} global_module_fragment = 1; // expected-error {{missing '#include'; 'global_module_fragment' must be declared before it is used}} + // expected-note@p2.cpp:16 {{not visible}} exported = 1; // expected-error {{must be imported from module 'A'}} - // expected-note@p2.cpp:16 {{previous}} + // expected-note@p2.cpp:18 {{not visible}} not_exported = 1; // expected-error {{undeclared identifier}} @@ -52,16 +55,17 @@ import A; void test_late() { in_header = 1; // expected-error {{missing '#include "foo.h"'; 'in_header' must be declared before it is used}} - // expected-note@*{{previous}} + // expected-note@*{{not visible}} global_module_fragment = 1; // expected-error {{missing '#include'; 'global_module_fragment' must be declared before it is used}} + // expected-note@p2.cpp:16 {{not visible}} exported = 1; not_exported = 1; #ifndef IMPLEMENTATION // expected-error@-2 {{undeclared identifier 'not_exported'; did you mean 'exported'}} - // expected-note@p2.cpp:16 {{declared here}} + // expected-note@p2.cpp:18 {{declared here}} #endif internal = 1; diff --git a/clang/test/CXX/module/module.unit/p8.cpp b/clang/test/CXX/module/module.unit/p8.cpp index aad65272f0d6a..a4a85d0bf91fe 100644 --- a/clang/test/CXX/module/module.unit/p8.cpp +++ b/clang/test/CXX/module/module.unit/p8.cpp @@ -36,5 +36,5 @@ export module foo:bar; // expected-error {{not yet supported}} expected-error {{ int k = n; #ifndef IMPORTED // expected-error@-2 {{declaration of 'n' must be imported from module 'foo' before it is required}} -// expected-note@* {{previous}} +// expected-note@* {{not visible}} #endif diff --git a/clang/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.import/p1.cpp b/clang/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.import/p1.cpp index 15900c1f6a3a9..70b553f1ff74c 100644 --- a/clang/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.import/p1.cpp +++ b/clang/test/CXX/modules-ts/dcl.dcl/dcl.module/dcl.module.import/p1.cpp @@ -23,7 +23,7 @@ module MODULE_NAME; int use_1 = a; #if !MODULE_X // expected-error@-2 {{declaration of 'a' must be imported from module 'x' before it is required}} -// expected-note@x.cppm:1 {{here}} +// expected-note@x.cppm:1 {{not visible}} #endif import x; @@ -32,7 +32,7 @@ int use_2 = b; // ok // There is no relation between module x and module x.y. 
int use_3 = c; // expected-error {{declaration of 'c' must be imported from module 'x.y'}} - // expected-note@x.y.cppm:1 {{here}} + // expected-note@x.y.cppm:1 {{not visible}} import x [[]]; import x [[foo]]; // expected-warning {{unknown attribute 'foo' ignored}} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c index 970dc2f234056..6534d89fb4537 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c @@ -469,3 +469,180 @@ svuint64_t test_svabd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svabd,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svabd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svabd_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svabd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svabd_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svabd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svabd_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svabd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svabd_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svabd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svabd_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svabd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svabd_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svabd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svabd_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svabd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svabd_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svabd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svabd_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svabd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svabd_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svabd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svabd_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svabd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svabd_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svabd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svabd_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svabd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svabd_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t 
test_svabd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svabd_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svabd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svabd_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svabd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svabd_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svabd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svabd_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fabd.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svabd,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c index 2404b0a8c40e6..4d9afd56acb74 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c @@ -469,3 +469,180 @@ svuint64_t test_svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svadd,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svadd_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svadd_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svadd_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call 
@llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svadd_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svadd_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svadd_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svadd_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svadd_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svadd_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svadd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svadd_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svadd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svadd_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // 
CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svadd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svadd_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svadd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svadd_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svadd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svadd_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svadd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svadd_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svadd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svadd_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svadd_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svadd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svadd_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fadd.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadd,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c
new file mode 100644
index 0000000000000..6ac6e5d0d6185
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+float16_t test_svadda_f16(svbool_t pg, float16_t initial, svfloat16_t op)
+{
+  // CHECK-LABEL: test_svadda_f16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %[[PG]], half %initial, <vscale x 8 x half> %op)
+  // CHECK: ret half %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svadda,_f16,,)(pg, initial, op);
+}
+
+float32_t test_svadda_f32(svbool_t pg, float32_t initial, svfloat32_t op)
+{
+  // CHECK-LABEL: test_svadda_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %[[PG]], float %initial, <vscale x 4 x float> %op)
+  // CHECK: ret float %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svadda,_f32,,)(pg, initial, op);
+}
+
+float64_t test_svadda_f64(svbool_t pg, float64_t initial, svfloat64_t op)
+{
+  // CHECK-LABEL: test_svadda_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %[[PG]], double %initial, <vscale x 2 x double> %op)
+  // CHECK: ret double %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svadda,_f64,,)(pg, initial, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
new file mode 100644
index 0000000000000..0c715f31dbf46
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
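+// For instance, SVE_ACLE_FUNC(svaddv,_f16,,) expands to the overloaded name
+// 'svaddv' under this branch, and to the mangled name 'svaddv_f16' under the
+// #else branch exercised by the first RUN line.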
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int64_t test_svaddv_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svaddv_s8 + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_s8,,)(pg, op); +} + +int64_t test_svaddv_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svaddv_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_s16,,)(pg, op); +} + +int64_t test_svaddv_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svaddv_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_s32,,)(pg, op); +} + +int64_t test_svaddv_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svaddv_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_s64,,)(pg, op); +} + +uint64_t test_svaddv_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svaddv_u8 + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_u8,,)(pg, op); +} + +uint64_t test_svaddv_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svaddv_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_u16,,)(pg, op); +} + +uint64_t test_svaddv_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svaddv_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_u32,,)(pg, op); +} + +uint64_t test_svaddv_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svaddv_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_u64,,)(pg, op); +} + +float16_t test_svaddv_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svaddv_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.faddv.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_f16,,)(pg, op); +} + +float32_t test_svaddv_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svaddv_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.faddv.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_f32,,)(pg, op); +} + +float64_t test_svaddv_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svaddv_f64 + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.faddv.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svaddv,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c new file mode 100644 index 0000000000000..2f290ea463c6d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c @@ -0,0 +1,479 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svand_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svand_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svand_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svand_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svand_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svand_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svand_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svand_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svand_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svand_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svand_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svand_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call 
@llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svand_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svand_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svand_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svand_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svand_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svand_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svand_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svand_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svand_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svand_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svand_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svand_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svand_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svand_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svand_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svand_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svand_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svand_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + 
return SVE_ACLE_FUNC(svand,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svand_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svand_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svand_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svand_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svand_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svand_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svand_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svand_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svand_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svand_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svand_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svand_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svand_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svand_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svand_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svand_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svand_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svand_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_u64,_x,)(pg, op1, op2); +} + +svint8_t test_svand_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svand_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( 
%pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svand_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svand_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svand_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svand_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svand_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svand_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svand_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svand_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svand_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svand_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svand_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svand_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svand_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svand_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svand_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svand_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svand_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svand_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svand_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svand_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svand_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svand_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svand_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svand_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svand_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svand_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svand_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svand_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svand_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svand_n_u64_m + // 
CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svand_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svand_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svand_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svand_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svand_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svand_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svand_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svand_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svand_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svand_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svand_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svand_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svand_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svand_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svand_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svand_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( 
%pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_n_u64,_x,)(pg, op1, op2); +} + +svbool_t test_svand_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svand_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svand,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c new file mode 100644 index 0000000000000..eda6afd44de12 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svandv_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svandv_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_s8,,)(pg, op); +} + +int16_t test_svandv_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svandv_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_s16,,)(pg, op); +} + +int32_t test_svandv_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svandv_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_s32,,)(pg, op); +} + +int64_t test_svandv_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svandv_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_s64,,)(pg, op); +} + +uint8_t test_svandv_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svandv_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_u8,,)(pg, op); +} + +uint16_t test_svandv_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svandv_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_u16,,)(pg, op); +} + +uint32_t test_svandv_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svandv_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 
@llvm.aarch64.sve.andv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_u32,,)(pg, op); +} + +uint64_t test_svandv_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svandv_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svandv,_u64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c new file mode 100644 index 0000000000000..2fb80acc2822a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c @@ -0,0 +1,412 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svasr_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svasr_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svasr_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s64,_z,)(pg, op1, op2); +} + +svint8_t test_svasr_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svasr_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svasr_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // 
CHECK-LABEL: test_svasr_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svasr_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svasr_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svasr_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s64,_m,)(pg, op1, op2); +} + +svint8_t test_svasr_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svasr_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svasr_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svasr_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svasr_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svasr_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svasr_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s64,_x,)(pg, op1, op2); +} + +svint64_t test_svasr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s64,_z,)(pg, op1, op2); +} + +svint64_t test_svasr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s64,_m,)(pg, op1, op2); +} + +svint64_t test_svasr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_n_s64_x + 
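+  // Reading note (an explanatory aside, not a FileCheck directive): for the
+  // scalar (_n) forms checked here, the scalar shift amount is expected to
+  // be splatted with @llvm.aarch64.sve.dup.x and then fed to the same
+  // predicated @llvm.aarch64.sve.asr intrinsic used by the vector forms
+  // above, roughly:
+  //   svasr_n_s64_x(pg, op1, n)  ->  asr(pg, op1, dup.x(n))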
// CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s64,_x,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s32,_z,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s32,_m,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_s32_x(svbool_t pg, 
svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_s32,_x,)(pg, op1, op2); +} + +svint8_t test_svasr_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svasr_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svasr_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svasr_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svasr_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svasr_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s32,_z,)(pg, op1, op2); +} + +svint8_t test_svasr_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svasr_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svasr_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svasr_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svasr_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svasr_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s32,_m,)(pg, op1, op2); +} + +svint8_t test_svasr_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svasr_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s8,_x,)(pg, op1, 
op2); +} + +svint16_t test_svasr_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svasr_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svasr_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svasr_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_n_s32,_x,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s32,_m,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %[[PG]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: 
%[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s32,_z,)(pg, op1, op2); +} + +svint8_t test_svasr_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svasr_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svasr_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svasr_wide_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr_wide,_n_s32,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c new file mode 100644 index 0000000000000..b941c3ddaf09e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c @@ -0,0 +1,479 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
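+// For example, with SVE_OVERLOADED_FORMS defined the macro below pastes only
+// the "used" arguments A1 and A3, so SVE_ACLE_FUNC(svbic,_s8,_z,) expands to
+// the overloaded name svbic_z; without it, all four arguments are pasted,
+// giving the fully-suffixed name svbic_s8_z.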
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svbic_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svbic_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svbic_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svbic_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svbic_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svbic_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svbic_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svbic_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svbic_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svbic_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svbic_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svbic_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svbic_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svbic_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svbic_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svbic_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svbic_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svbic_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svbic_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svbic_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svbic_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svbic_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svbic_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svbic_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svbic_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svbic_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svbic_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svbic_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svbic_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svbic_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svbic_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svbic_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svbic_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svbic_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svbic_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svbic_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + 
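+  // Reading note (an explanatory aside, not a FileCheck directive): the _x
+  // ("don't care") forms such as this one are expected to lower directly to
+  // the predicated @llvm.aarch64.sve.bic intrinsic, with none of the
+  // @llvm.aarch64.sve.sel merges into zeroinitializer that the _z forms
+  // above require, since inactive lanes may hold any value.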
return SVE_ACLE_FUNC(svbic,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svbic_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svbic_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svbic_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svbic_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svbic_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svbic_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svbic_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svbic_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svbic_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svbic_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svbic_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svbic_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_u64,_x,)(pg, op1, op2); +} + +svint8_t test_svbic_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svbic_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svbic_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svbic_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svbic_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svbic_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( 
%[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svbic_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svbic_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svbic_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svbic_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svbic_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svbic_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svbic_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svbic_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svbic_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svbic_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svbic_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svbic_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svbic_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svbic_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svbic_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svbic_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svbic_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svbic_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svbic_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svbic_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svbic_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svbic_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svbic_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svbic_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svbic_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svbic_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svbic_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svbic_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svbic_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svbic_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svbic_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svbic_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svbic_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svbic_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svbic_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svbic_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svbic_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svbic_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svbic_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svbic_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svbic_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svbic_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_n_u64,_x,)(pg, op1, op2); +} + +svbool_t test_svbic_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svbic_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.bic.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svbic,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c new file mode 100644 index 0000000000000..5bfd9cf1a0cd4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// 
RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svbrka_b_z(svbool_t pg, svbool_t op)
+{
+  // CHECK-LABEL: test_svbrka_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrka,_b,_z,)(pg, op);
+}
+
+svbool_t test_svbrka_b_m(svbool_t inactive, svbool_t pg, svbool_t op)
+{
+  // CHECK-LABEL: test_svbrka_b_m
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brka.nxv16i1(<vscale x 16 x i1> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %op)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrka,_b,_m,)(inactive, pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
new file mode 100644
index 0000000000000..29f990b9bf3df
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svbrkb_b_z(svbool_t pg, svbool_t op)
+{
+  // CHECK-LABEL: test_svbrkb_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrkb,_b,_z,)(pg, op);
+}
+
+svbool_t test_svbrkb_b_m(svbool_t inactive, svbool_t pg, svbool_t op)
+{
+  // CHECK-LABEL: test_svbrkb_b_m
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.nxv16i1(<vscale x 16 x i1> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %op)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrkb,_b,_m,)(inactive, pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
new file mode 100644
index 0000000000000..8894e89c789c6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
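+// Reading note (an explanatory aside, not part of the test): the svbrk*
+// tests in this and the neighbouring files operate purely on predicates, so
+// every intrinsic call below is expected to use the <vscale x 16 x i1> form
+// directly, with no @llvm.aarch64.sve.convert.from.svbool conversion of the
+// kind the element-typed tests above require.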
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svbrkn_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svbrkn_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrkn,_b,_z,)(pg, op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
new file mode 100644
index 0000000000000..80201d5b5e9d8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svbrkpa_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svbrkpa_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrkpa,_b,_z,)(pg, op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
new file mode 100644
index 0000000000000..d446cdf6f87aa
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svbrkpb_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svbrkpb_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svbrkpb,_b,_z,)(pg, op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
new file mode 100644
index 0000000000000..16319796c4be9
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused...
macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svcadd_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv8f16( %[[PG]], %[[SEL]], %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 90); +} + +svfloat16_t test_svcadd_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svcadd_f16_z_1 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv8f16( %[[PG]], %[[SEL]], %op2, i32 270) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 270); +} + +svfloat32_t test_svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svcadd_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv4f32( %[[PG]], %[[SEL]], %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f32,_z,)(pg, op1, op2, 90); +} + +svfloat64_t test_svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svcadd_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv2f64( %[[PG]], %[[SEL]], %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f64,_z,)(pg, op1, op2, 90); +} + +svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svcadd_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv8f16( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f16,_m,)(pg, op1, op2, 90); +} + +svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svcadd_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv4f32( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f32,_m,)(pg, op1, op2, 90); +} + +svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svcadd_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv2f64( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f64,_m,)(pg, op1, op2, 90); +} + +svfloat16_t test_svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svcadd_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv8f16( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f16,_x,)(pg, op1, op2, 90); +} + +svfloat32_t test_svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svcadd_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv4f32( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f32,_x,)(pg, op1, op2, 90); +} + +svfloat64_t test_svcadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svcadd_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcadd.nxv2f64( %[[PG]], %op1, %op2, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcadd,_f64,_x,)(pg, op1, op2, 90); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c new file mode 100644 index 0000000000000..db5f8bf6ce2f7 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c @@ -0,0 +1,205 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svclasta_s8(svbool_t pg, svint8_t fallback, svint8_t data) +{ + // CHECK-LABEL: test_svclasta_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv16i8( %pg, %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_s8,,)(pg, fallback, data); +} + +svint16_t test_svclasta_s16(svbool_t pg, svint16_t fallback, svint16_t data) +{ + // CHECK-LABEL: test_svclasta_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv8i16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_s16,,)(pg, fallback, data); +} + +svint32_t test_svclasta_s32(svbool_t pg, svint32_t fallback, svint32_t data) +{ + // CHECK-LABEL: test_svclasta_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv4i32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_s32,,)(pg, fallback, data); +} + +svint64_t test_svclasta_s64(svbool_t pg, svint64_t fallback, svint64_t data) +{ + // CHECK-LABEL: test_svclasta_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv2i64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_s64,,)(pg, fallback, data); +} + +svuint8_t test_svclasta_u8(svbool_t pg, svuint8_t fallback, svuint8_t data) +{ + // CHECK-LABEL: test_svclasta_u8 + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.clasta.nxv16i8( %pg, %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_u8,,)(pg, fallback, data); +} + +svuint16_t test_svclasta_u16(svbool_t pg, svuint16_t fallback, svuint16_t data) +{ + // CHECK-LABEL: test_svclasta_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv8i16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_u16,,)(pg, fallback, data); +} + +svuint32_t test_svclasta_u32(svbool_t pg, svuint32_t fallback, svuint32_t data) +{ + // CHECK-LABEL: test_svclasta_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv4i32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_u32,,)(pg, fallback, data); +} + +svuint64_t test_svclasta_u64(svbool_t pg, svuint64_t fallback, svuint64_t data) +{ + // CHECK-LABEL: test_svclasta_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv2i64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_u64,,)(pg, fallback, data); +} + +svfloat16_t test_svclasta_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data) +{ + // CHECK-LABEL: test_svclasta_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv8f16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_f16,,)(pg, fallback, data); +} + +svfloat32_t test_svclasta_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data) +{ + // CHECK-LABEL: test_svclasta_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv4f32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_f32,,)(pg, fallback, data); +} + +svfloat64_t test_svclasta_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data) +{ + // CHECK-LABEL: test_svclasta_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clasta.nxv2f64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_f64,,)(pg, fallback, data); +} + +int8_t test_svclasta_n_s8(svbool_t pg, int8_t fallback, svint8_t data) +{ + // CHECK-LABEL: test_svclasta_n_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8( %pg, i8 %fallback, %data) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_s8,,)(pg, fallback, data); +} + +int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data) +{ + // CHECK-LABEL: test_svclasta_n_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( %[[PG]], i16 %fallback, %data) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_s16,,)(pg, fallback, data); +} + +int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data) +{ + // CHECK-LABEL: test_svclasta_n_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( %[[PG]], i32 
%fallback, %data) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_s32,,)(pg, fallback, data); +} + +int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data) +{ + // CHECK-LABEL: test_svclasta_n_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( %[[PG]], i64 %fallback, %data) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_s64,,)(pg, fallback, data); +} + +uint8_t test_svclasta_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data) +{ + // CHECK-LABEL: test_svclasta_n_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8( %pg, i8 %fallback, %data) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_u8,,)(pg, fallback, data); +} + +uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) +{ + // CHECK-LABEL: test_svclasta_n_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16( %[[PG]], i16 %fallback, %data) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_u16,,)(pg, fallback, data); +} + +uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) +{ + // CHECK-LABEL: test_svclasta_n_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32( %[[PG]], i32 %fallback, %data) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_u32,,)(pg, fallback, data); +} + +uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) +{ + // CHECK-LABEL: test_svclasta_n_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64( %[[PG]], i64 %fallback, %data) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_u64,,)(pg, fallback, data); +} + +float16_t test_svclasta_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data) +{ + // CHECK-LABEL: test_svclasta_n_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.clasta.n.nxv8f16( %[[PG]], half %fallback, %data) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_f16,,)(pg, fallback, data); +} + +float32_t test_svclasta_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data) +{ + // CHECK-LABEL: test_svclasta_n_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.clasta.n.nxv4f32( %[[PG]], float %fallback, %data) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_f32,,)(pg, fallback, data); +} + +float64_t test_svclasta_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) +{ + // CHECK-LABEL: test_svclasta_n_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.clasta.n.nxv2f64( %[[PG]], double %fallback, %data) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclasta,_n_f64,,)(pg, fallback, data); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c new file mode 100644 index 0000000000000..0d25c48d22ba5 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c @@ -0,0 +1,205 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svclastb_s8(svbool_t pg, svint8_t fallback, svint8_t data) +{ + // CHECK-LABEL: test_svclastb_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv16i8( %pg, %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_s8,,)(pg, fallback, data); +} + +svint16_t test_svclastb_s16(svbool_t pg, svint16_t fallback, svint16_t data) +{ + // CHECK-LABEL: test_svclastb_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv8i16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_s16,,)(pg, fallback, data); +} + +svint32_t test_svclastb_s32(svbool_t pg, svint32_t fallback, svint32_t data) +{ + // CHECK-LABEL: test_svclastb_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv4i32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_s32,,)(pg, fallback, data); +} + +svint64_t test_svclastb_s64(svbool_t pg, svint64_t fallback, svint64_t data) +{ + // CHECK-LABEL: test_svclastb_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv2i64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_s64,,)(pg, fallback, data); +} + +svuint8_t test_svclastb_u8(svbool_t pg, svuint8_t fallback, svuint8_t data) +{ + // CHECK-LABEL: test_svclastb_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv16i8( %pg, %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_u8,,)(pg, fallback, data); +} + +svuint16_t test_svclastb_u16(svbool_t pg, svuint16_t fallback, svuint16_t data) +{ + // CHECK-LABEL: test_svclastb_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv8i16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_u16,,)(pg, fallback, data); +} + +svuint32_t test_svclastb_u32(svbool_t pg, svuint32_t fallback, svuint32_t data) +{ + // CHECK-LABEL: test_svclastb_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv4i32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_u32,,)(pg, fallback, data); +} + +svuint64_t test_svclastb_u64(svbool_t pg, svuint64_t fallback, svuint64_t data) +{ + // CHECK-LABEL: test_svclastb_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.clastb.nxv2i64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_u64,,)(pg, fallback, data); +} + +svfloat16_t test_svclastb_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data) +{ + // CHECK-LABEL: test_svclastb_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv8f16( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_f16,,)(pg, fallback, data); +} + +svfloat32_t test_svclastb_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data) +{ + // CHECK-LABEL: test_svclastb_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv4f32( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_f32,,)(pg, fallback, data); +} + +svfloat64_t test_svclastb_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data) +{ + // CHECK-LABEL: test_svclastb_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clastb.nxv2f64( %[[PG]], %fallback, %data) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_f64,,)(pg, fallback, data); +} + +int8_t test_svclastb_n_s8(svbool_t pg, int8_t fallback, svint8_t data) +{ + // CHECK-LABEL: test_svclastb_n_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8( %pg, i8 %fallback, %data) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_s8,,)(pg, fallback, data); +} + +int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data) +{ + // CHECK-LABEL: test_svclastb_n_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( %[[PG]], i16 %fallback, %data) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_s16,,)(pg, fallback, data); +} + +int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data) +{ + // CHECK-LABEL: test_svclastb_n_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( %[[PG]], i32 %fallback, %data) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_s32,,)(pg, fallback, data); +} + +int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data) +{ + // CHECK-LABEL: test_svclastb_n_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( %[[PG]], i64 %fallback, %data) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_s64,,)(pg, fallback, data); +} + +uint8_t test_svclastb_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data) +{ + // CHECK-LABEL: test_svclastb_n_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8( %pg, i8 %fallback, %data) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_u8,,)(pg, fallback, data); +} + +uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) +{ + // CHECK-LABEL: test_svclastb_n_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16( %[[PG]], i16 %fallback, %data) + // CHECK: ret i16 
%[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_u16,,)(pg, fallback, data); +} + +uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) +{ + // CHECK-LABEL: test_svclastb_n_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32( %[[PG]], i32 %fallback, %data) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_u32,,)(pg, fallback, data); +} + +uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) +{ + // CHECK-LABEL: test_svclastb_n_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64( %[[PG]], i64 %fallback, %data) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_u64,,)(pg, fallback, data); +} + +float16_t test_svclastb_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data) +{ + // CHECK-LABEL: test_svclastb_n_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.clastb.n.nxv8f16( %[[PG]], half %fallback, %data) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_f16,,)(pg, fallback, data); +} + +float32_t test_svclastb_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data) +{ + // CHECK-LABEL: test_svclastb_n_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.clastb.n.nxv4f32( %[[PG]], float %fallback, %data) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_f32,,)(pg, fallback, data); +} + +float64_t test_svclastb_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) +{ + // CHECK-LABEL: test_svclastb_n_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.clastb.n.nxv2f64( %[[PG]], double %fallback, %data) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclastb,_n_f64,,)(pg, fallback, data); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c new file mode 100644 index 0000000000000..e464ac5a2ff66 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c @@ -0,0 +1,116 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svuint8_t test_svcls_s8_z(svbool_t pg, svint8_t op)
+{
+  // CHECK-LABEL: test_svcls_s8_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %op)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s8,_z,)(pg, op);
+}
+
+svuint16_t test_svcls_s16_z(svbool_t pg, svint16_t op)
+{
+  // CHECK-LABEL: test_svcls_s16_z
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s16,_z,)(pg, op);
+}
+
+svuint32_t test_svcls_s32_z(svbool_t pg, svint32_t op)
+{
+  // CHECK-LABEL: test_svcls_s32_z
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s32,_z,)(pg, op);
+}
+
+svuint64_t test_svcls_s64_z(svbool_t pg, svint64_t op)
+{
+  // CHECK-LABEL: test_svcls_s64_z
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s64,_z,)(pg, op);
+}
+
+svuint8_t test_svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
+{
+  // CHECK-LABEL: test_svcls_s8_m
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %inactive, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %op)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s8,_m,)(inactive, pg, op);
+}
+
+svuint16_t test_svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
+{
+  // CHECK-LABEL: test_svcls_s16_m
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %inactive, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s16,_m,)(inactive, pg, op);
+}
+
+svuint32_t test_svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
+{
+  // CHECK-LABEL: test_svcls_s32_m
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %inactive, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s32,_m,)(inactive, pg, op);
+}
+
+svuint64_t test_svcls_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
+{
+  // CHECK-LABEL: test_svcls_s64_m
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %inactive, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s64,_m,)(inactive, pg, op);
+}
+
+svuint8_t test_svcls_s8_x(svbool_t pg, svint8_t op)
+{
+  // CHECK-LABEL: test_svcls_s8_x
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %op)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s8,_x,)(pg, op);
+}
+
+svuint16_t test_svcls_s16_x(svbool_t pg, svint16_t op)
+{
+  // CHECK-LABEL: test_svcls_s16_x
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcls,_s16,_x,)(pg, op);
+}
+
+svuint32_t test_svcls_s32_x(svbool_t 
pg, svint32_t op) +{ + // CHECK-LABEL: test_svcls_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s32,_x,)(pg, op); +} + +svuint64_t test_svcls_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcls_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c new file mode 100644 index 0000000000000..fe46e4a655ace --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c @@ -0,0 +1,221 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint8_t test_svclz_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svclz_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s8,_z,)(pg, op); +} + +svuint16_t test_svclz_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svclz_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s16,_z,)(pg, op); +} + +svuint32_t test_svclz_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svclz_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s32,_z,)(pg, op); +} + +svuint64_t test_svclz_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svclz_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s64,_z,)(pg, op); +} + +svuint8_t test_svclz_u8_z(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svclz_u8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u8,_z,)(pg, op); +} + +svuint16_t test_svclz_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svclz_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u16,_z,)(pg, op); +} + +svuint32_t 
test_svclz_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svclz_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u32,_z,)(pg, op); +} + +svuint64_t test_svclz_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svclz_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u64,_z,)(pg, op); +} + +svuint8_t test_svclz_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svclz_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s8,_m,)(inactive, pg, op); +} + +svuint16_t test_svclz_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svclz_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s16,_m,)(inactive, pg, op); +} + +svuint32_t test_svclz_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svclz_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s32,_m,)(inactive, pg, op); +} + +svuint64_t test_svclz_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svclz_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s64,_m,)(inactive, pg, op); +} + +svuint8_t test_svclz_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svclz_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u8,_m,)(inactive, pg, op); +} + +svuint16_t test_svclz_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svclz_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svclz_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svclz_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svclz_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svclz_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svclz,_u64,_m,)(inactive, pg, op); +} + +svuint8_t test_svclz_s8_x(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svclz_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s8,_x,)(pg, op); +} + +svuint16_t test_svclz_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svclz_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s16,_x,)(pg, op); +} + +svuint32_t test_svclz_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svclz_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s32,_x,)(pg, op); +} + +svuint64_t test_svclz_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svclz_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_s64,_x,)(pg, op); +} + +svuint8_t test_svclz_u8_x(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svclz_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u8,_x,)(pg, op); +} + +svuint16_t test_svclz_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svclz_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u16,_x,)(pg, op); +} + +svuint32_t test_svclz_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svclz_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u32,_x,)(pg, op); +} + +svuint64_t test_svclz_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svclz_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.clz.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svclz,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c index 454d834d23bdf..29c86bb08b882 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c @@ -10,6 +10,66 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svcmla_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 
0); +} + +svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svcmla_f16_z_1 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 90); +} + +svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svcmla_f16_z_2 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3, i32 180) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 180); +} + +svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svcmla_f16_z_3 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3, i32 270) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 270); +} + +svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svcmla_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f32,_z,)(pg, op1, op2, op3, 0); +} + +svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svcmla_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f64,_z,)(pg, op1, op2, op3, 90); +} + svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { // CHECK-LABEL: test_svcmla_f16_m @@ -37,6 +97,33 @@ svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0); } +svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svcmla_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv8f16( %[[PG]], %op1, %op2, %op3, i32 90) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f16,_x,)(pg, op1, op2, op3, 90); +} + +svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svcmla_f32_x + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv4f32( %[[PG]], %op1, %op2, %op3, i32 180) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f32,_x,)(pg, op1, op2, op3, 180); +} + +svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svcmla_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcmla.nxv2f64( %[[PG]], %op1, %op2, %op3, i32 270) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcmla,_f64,_x,)(pg, op1, op2, op3, 270); +} + svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { // CHECK-LABEL: test_svcmla_lane_f16 diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c new file mode 100644 index 0000000000000..4f98be8c3f49a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c @@ -0,0 +1,221 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svcnot_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnot_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s8,_z,)(pg, op); +} + +svint16_t test_svcnot_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnot_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s16,_z,)(pg, op); +} + +svint32_t test_svcnot_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnot_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s32,_z,)(pg, op); +} + +svint64_t test_svcnot_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnot_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s64,_z,)(pg, op); +} + +svuint8_t test_svcnot_u8_z(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnot_u8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u8,_z,)(pg, op); +} + +svuint16_t test_svcnot_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnot_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = 
call @llvm.aarch64.sve.cnot.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u16,_z,)(pg, op); +} + +svuint32_t test_svcnot_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnot_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u32,_z,)(pg, op); +} + +svuint64_t test_svcnot_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnot_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u64,_z,)(pg, op); +} + +svint8_t test_svcnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnot_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s8,_m,)(inactive, pg, op); +} + +svint16_t test_svcnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnot_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s16,_m,)(inactive, pg, op); +} + +svint32_t test_svcnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnot_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svcnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnot_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s64,_m,)(inactive, pg, op); +} + +svuint8_t test_svcnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnot_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u8,_m,)(inactive, pg, op); +} + +svuint16_t test_svcnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnot_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svcnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnot_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svcnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnot_u64_m + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u64,_m,)(inactive, pg, op); +} + +svint8_t test_svcnot_s8_x(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnot_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s8,_x,)(pg, op); +} + +svint16_t test_svcnot_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnot_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s16,_x,)(pg, op); +} + +svint32_t test_svcnot_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnot_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s32,_x,)(pg, op); +} + +svint64_t test_svcnot_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnot_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_s64,_x,)(pg, op); +} + +svuint8_t test_svcnot_u8_x(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnot_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u8,_x,)(pg, op); +} + +svuint16_t test_svcnot_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnot_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u16,_x,)(pg, op); +} + +svuint32_t test_svcnot_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnot_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u32,_x,)(pg, op); +} + +svuint64_t test_svcnot_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnot_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnot.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnot,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c new file mode 100644 index 0000000000000..be5b3c30e74c2 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c @@ -0,0 +1,302 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + 
+#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint8_t test_svcnt_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnt_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s8,_z,)(pg, op); +} + +svuint16_t test_svcnt_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnt_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s16,_z,)(pg, op); +} + +svuint32_t test_svcnt_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnt_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s32,_z,)(pg, op); +} + +svuint64_t test_svcnt_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnt_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s64,_z,)(pg, op); +} + +svuint8_t test_svcnt_u8_z(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnt_u8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u8,_z,)(pg, op); +} + +svuint16_t test_svcnt_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnt_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u16,_z,)(pg, op); +} + +svuint32_t test_svcnt_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnt_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u32,_z,)(pg, op); +} + +svuint64_t test_svcnt_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnt_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u64,_z,)(pg, op); +} + +svuint16_t test_svcnt_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svcnt_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f16,_z,)(pg, op); +} + +svuint32_t test_svcnt_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svcnt_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4f32( zeroinitializer, %[[PG]], %op) 
+ // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f32,_z,)(pg, op); +} + +svuint64_t test_svcnt_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svcnt_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f64,_z,)(pg, op); +} + +svuint8_t test_svcnt_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnt_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s8,_m,)(inactive, pg, op); +} + +svuint16_t test_svcnt_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnt_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s16,_m,)(inactive, pg, op); +} + +svuint32_t test_svcnt_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnt_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s32,_m,)(inactive, pg, op); +} + +svuint64_t test_svcnt_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnt_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s64,_m,)(inactive, pg, op); +} + +svuint8_t test_svcnt_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnt_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u8,_m,)(inactive, pg, op); +} + +svuint16_t test_svcnt_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnt_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svcnt_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnt_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svcnt_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnt_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u64,_m,)(inactive, pg, op); +} + +svuint16_t test_svcnt_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svcnt_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f16,_m,)(inactive, pg, op); +} + +svuint32_t test_svcnt_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svcnt_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f32,_m,)(inactive, pg, op); +} + +svuint64_t test_svcnt_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svcnt_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_f64,_m,)(inactive, pg, op); +} + +svuint8_t test_svcnt_s8_x(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcnt_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s8,_x,)(pg, op); +} + +svuint16_t test_svcnt_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcnt_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s16,_x,)(pg, op); +} + +svuint32_t test_svcnt_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcnt_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s32,_x,)(pg, op); +} + +svuint64_t test_svcnt_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcnt_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_s64,_x,)(pg, op); +} + +svuint8_t test_svcnt_u8_x(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svcnt_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u8,_x,)(pg, op); +} + +svuint16_t test_svcnt_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svcnt_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u16,_x,)(pg, op); +} + +svuint32_t test_svcnt_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcnt_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u32,_x,)(pg, op); +} + +svuint64_t test_svcnt_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcnt_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcnt,_u64,_x,)(pg, op); +} + 
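A brief aside on the harness used by every test in this patch: SVE_ACLE_FUNC token-pastes the intrinsic name from its "used" arguments, so one call site exercises both naming schemes selected by the two RUN lines. A minimal sketch of the expansion, for illustration only (not part of the patch):

// SVE_ACLE_FUNC(svcnt,_u64,_x,)(pg, op)
//   default build:           expands to svcnt_u64_x(pg, op)  // fully-suffixed form
//   -DSVE_OVERLOADED_FORMS:  expands to svcnt_x(pg, op)      // overloaded form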
+svuint16_t test_svcnt_f16_x(svbool_t pg, svfloat16_t op)
+{
+  // CHECK-LABEL: test_svcnt_f16_x
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcnt,_f16,_x,)(pg, op);
+}
+
+svuint32_t test_svcnt_f32_x(svbool_t pg, svfloat32_t op)
+{
+  // CHECK-LABEL: test_svcnt_f32_x
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcnt,_f32,_x,)(pg, op);
+}
+
+svuint64_t test_svcnt_f64_x(svbool_t pg, svfloat64_t op)
+{
+  // CHECK-LABEL: test_svcnt_f64_x
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcnt,_f64,_x,)(pg, op);
+}
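Before the next file, it may help to spell out the _z, _m and _x suffixes exercised above; the sketch below is illustrative only, reusing the svcnt forms tested in this file. The CHECK lines above show the matching first operand of the intrinsic: zeroinitializer, %inactive, or undef.

#include <arm_sve.h>

// _z (zeroing): inactive lanes of the result are set to zero.
svuint32_t cnt_z(svbool_t pg, svuint32_t x) { return svcnt_u32_z(pg, x); }

// _m (merging): inactive lanes are taken from the extra 'inactive' operand.
svuint32_t cnt_m(svuint32_t inactive, svbool_t pg, svuint32_t x) { return svcnt_u32_m(inactive, pg, x); }

// _x (don't-care): inactive lanes hold unspecified values.
svuint32_t cnt_x(svbool_t pg, svuint32_t x) { return svcnt_u32_x(pg, x); }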
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint32_t test_svcompact_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcompact_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv4i32( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_s32,,)(pg, op); +} + +svint64_t test_svcompact_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcompact_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv2i64( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_s64,,)(pg, op); +} + +svuint32_t test_svcompact_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svcompact_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv4i32( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_u32,,)(pg, op); +} + +svuint64_t test_svcompact_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svcompact_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv2i64( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_u64,,)(pg, op); +} + +svfloat32_t test_svcompact_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svcompact_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv4f32( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_f32,,)(pg, op); +} + +svfloat64_t test_svcompact_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svcompact_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.compact.nxv2f64( %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcompact,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c index e199e8c12c19d..3a78dc222c8ae 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c @@ -245,3 +245,180 @@ svuint64_t test_svdiv_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svdiv,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svdiv_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdiv_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdiv_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdiv_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svdiv_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdiv_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdiv_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdiv_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svdiv_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdiv_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdiv_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdiv_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svdiv_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: 
test_svdiv_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svdiv_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svdiv_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svdiv_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svdiv_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdiv_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdiv.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdiv,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c index 16a0b363bbd8b..2b590b839d9ef 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c @@ -245,3 +245,180 @@ svuint64_t test_svdivr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svdivr,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svdivr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdivr_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdivr_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdivr_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svdivr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdivr_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdivr_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdivr_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svdivr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svdivr_f16_x + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svdivr_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svdivr_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svdivr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svdivr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %op1, 
%[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svdivr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svdivr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svdivr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svdivr_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fdivr.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdivr,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c new file mode 100644 index 0000000000000..7393ef705e523 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c @@ -0,0 +1,389 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svdupq_lane_s8(svint8_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv16i8( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_s8,,)(data, index); +} + +svint16_t test_svdupq_lane_s16(svint16_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv8i16( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_s16,,)(data, index); +} + +svint32_t test_svdupq_lane_s32(svint32_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv4i32( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_s32,,)(data, index); +} + +svint64_t test_svdupq_lane_s64(svint64_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv2i64( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_s64,,)(data, index); +} + +svuint8_t test_svdupq_lane_u8(svuint8_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv16i8( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_u8,,)(data, index); +} + +svuint16_t test_svdupq_lane_u16(svuint16_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv8i16( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_u16,,)(data, index); +} + +svuint32_t test_svdupq_lane_u32(svuint32_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv4i32( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_u32,,)(data, index); +} + +svuint64_t test_svdupq_lane_u64(svuint64_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv2i64( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_u64,,)(data, index); +} + +svfloat16_t test_svdupq_lane_f16(svfloat16_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv8f16( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_f16,,)(data, index); +} + +svfloat32_t test_svdupq_lane_f32(svfloat32_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv4f32( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_f32,,)(data, index); +} + +svfloat64_t test_svdupq_lane_f64(svfloat64_t data, uint64_t index) +{ + // CHECK-LABEL: test_svdupq_lane_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.dupq.lane.nxv2f64( %data, i64 %index) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdupq_lane,_f64,,)(data, index); +} + +svint8_t test_svdupq_n_s8(int8_t x0, int8_t x1, int8_t x2, int8_t x3, + int8_t x4, int8_t x5, int8_t x6, int8_t x7, + int8_t x8, int8_t x9, int8_t x10, int8_t x11, + int8_t x12, int8_t x13, int8_t x14, 
int8_t x15) +{ + // CHECK-LABEL: test_svdupq_n_s8 + // CHECK: %[[ALLOCA:.*]] = alloca [16 x i8], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i8 %x0, i8* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 15 + // CHECK: store i8 %x15, i8* %[[GEP]], align 1 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv16i8( %[[PTRUE]], i8* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_s8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); +} + +svint16_t test_svdupq_n_s16(int16_t x0, int16_t x1, int16_t x2, int16_t x3, + int16_t x4, int16_t x5, int16_t x6, int16_t x7) +{ + // CHECK-LABEL: test_svdupq_n_s16 + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [8 x i16], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i16 %x0, i16* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 7 + // CHECK: store i16 %x7, i16* %[[GEP]], align 2 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv8i16( %{{.*}}, i16* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_s16,)(x0, x1, x2, x3, x4, x5, x6, x7); +} + +svint32_t test_svdupq_n_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3) +{ + // CHECK-LABEL: test_svdupq_n_s32 + // CHECK: %[[ALLOCA:.*]] = alloca [4 x i32], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i32 %x0, i32* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 3 + // CHECK: store i32 %x3, i32* %[[GEP]], align 4 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4i32( %{{.*}}, i32* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_s32,)(x0, x1, x2, x3); +} + +svint64_t test_svdupq_n_s64(int64_t x0, int64_t x1) +{ + // CHECK-LABEL: test_svdupq_n_s64 + // CHECK: %[[ALLOCA:.*]] = alloca [2 x i64], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i64 %x0, i64* %[[BASE]], align 16 + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 1 + // CHECK: store i64 %x1, i64* %[[GEP]], align 8 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2i64( %{{.*}}, i64* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_s64,)(x0, x1); +} + +svuint8_t test_svdupq_n_u8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, + uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, + uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, + uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) +{ + // CHECK-LABEL: test_svdupq_n_u8 + // CHECK: %[[ALLOCA:.*]] = alloca [16 x i8], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i8 %x0, i8* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = 
getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 15 + // CHECK: store i8 %x15, i8* %[[GEP]], align 1 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv16i8( %[[PTRUE]], i8* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_u8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); +} + +svuint16_t test_svdupq_n_u16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, + uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) +{ + // CHECK-LABEL: test_svdupq_n_u16 + // CHECK: %[[ALLOCA:.*]] = alloca [8 x i16], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i16 %x0, i16* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 7 + // CHECK: store i16 %x7, i16* %[[GEP]], align 2 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv8i16( %{{.*}}, i16* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_u16,)(x0, x1, x2, x3, x4, x5, x6, x7); +} + +svuint32_t test_svdupq_n_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) +{ + // CHECK-LABEL: test_svdupq_n_u32 + // CHECK: %[[ALLOCA:.*]] = alloca [4 x i32], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i32 %x0, i32* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 3 + // CHECK: store i32 %x3, i32* %[[GEP]], align 4 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4i32( %{{.*}}, i32* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_u32,)(x0, x1, x2, x3); +} + +svuint64_t test_svdupq_n_u64(uint64_t x0, uint64_t x1) +{ + // CHECK-LABEL: test_svdupq_n_u64 + // CHECK: %[[ALLOCA:.*]] = alloca [2 x i64], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i64 %x0, i64* %[[BASE]], align 16 + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 1 + // CHECK: store i64 %x1, i64* %[[GEP]], align 8 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2i64( %{{.*}}, i64* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_u64,)(x0, x1); +} + +svfloat16_t test_svdupq_n_f16(float16_t x0, float16_t x1, float16_t x2, float16_t x3, + float16_t x4, float16_t x5, float16_t x6, float16_t x7) +{ + // CHECK-LABEL: test_svdupq_n_f16 + // CHECK: %[[ALLOCA:.*]] = alloca [8 x half], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x half], [8 x half]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store half %x0, half* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x half], [8 x half]* %[[ALLOCA]], i64 0, i64 7 + // CHECK: store half %x7, half* %[[GEP]], align 2 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv8f16( %{{.*}}, half* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return 
SVE_ACLE_FUNC(svdupq,_n,_f16,)(x0, x1, x2, x3, x4, x5, x6, x7); +} + +svfloat32_t test_svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3) +{ + // CHECK-LABEL: test_svdupq_n_f32 + // CHECK: %[[ALLOCA:.*]] = alloca [4 x float], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [4 x float], [4 x float]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store float %x0, float* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [4 x float], [4 x float]* %[[ALLOCA]], i64 0, i64 3 + // CHECK: store float %x3, float* %[[GEP]], align 4 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4f32( %{{.*}}, float* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_f32,)(x0, x1, x2, x3); +} + +svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) +{ + // CHECK-LABEL: test_svdupq_n_f64 + // CHECK: %[[ALLOCA:.*]] = alloca [2 x double], align 16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [2 x double], [2 x double]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store double %x0, double* %[[BASE]], align 16 + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [2 x double], [2 x double]* %[[ALLOCA]], i64 0, i64 1 + // CHECK: store double %x1, double* %[[GEP]], align 8 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2f64( %{{.*}}, double* nonnull %[[BASE]]) + // CHECK: ret %[[LOAD]] + return SVE_ACLE_FUNC(svdupq,_n,_f64,)(x0, x1); +} + +svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, + bool x4, bool x5, bool x6, bool x7, + bool x8, bool x9, bool x10, bool x11, + bool x12, bool x13, bool x14, bool x15) +{ + // CHECK-LABEL: test_svdupq_n_b8 + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [16 x i8], align 16 + // CHECK-DAG: %[[X0:.*]] = zext i1 %x0 to i8 + // CHECK-DAG: %[[X15:.*]] = zext i1 %x15 to i8 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i8 %[[X0]], i8* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[ALLOCA]], i64 0, i64 15 + // CHECK: store i8 %[[X15]], i8* %[[GEP]], align 1 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv16i8( %[[PTRUE]], i8* nonnull %[[BASE]]) + // CHECK: %[[ZERO:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0) + // CHECK: %[[CMP:.*]] = call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %[[PTRUE]], %[[LOAD]], %[[ZERO]]) + // CHECK: ret %[[CMP]] + return SVE_ACLE_FUNC(svdupq,_n,_b8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); +} + +svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, + bool x4, bool x5, bool x6, bool x7) +{ + // CHECK-LABEL: test_svdupq_n_b16 + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [8 x i16], align 16 + // CHECK-DAG: %[[X0:.*]] = zext i1 %x0 to i16 + // CHECK-DAG: %[[X7:.*]] = zext i1 %x7 to i16 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i16 %[[X0]], i16* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x i16], [8 x i16]* %[[ALLOCA]], i64 0, i64 7 + // CHECK: store i16 %[[X7]], i16* %[[GEP]], align 2 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + // CHECK: %[[LOAD:.*]] = call 
@llvm.aarch64.sve.ld1rq.nxv8i16( %{{.*}}, i16* nonnull %[[BASE]]) + // CHECK: %[[ZERO:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0) + // CHECK: %[[CMP:.*]] = call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %{{.*}}, %[[LOAD]], %[[ZERO]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[CMP]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svdupq,_n,_b16,)(x0, x1, x2, x3, x4, x5, x6, x7); +} + +svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) +{ + // CHECK-LABEL: test_svdupq_n_b32 + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [4 x i32], align 16 + // CHECK-DAG: %[[X0:.*]] = zext i1 %x0 to i32 + // CHECK-DAG: %[[X3:.*]] = zext i1 %x3 to i32 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i32 %[[X0]], i32* %[[BASE]], align 16 + // + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[ALLOCA]], i64 0, i64 3 + // CHECK: store i32 %[[X3]], i32* %[[GEP]], align 4 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4i32( %{{.*}}, i32* nonnull %[[BASE]]) + // CHECK: %[[ZERO:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %{{.*}}, %[[LOAD]], %[[ZERO]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svdupq,_n,_b32,)(x0, x1, x2, x3); +} + +svbool_t test_svdupq_n_b64(bool x0, bool x1) +{ + // CHECK-LABEL: test_svdupq_n_b64 + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [2 x i64], align 16 + // CHECK-DAG: %[[X0:.*]] = zext i1 %x0 to i64 + // CHECK-DAG: %[[X1:.*]] = zext i1 %x1 to i64 + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i64 %[[X0]], i64* %[[BASE]], align 16 + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 1 + // CHECK: store i64 %[[X1]], i64* %[[GEP]], align 8 + // CHECK-NOT: store + // CHECK: %[[PTRUE:.*]] = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2i64( %{{.*}}, i64* nonnull %[[BASE]]) + // CHECK: %[[ZERO:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 0) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cmpne.nxv2i64( %{{.*}}, %[[LOAD]], %[[ZERO]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svdupq,_n,_b64,)(x0, x1); +} + +// This test checks that the `alloca` is added to the entry-block. 
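+// (An alloca in the entry block is a static alloca: its stack slot is
+// reserved once per call rather than on every execution of the conditional
+// path, and the entry block is guaranteed to dominate the ld1rq load that
+// reads from it. The CHECK lines below are what actually enforce this.)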
+svint64_t test_svdupq_control_flow(int64_t x0, int64_t x1, svint64_t Default, bool P) +{ + // CHECK-LABEL: test_svdupq_control_flow + // CHECK: entry: + // CHECK-DAG: %[[ALLOCA:.*]] = alloca [2 x i64], align 16 + // CHECK-DAG: [[BR:.*]]: + // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 0 + // CHECK-DAG: store i64 %x0, i64* %[[BASE]], align 16 + // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [2 x i64], [2 x i64]* %[[ALLOCA]], i64 0, i64 1 + // CHECK: store i64 %x1, i64* %[[GEP]], align 8 + // CHECK-NOT: store + // CHECK: call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + // CHECK: %[[LOAD:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2i64( %{{.*}}, i64* nonnull %[[BASE]]) + // CHECK: [[END:.*]]: + // CHECK: %[[RETVAL:.*]] = phi [ %[[LOAD]], %if.end ], [ %Default, %entry ] + // CHECK: ret %[[RETVAL]] + if (P) + return Default; + return SVE_ACLE_FUNC(svdupq,_n,_s64,)(x0, x1); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c new file mode 100644 index 0000000000000..d721d4326d0a5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c @@ -0,0 +1,478 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_sveor_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_sveor_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_sveor_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_sveor_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_sveor_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_sveor_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_sveor_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_sveor_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %[[SEL]], %op2) + 
// CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_sveor_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_sveor_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_sveor_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_sveor_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_sveor_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_sveor_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_sveor_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_sveor_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u64,_z,)(pg, op1, op2); +} + +svint8_t test_sveor_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_sveor_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_sveor_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_sveor_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_sveor_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_sveor_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_sveor_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_sveor_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_sveor_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_sveor_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u8,_m,)(pg, op1, 
op2); +} + +svuint16_t test_sveor_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_sveor_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_sveor_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_sveor_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_sveor_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_sveor_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u64,_m,)(pg, op1, op2); +} + +svint8_t test_sveor_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_sveor_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_sveor_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_sveor_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_sveor_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_sveor_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_sveor_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_sveor_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_sveor_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_sveor_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_sveor_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_sveor_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_sveor_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_sveor_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u32,_x,)(pg, op1, op2); +} + +svuint64_t 
test_sveor_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_sveor_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_u64,_x,)(pg, op1, op2); +} + +svint8_t test_sveor_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_sveor_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_sveor_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_sveor_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_sveor_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_sveor_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s32,_z,)(pg, op1, op2); +} + +svint64_t test_sveor_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_sveor_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_sveor_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_sveor_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_sveor_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_sveor_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_sveor_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_sveor_n_u32_z + // 
CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_sveor_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_sveor_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u64,_z,)(pg, op1, op2); +} + +svint8_t test_sveor_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_sveor_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_sveor_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_sveor_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_sveor_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_sveor_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s32,_m,)(pg, op1, op2); +} + +svint64_t test_sveor_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_sveor_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_sveor_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_sveor_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_sveor_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_sveor_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u16,_m,)(pg, op1, 
op2); +} + +svuint32_t test_sveor_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_sveor_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_sveor_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_sveor_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u64,_m,)(pg, op1, op2); +} + +svint8_t test_sveor_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_sveor_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_sveor_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_sveor_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_sveor_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_sveor_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s32,_x,)(pg, op1, op2); +} + +svint64_t test_sveor_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_sveor_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_sveor_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_sveor_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_sveor_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_sveor_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(sveor,_n_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_sveor_n_u32_x(svbool_t pg, svuint32_t op1, 
uint32_t op2)
+{
+  // CHECK-LABEL: test_sveor_n_u32_x
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveor,_n_u32,_x,)(pg, op1, op2);
+}
+
+svuint64_t test_sveor_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+{
+  // CHECK-LABEL: test_sveor_n_u64_x
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op1, <vscale x 2 x i64> %[[DUP]])
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveor,_n_u64,_x,)(pg, op1, op2);
+}
+
+svbool_t test_sveor_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_sveor_b_z
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveor,_b,_z,)(pg, op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
new file mode 100644
index 0000000000000..cf4447ad52fd0
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
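To make the comment above concrete, here is how a single call expands under each configuration (shown purely for illustration; the names come from the tests below):

    SVE_ACLE_FUNC(sveorv,_s8,,)(pg, op)   // with SVE_OVERLOADED_FORMS: pastes A1##A3 -> sveorv(pg, op)
    SVE_ACLE_FUNC(sveorv,_s8,,)(pg, op)   // without it: pastes all four -> sveorv_s8(pg, op)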
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+int8_t test_sveorv_s8(svbool_t pg, svint8_t op)
+{
+  // CHECK-LABEL: test_sveorv_s8
+  // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %op)
+  // CHECK: ret i8 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_s8,,)(pg, op);
+}
+
+int16_t test_sveorv_s16(svbool_t pg, svint16_t op)
+{
+  // CHECK-LABEL: test_sveorv_s16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret i16 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_s16,,)(pg, op);
+}
+
+int32_t test_sveorv_s32(svbool_t pg, svint32_t op)
+{
+  // CHECK-LABEL: test_sveorv_s32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret i32 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_s32,,)(pg, op);
+}
+
+int64_t test_sveorv_s64(svbool_t pg, svint64_t op)
+{
+  // CHECK-LABEL: test_sveorv_s64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op)
+  // CHECK: ret i64 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_s64,,)(pg, op);
+}
+
+uint8_t test_sveorv_u8(svbool_t pg, svuint8_t op)
+{
+  // CHECK-LABEL: test_sveorv_u8
+  // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %op)
+  // CHECK: ret i8 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_u8,,)(pg, op);
+}
+
+uint16_t test_sveorv_u16(svbool_t pg, svuint16_t op)
+{
+  // CHECK-LABEL: test_sveorv_u16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret i16 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_u16,,)(pg, op);
+}
+
+uint32_t test_sveorv_u32(svbool_t pg, svuint32_t op)
+{
+  // CHECK-LABEL: test_sveorv_u32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret i32 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_u32,,)(pg, op);
+}
+
+uint64_t test_sveorv_u64(svbool_t pg, svuint64_t op)
+{
+  // CHECK-LABEL: test_sveorv_u64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op)
+  // CHECK: ret i64 %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(sveorv,_u64,,)(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_expa.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_expa.c
new file mode 100644
index 0000000000000..d1432042ec031
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_expa.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
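A rough functional note for the svexpa tests that follow (an informal paraphrase of the underlying FEXPA instruction, not something these tests verify): each unsigned element's low six bits index a table of 2^(i/64) mantissas and the bits above them supply the exponent, producing a coarse approximation of 2^x that software exp() kernels then refine with a polynomial.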
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svexpa_f16(svuint16_t op)
+{
+  // CHECK-LABEL: test_svexpa_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svexpa,_f16,,)(op);
+}
+
+svfloat32_t test_svexpa_f32(svuint32_t op)
+{
+  // CHECK-LABEL: test_svexpa_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svexpa,_f32,,)(op);
+}
+
+svfloat64_t test_svexpa_f64(svuint64_t op)
+{
+  // CHECK-LABEL: test_svexpa_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fexpa.x.nxv2f64(<vscale x 2 x i64> %op)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svexpa,_f64,,)(op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
new file mode 100644
index 0000000000000..720b81a28a189
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
@@ -0,0 +1,173 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint16_t test_svextb_s16_z(svbool_t pg, svint16_t op)
+{
+  // CHECK-LABEL: test_svextb_s16_z
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svextb,_s16,_z,)(pg, op);
+}
+
+svint32_t test_svextb_s32_z(svbool_t pg, svint32_t op)
+{
+  // CHECK-LABEL: test_svextb_s32_z
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svextb,_s32,_z,)(pg, op);
+}
+
+svint64_t test_svextb_s64_z(svbool_t pg, svint64_t op)
+{
+  // CHECK-LABEL: test_svextb_s64_z
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svextb,_s64,_z,)(pg, op);
+}
+
+svuint16_t test_svextb_u16_z(svbool_t pg, svuint16_t op)
+{
+  // CHECK-LABEL: test_svextb_u16_z
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %[[PG]], <vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svextb,_u16,_z,)(pg, op);
+}
+
+svuint32_t test_svextb_u32_z(svbool_t pg, svuint32_t op)
+{
+  // CHECK-LABEL: test_svextb_u32_z
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svextb,_u32,_z,)(pg, op);
+} + +svuint64_t test_svextb_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextb_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u64,_z,)(pg, op); +} + +svint16_t test_svextb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svextb_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s16,_m,)(inactive, pg, op); +} + +svint32_t test_svextb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svextb_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svextb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextb_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s64,_m,)(inactive, pg, op); +} + +svuint16_t test_svextb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svextb_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svextb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svextb_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svextb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextb_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u64,_m,)(inactive, pg, op); +} + +svint16_t test_svextb_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svextb_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s16,_x,)(pg, op); +} + +svint32_t test_svextb_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svextb_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s32,_x,)(pg, op); +} + +svint64_t test_svextb_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextb_s64_x + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s64,_x,)(pg, op); +} + +svuint16_t test_svextb_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svextb_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u16,_x,)(pg, op); +} + +svuint32_t test_svextb_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svextb_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u32,_x,)(pg, op); +} + +svuint64_t test_svextb_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextb_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c new file mode 100644 index 0000000000000..eb279566a82ea --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c @@ -0,0 +1,119 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
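Before the exth tests continue with the same pattern: the _z/_m/_x suffixes used throughout these files differ only in what inactive lanes receive, which is exactly what the zeroinitializer / %inactive / undef first arguments in the CHECK lines encode. A per-lane scalar sketch of that contract (the helper name and mode enum are invented for illustration):

    #include <stdbool.h>
    #include <stdint.h>

    enum sv_merge { MERGE_Z, MERGE_M, MERGE_X };

    /* One lane of svextb_s16_z/_m/_x: sign-extend the low byte where the
       predicate is true; inactive lanes are zeroed (_z), keep the 'inactive'
       operand (_m), or are unspecified (_x) -- modelled here by passing the
       input through. */
    static int16_t extb_s16_lane(bool active, int16_t inactive, int16_t op,
                                 enum sv_merge mode) {
      if (active)
        return (int16_t)(int8_t)op; /* sxtb: keep bits 7:0, sign-extend */
      switch (mode) {
      case MERGE_Z:
        return 0;
      case MERGE_M:
        return inactive;
      default:
        return op; /* _x: any value is permitted here */
      }
    }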
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint32_t test_svexth_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svexth_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s32,_z,)(pg, op); +} + +svint64_t test_svexth_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svexth_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s64,_z,)(pg, op); +} + +svuint32_t test_svexth_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svexth_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u32,_z,)(pg, op); +} + +svuint64_t test_svexth_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svexth_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u64,_z,)(pg, op); +} + +svint32_t test_svexth_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svexth_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svexth_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svexth_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s64,_m,)(inactive, pg, op); +} + +svuint32_t test_svexth_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svexth_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svexth_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svexth_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u64,_m,)(inactive, pg, op); +} + +svint32_t test_svexth_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svexth_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s32,_x,)(pg, op); +} + +svint64_t test_svexth_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: 
test_svexth_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxth.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_s64,_x,)(pg, op); +} + +svuint32_t test_svexth_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svexth_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u32,_x,)(pg, op); +} + +svuint64_t test_svexth_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svexth_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxth.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svexth,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c new file mode 100644 index 0000000000000..76626475c89a4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint64_t test_svextw_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextw_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtw.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_s64,_z,)(pg, op); +} + +svuint64_t test_svextw_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextw_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtw.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_u64,_z,)(pg, op); +} + +svint64_t test_svextw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextw_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtw.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_s64,_m,)(inactive, pg, op); +} + +svuint64_t test_svextw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextw_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtw.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_u64,_m,)(inactive, pg, op); +} + +svint64_t test_svextw_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextw_s64_x + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtw.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_s64,_x,)(pg, op); +} + +svuint64_t test_svextw_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svextw_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtw.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextw,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c new file mode 100644 index 0000000000000..2e99d68e51c45 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svlasta_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svlasta_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.lasta.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_s8,,)(pg, op); +} + +int16_t test_svlasta_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svlasta_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.lasta.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_s16,,)(pg, op); +} + +int32_t test_svlasta_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svlasta_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.lasta.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_s32,,)(pg, op); +} + +int64_t test_svlasta_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svlasta_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.lasta.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_s64,,)(pg, op); +} + +uint8_t test_svlasta_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svlasta_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.lasta.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_u8,,)(pg, op); +} + +uint16_t test_svlasta_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svlasta_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.lasta.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_u16,,)(pg, op); +} + +uint32_t test_svlasta_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svlasta_u32 + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.lasta.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_u32,,)(pg, op); +} + +uint64_t test_svlasta_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svlasta_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.lasta.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_u64,,)(pg, op); +} + +float16_t test_svlasta_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svlasta_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.lasta.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_f16,,)(pg, op); +} + +float32_t test_svlasta_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svlasta_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.lasta.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_f32,,)(pg, op); +} + +float64_t test_svlasta_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svlasta_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.lasta.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlasta,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c new file mode 100644 index 0000000000000..c48fb76da7220 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
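A quick semantic reminder for the lasta tests above and the lastb tests below (informal): svlastb extracts the element in the last active lane of pg, falling back to the very last lane when no lane is active, while svlasta extracts the element one past the last active lane, wrapping around to lane 0. Both reduce a vector to a single scalar, which is why every CHECK here expects a scalar i8/half/double return rather than a vector.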
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svlastb_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svlastb_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_s8,,)(pg, op); +} + +int16_t test_svlastb_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svlastb_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.lastb.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_s16,,)(pg, op); +} + +int32_t test_svlastb_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svlastb_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.lastb.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_s32,,)(pg, op); +} + +int64_t test_svlastb_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svlastb_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.lastb.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_s64,,)(pg, op); +} + +uint8_t test_svlastb_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svlastb_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_u8,,)(pg, op); +} + +uint16_t test_svlastb_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svlastb_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.lastb.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_u16,,)(pg, op); +} + +uint32_t test_svlastb_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svlastb_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.lastb.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_u32,,)(pg, op); +} + +uint64_t test_svlastb_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svlastb_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.lastb.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_u64,,)(pg, op); +} + +float16_t test_svlastb_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svlastb_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.lastb.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_f16,,)(pg, op); +} + +float32_t test_svlastb_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svlastb_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_f32,,)(pg, op); +} + +float64_t test_svlastb_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svlastb_f64 + // CHECK: 
%[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.lastb.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlastb,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c new file mode 100644 index 0000000000000..fc8ebaa18868c --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +uint64_t test_svlen_s8(svint8_t op) +{ + // CHECK-LABEL: test_svlen_s8 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 4 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s8,,)(op); +} + +uint64_t test_svlen_s16(svint16_t op) +{ + // CHECK-LABEL: test_svlen_s16 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 3 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s16,,)(op); +} + +uint64_t test_svlen_s32(svint32_t op) +{ + // CHECK-LABEL: test_svlen_s32 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 2 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s32,,)(op); +} + +uint64_t test_svlen_s64(svint64_t op) +{ + // CHECK-LABEL: test_svlen_s64 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 1 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s64,,)(op); +} + +uint64_t test_svlen_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svlen_u8 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 4 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_u8,,)(op); +} + +uint64_t test_svlen_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svlen_u16 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 3 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_u16,,)(op); +} + +uint64_t test_svlen_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svlen_u32 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 2 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_u32,,)(op); +} + +uint64_t test_svlen_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svlen_u64 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 1 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_u64,,)(op); +} + +uint64_t test_svlen_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svlen_f16 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 3 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_f16,,)(op); +} + +uint64_t test_svlen_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svlen_f32 + // 
CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 2 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_f32,,)(op); +} + +uint64_t test_svlen_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svlen_f64 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 1 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_f64,,)(op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c new file mode 100644 index 0000000000000..8b8728e3b8cdb --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c @@ -0,0 +1,481 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svlsl_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svlsl_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svlsl_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsl_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svlsl_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svlsl_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u8,_z,)(pg, op1, op2); +} + +svuint16_t 
test_svlsl_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svlsl_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsl_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svlsl_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svlsl_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svlsl_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svlsl_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsl_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svlsl_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svlsl_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svlsl_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svlsl_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // 
CHECK-LABEL: test_svlsl_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svlsl_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svlsl_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svlsl_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svlsl_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsl_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svlsl_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svlsl_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsl_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svlsl_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsl_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svlsl_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsl_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svlsl_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl,_u64,_x,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: 
test_svlsl_wide_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s32,_z,)(pg, op1, op2); +} + +svuint8_t test_svlsl_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svlsl_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svlsl_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u32,_z,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s32_m + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s32,_m,)(pg, op1, op2); +} + +svuint8_t test_svlsl_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svlsl_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svlsl_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u32,_m,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_s32,_x,)(pg, op1, op2); +} + +svuint8_t test_svlsl_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svlsl_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svlsl_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_u32,_x,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_n_s8_m(svbool_t pg, svint8_t op1, 
uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_m,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %[[PG]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_z,)(pg, op1, op2); +} + +svint8_t test_svlsl_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svlsl_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svlsl_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsl_wide_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsl.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c new file mode 100644 index 0000000000000..4dfabba2aa3a7 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c @@ -0,0 +1,291 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint8_t test_svlsr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsr_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svlsr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsr_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svlsr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsr_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svlsr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u64,_z,)(pg, op1, op2); +} + +svuint8_t test_svlsr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsr_u8_m + // CHECK: 
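One detail worth keeping in mind for the svlsl_wide tests above and the svlsr_wide tests below (informal): in the _wide forms the shift-amount vector always has 64-bit elements, and each narrow lane is shifted by the 64-bit element that overlaps it. That is why the _n variants broadcast the scalar with @llvm.aarch64.sve.dup.x.nxv2i64 even when the data being shifted is 8, 16, or 32 bits wide.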
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svlsr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsr_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svlsr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsr_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svlsr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u64,_m,)(pg, op1, op2); +} + +svuint8_t test_svlsr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svlsr_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svlsr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svlsr_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svlsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svlsr_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svlsr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr,_u64,_x,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u16,_z,)(pg, 
op1, op2); +} + +svuint32_t test_svlsr_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u32,_z,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svlsr_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u32,_m,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svlsr_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_u32,_x,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_m,)(pg, op1, op2); +} + 
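+// Note: the _n_ forms in this group take a scalar uint64_t shift amount which, as the CHECK lines expect, is broadcast to a vector of 64-bit elements via @llvm.aarch64.sve.dup.x.nxv2i64 before the wide-shift intrinsic is called; the _z forms additionally select op1 against zeroinitializer so that lanes left inactive by the predicate are zeroed, while the _m forms merge those lanes from op1 and the _x forms leave them unspecified. +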
+svuint32_t test_svlsr_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_m,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svlsr_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %[[OP]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_z,)(pg, op1, op2); +} + +svuint8_t test_svlsr_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svlsr_wide_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svlsr_wide_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svlsr_wide_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.lsr.wide.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c new file mode 100644 index 0000000000000..4c961bd9d13b8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c @@ -0,0 +1,646 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svmad_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmad_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmad_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmad_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmad_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmad_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmad_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + 
// CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmad_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmad_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmad_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmad_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmad_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmad_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmad_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmad_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmad_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmad_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmad_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmad_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmad_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmad_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmad_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmad_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmad_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmad_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmad_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmad_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_u64,_x,)(pg, op1, op2, op3); +} + +svint8_t 
test_svmad_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmad_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmad_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmad_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmad_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmad_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmad_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmad_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmad_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmad_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmad_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmad_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmad_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmad_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmad_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmad_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) 
+{ + // CHECK-LABEL: test_svmad_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmad_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmad_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmad_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmad_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmad_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmad_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmad_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmad_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmad_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmad_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmad_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmad_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmad_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svmad,_n_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmad_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmad_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmad_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmad_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mad.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_u64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmad_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmad_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmad_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmad_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmad_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmad_f64_m + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmad_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmad_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmad_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmad_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmad_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmad_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmad_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmad_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmad_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmad_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmad_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmad_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmad.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmad,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c index 0c88d609bbe83..4f91422d2a172 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c @@ -469,3 +469,179 @@ svuint64_t test_svmax_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svmax,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svmax_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmax_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret 
%[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmax_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmax_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmax_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmax_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmax_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmax_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmax_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmax_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmax_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmax_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmax_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmax_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmax_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmax_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmax_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmax_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svmax_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmax_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( 
%[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmax_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmax_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmax_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmax_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmax_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmax_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmax_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmax_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmax_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmax_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmax_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmax_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmax_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmax_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fmax.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmax_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmax_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmax.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmax,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c new file mode 100644 index 0000000000000..5e873b01e50cd --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c @@ -0,0 +1,187 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svmaxnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmaxnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f32_m + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmaxnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svmaxnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmaxnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f16_m + // CHECK-DAG: 
%[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmaxnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmaxnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmaxnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmaxnm_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnm,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c new file mode 100644 index 0000000000000..1b76fd2a35d2c --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +float16_t test_svmaxnmv_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svmaxnmv_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnmv,_f16,,)(pg, op); +} + +float32_t test_svmaxnmv_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svmaxnmv_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnmv,_f32,,)(pg, op); +} + +float64_t test_svmaxnmv_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svmaxnmv_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxnmv,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c new file mode 100644 index 0000000000000..145fbff7f46cb --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svmaxv_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svmaxv_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_s8,,)(pg, op); +} + +int16_t test_svmaxv_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svmaxv_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_s16,,)(pg, op); +} + +int32_t test_svmaxv_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svmaxv_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_s32,,)(pg, op); +} + +int64_t test_svmaxv_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svmaxv_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_s64,,)(pg, op); +} + +uint8_t test_svmaxv_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svmaxv_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_u8,,)(pg, op); +} + +uint16_t test_svmaxv_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svmaxv_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_u16,,)(pg, op); +} + +uint32_t test_svmaxv_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svmaxv_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_u32,,)(pg, op); +} + +uint64_t test_svmaxv_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svmaxv_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_u64,,)(pg, op); +} + +float16_t test_svmaxv_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svmaxv_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.fmaxv.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_f16,,)(pg, op); +} + +float32_t test_svmaxv_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svmaxv_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.fmaxv.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_f32,,)(pg, op); +} + +float64_t test_svmaxv_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svmaxv_f64 + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.fmaxv.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmaxv,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c index 3b237d9dc0d8b..bdf7ae02af186 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c @@ -469,3 +469,179 @@ svuint64_t test_svmin_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svmin,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svmin_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmin_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmin_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmin_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmin_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmin_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmin_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmin_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmin_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmin_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmin_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmin_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmin_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmin_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( 
%[[PG]], %op1, %op2)
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_f16,_x,)(pg, op1, op2);
+}
+
+svfloat32_t test_svmin_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // CHECK-LABEL: test_svmin_f32_x
+  // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %op1, %op2)
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_f32,_x,)(pg, op1, op2);
+}
+
+svfloat64_t test_svmin_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svmin_f64_x
+  // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %op1, %op2)
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_f64,_x,)(pg, op1, op2);
+}
+
+svfloat16_t test_svmin_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f16_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_n_f16,_z,)(pg, op1, op2);
+}
+
+svfloat32_t test_svmin_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f32_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_n_f32,_z,)(pg, op1, op2);
+}
+
+svfloat64_t test_svmin_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f64_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_n_f64,_z,)(pg, op1, op2);
+}
+
+svfloat16_t test_svmin_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f16_m
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( %[[PG]], %op1, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_n_f16,_m,)(pg, op1, op2);
+}
+
+svfloat32_t test_svmin_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f32_m
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %op1, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmin,_n_f32,_m,)(pg, op1, op2);
+}
+
+svfloat64_t test_svmin_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+{
+  // CHECK-LABEL: test_svmin_n_f64_m
+  // CHECK-DAG:
%[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmin_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmin_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmin_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmin_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmin_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmin_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmin.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmin,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c new file mode 100644 index 0000000000000..9f4f445efeeba --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c @@ -0,0 +1,187 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svminnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svminnm_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svminnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svminnm_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svminnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svminnm_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svminnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svminnm_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svminnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svminnm_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svminnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svminnm_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svminnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svminnm_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svminnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svminnm_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t 
test_svminnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svminnm_f64_x
+  // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %op1, %op2)
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_f64,_x,)(pg, op1, op2);
+}
+
+svfloat16_t test_svminnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f16_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f16,_z,)(pg, op1, op2);
+}
+
+svfloat32_t test_svminnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f32_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f32,_z,)(pg, op1, op2);
+}
+
+svfloat64_t test_svminnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f64_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f64,_z,)(pg, op1, op2);
+}
+
+svfloat16_t test_svminnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f16_m
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %op1, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f16,_m,)(pg, op1, op2);
+}
+
+svfloat32_t test_svminnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f32_m
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %op1, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f32,_m,)(pg, op1, op2);
+}
+
+svfloat64_t test_svminnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+{
+  // CHECK-LABEL: test_svminnm_n_f64_m
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %op1, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svminnm,_n_f64,_m,)(pg, op1, op2);
+}
+
+svfloat16_t test_svminnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+{
+  // CHECK-LABEL:
test_svminnm_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svminnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svminnm_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svminnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svminnm_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fminnm.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnm,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c new file mode 100644 index 0000000000000..7c6904b03c5ae --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +float16_t test_svminnmv_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svminnmv_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.fminnmv.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnmv,_f16,,)(pg, op); +} + +float32_t test_svminnmv_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svminnmv_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.fminnmv.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnmv,_f32,,)(pg, op); +} + +float64_t test_svminnmv_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svminnmv_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.fminnmv.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminnmv,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c new file mode 100644 index 0000000000000..cb3901656c6ab --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
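+// Note on the checks below: each reduction returns a scalar of the element
+// type, and the svbool_t predicate is narrowed to the element width via
+// convert.from.svbool, except for the 8-bit cases, which use %pg directly.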
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svminv_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svminv_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_s8,,)(pg, op); +} + +int16_t test_svminv_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svminv_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_s16,,)(pg, op); +} + +int32_t test_svminv_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svminv_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_s32,,)(pg, op); +} + +int64_t test_svminv_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svminv_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_s64,,)(pg, op); +} + +uint8_t test_svminv_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svminv_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_u8,,)(pg, op); +} + +uint16_t test_svminv_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svminv_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_u16,,)(pg, op); +} + +uint32_t test_svminv_u32(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svminv_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_u32,,)(pg, op); +} + +uint64_t test_svminv_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svminv_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_u64,,)(pg, op); +} + +float16_t test_svminv_f16(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svminv_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half @llvm.aarch64.sve.fminv.nxv8f16( %[[PG]], %op) + // CHECK: ret half %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_f16,,)(pg, op); +} + +float32_t test_svminv_f32(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svminv_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call float @llvm.aarch64.sve.fminv.nxv4f32( %[[PG]], %op) + // CHECK: ret float %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_f32,,)(pg, op); +} + +float64_t test_svminv_f64(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svminv_f64 + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call double @llvm.aarch64.sve.fminv.nxv2f64( %[[PG]], %op) + // CHECK: ret double %[[INTRINSIC]] + return SVE_ACLE_FUNC(svminv,_f64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c index 8e5e354b9157e..39bc9ff4dff6d 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c @@ -10,6 +10,641 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +svint8_t test_svmla_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmla_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmla_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmla_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmla_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmla_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmla_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmla_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmla_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmla_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmla_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmla_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmla_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmla_u32_z + // CHECK-DAG: 
%[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmla_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmla_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmla_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmla_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmla_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmla_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmla_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmla_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmla_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmla_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmla_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmla_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmla_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmla_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmla_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmla_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmla_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + 
// CHECK-LABEL: test_svmla_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmla_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmla_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmla_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmla_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmla_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmla_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmla_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmla_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmla_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmla_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmla_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmla_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmla_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmla_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmla_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmla_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_u64,_x,)(pg, op1, op2, op3); +} + +svint8_t test_svmla_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmla_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call 
@llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_s8,_z,)(pg, op1, op2, op3);
+}
+
+svint16_t test_svmla_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_s16_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_s16,_z,)(pg, op1, op2, op3);
+}
+
+svint32_t test_svmla_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_s32_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_s32,_z,)(pg, op1, op2, op3);
+}
+
+svint64_t test_svmla_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_s64_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_s64,_z,)(pg, op1, op2, op3);
+}
+
+svuint8_t test_svmla_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_u8_z
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_u8,_z,)(pg, op1, op2, op3);
+}
+
+svuint16_t test_svmla_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_u16_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_u16,_z,)(pg, op1, op2, op3);
+}
+
+svuint32_t test_svmla_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+{
+  // CHECK-LABEL: test_svmla_n_u32_z
+  // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg)
+  // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3)
+  // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer)
+  // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]])
+  // CHECK: ret %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla,_n_u32,_z,)(pg, op1, op2, op3);
+}
+
+svuint64_t test_svmla_n_u64_z(svbool_t pg, svuint64_t op1,
svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmla_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmla_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmla_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmla_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmla_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmla_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmla_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmla_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmla_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmla_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmla_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmla_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmla_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmla_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmla_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmla_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmla_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmla_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmla_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmla_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmla_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmla_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmla_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmla_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmla_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmla_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmla_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmla_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmla_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmla_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmla_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmla_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmla_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_u64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmla_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmla_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmla_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmla_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmla_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmla_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f64,_m,)(pg, op1, op2, op3); +} + 
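+// The _x (don't-care) forms below emit the same fmla calls as the _m forms
+// above; only the CHECK-LABEL names differ.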
+svfloat16_t test_svmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmla_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmla_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmla_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmla_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmla_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmla_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmla_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_n_f32_m(svbool_t pg, svfloat32_t 
op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmla_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmla_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmla_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmla_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmla_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmla.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmla,_n_f64,_x,)(pg, op1, op2, op3); +} + svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { // CHECK-LABEL: test_svmla_lane_f16 diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c new file mode 100644 index 0000000000000..c711d97468c01 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c @@ -0,0 +1,694 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin.
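+// For example, SVE_ACLE_FUNC(svmls,_s8,_z,) expands to svmls_z here and +// to svmls_s8_z in the #else branch below, so the two RUN lines cover +// both the overloaded and the fully-suffixed intrinsic names.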
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svmls_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmls_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmls_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmls_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmls_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmls_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmls_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmls_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: 
test_svmls_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmls_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmls_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmls_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmls_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmls_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmls_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmls_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmls_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmls_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmls_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmls_s8_x + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmls_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmls_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmls_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmls_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmls_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmls_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmls_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_u64,_x,)(pg, op1, op2, op3); +} + +svint8_t test_svmls_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmls_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmls_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: 
%[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmls_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmls_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmls_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmls_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmls_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmls_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svmls,_n_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmls_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmls_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmls_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmls_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmls_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmls_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmls_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmls_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmls_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmls_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmls_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmls_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmls_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmls_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmls_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmls_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmls_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmls_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmls_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmls_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmls_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmls_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmls_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmls_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmls_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mls.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_u64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmls_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmls_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmls_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmls_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmls_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmls_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmls_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, 
svfloat32_t op3) +{ + // CHECK-LABEL: test_svmls_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmls_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmls_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmls_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmls_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmls_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmls_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_n_f64_m(svbool_t 
pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmls_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmls_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmls_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmls_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls,_n_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmls_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv8f16( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f16,,)(op1, op2, op3, 0); +} + +svfloat16_t test_svmls_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f16_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv8f16( %op1, %op2, %op3, i32 7) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f16,,)(op1, op2, op3, 7); +} + +svfloat32_t test_svmls_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv4f32( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f32,,)(op1, op2, op3, 0); +} + +svfloat32_t test_svmls_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f32_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv4f32( %op1, %op2, %op3, i32 3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f32,,)(op1, op2, op3, 3); +} + +svfloat64_t test_svmls_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv2f64( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f64,,)(op1, op2, op3, 0); +} + 
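+// Per the SVE ACLE, the trailing immediate of the _lane forms selects an +// element within each 128-bit vector segment, so the valid index range +// depends on the element type: 0-7 for f16, 0-3 for f32 and 0-1 for f64. +// The *_1 tests exercise the top of each range.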
+svfloat64_t test_svmls_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmls_lane_f64_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmls.lane.nxv2f64( %op1, %op2, %op3, i32 1) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmls_lane,_f64,,)(op1, op2, op3, 1); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c new file mode 100644 index 0000000000000..609c3d22b4725 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svmov_b_z(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svmov_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %op, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmov,_b,_z,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c new file mode 100644 index 0000000000000..8e0b7ae393a22 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c @@ -0,0 +1,646 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin.
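+// Note (per the SVE ACLE): svmsb computes op3 - op1 * op2, i.e. the +// product is subtracted from the third operand, whereas svmls +// (acle_sve_mls.c) computes op1 - op2 * op3.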
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svmsb_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmsb_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmsb_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmsb_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmsb_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmsb_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmsb_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmsb_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: 
test_svmsb_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmsb_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmsb_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmsb_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmsb_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmsb_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmsb_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmsb_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmsb_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmsb_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmsb_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svmsb_s8_x + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmsb_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmsb_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) +{ + // CHECK-LABEL: test_svmsb_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) +{ + // CHECK-LABEL: test_svmsb_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) +{ + // CHECK-LABEL: test_svmsb_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) +{ + // CHECK-LABEL: test_svmsb_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) +{ + // CHECK-LABEL: test_svmsb_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_u64,_x,)(pg, op1, op2, op3); +} + +svint8_t test_svmsb_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s8,_z,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: 
%[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s16,_z,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s32,_z,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s64,_z,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u8,_z,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u16,_z,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u32,_z,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svmsb,_n_u64,_z,)(pg, op1, op2, op3); +} + +svint8_t test_svmsb_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s8,_m,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s16,_m,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s32,_m,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s64,_m,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u8,_m,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u16,_m,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u32,_m,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u64,_m,)(pg, op1, op2, op3); +} + +svint8_t test_svmsb_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s8,_x,)(pg, op1, op2, op3); +} + +svint16_t test_svmsb_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s16,_x,)(pg, op1, op2, op3); +} + +svint32_t test_svmsb_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s32,_x,)(pg, op1, op2, op3); +} + +svint64_t test_svmsb_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_s64,_x,)(pg, op1, op2, op3); +} + +svuint8_t test_svmsb_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv16i8( %pg, %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u8,_x,)(pg, op1, op2, op3); +} + +svuint16_t test_svmsb_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv8i16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u16,_x,)(pg, op1, op2, op3); +} + +svuint32_t test_svmsb_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv4i32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u32,_x,)(pg, op1, op2, op3); +} + +svuint64_t test_svmsb_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.msb.nxv2i64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_u64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmsb_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmsb_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmsb_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmsb_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svmsb_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmsb_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmsb_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, 
svfloat32_t op3) +{ + // CHECK-LABEL: test_svmsb_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svmsb_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_n_f64_m(svbool_t 
pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svmsb_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmsb.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmsb,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c index b4a9f0cdfc9e1..bc76e4754becb 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c @@ -469,3 +469,228 @@ svuint64_t test_svmul_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svmul,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svmul_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmul_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmul_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmul_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmul_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmul_f64_z + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmul_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmul_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmul_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmul_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmul_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmul_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmul_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmul_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmul_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmul_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmul_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmul_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svmul_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmul_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmul_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmul_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmul_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmul_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmul_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmul_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmul_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmul_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmul_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmul_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmul_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmul_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmul_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmul_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmul_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmul_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul,_n_f64,_x,)(pg, op1, op2); +} + 
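+// Note: the svmul_lane tests below exercise the unpredicated, indexed form.
+// The lane index is an immediate selecting an element within a 128-bit
+// segment, so the tests cover the lowest and highest legal values:
+// 0..7 for f16, 0..3 for f32 and 0..1 for f64.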
+svfloat16_t test_svmul_lane_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv8f16( %op1, %op2, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f16,,)(op1, op2, 0); +} + +svfloat16_t test_svmul_lane_f16_1(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f16_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv8f16( %op1, %op2, i32 7) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f16,,)(op1, op2, 7); +} + +svfloat32_t test_svmul_lane_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv4f32( %op1, %op2, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f32,,)(op1, op2, 0); +} + +svfloat32_t test_svmul_lane_f32_1(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f32_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv4f32( %op1, %op2, i32 3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f32,,)(op1, op2, 3); +} + +svfloat64_t test_svmul_lane_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv2f64( %op1, %op2, i32 0) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f64,,)(op1, op2, 0); +} + +svfloat64_t test_svmul_lane_f64_1(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmul_lane_f64_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmul.lane.nxv2f64( %op1, %op2, i32 1) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmul_lane,_f64,,)(op1, op2, 1); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c new file mode 100644 index 0000000000000..e485d8a720fe8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
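+// For example, SVE_ACLE_FUNC(svmulx,_f16,_z,) expands to the overloaded
+// name svmulx_z under SVE_OVERLOADED_FORMS, and to the full name
+// svmulx_f16_z otherwise.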
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svmulx_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmulx_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmulx_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmulx_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmulx_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmulx_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmulx_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svmulx_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmulx_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svmulx_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svmulx_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_f64_x(svbool_t pg, svfloat64_t op1, 
svfloat64_t op2) +{ + // CHECK-LABEL: test_svmulx_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svmulx_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svmulx_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svmulx_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: 
test_svmulx_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svmulx_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svmulx_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svmulx_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmulx.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmulx,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c new file mode 100644 index 0000000000000..e238dfb240f46 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svnand_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svnand_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.nand.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnand,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c new file mode 100644 index 0000000000000..b9f69b09753aa --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
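+// Per the ACLE spec, svnmad is a negated fused multiply-add: for active
+// lanes it computes -(op1 * op2 + op3); _z zeroes inactive lanes, _m keeps
+// them from op1, and _x leaves them unspecified.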
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmad_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmad_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmad_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmad_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmad_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmad_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmad_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmad_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmad_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmad_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmad.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmad,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c new file mode 100644 index 0000000000000..9cbaf6148e108 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
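+// Per the ACLE spec, svnmla computes -(op1 + op2 * op3) for active lanes,
+// i.e. the negation of the multiply-accumulate performed by svmla.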
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svnmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmla_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmla_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmla_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmla_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmla_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmla_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmla_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmla_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmla_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmla_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmla.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmla,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c new file mode 100644 index 0000000000000..d1936e716ddc0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
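+// Per the ACLE spec, svnmls computes -(op1 - op2 * op3), i.e.
+// op2 * op3 - op1, for active lanes.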
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svnmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmls_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmls_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmls_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmls_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmls_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmls_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmls_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmls_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmls_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmls_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmls.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmls,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c new file mode 100644 index 0000000000000..df5e24ccd9dfb --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
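+// For example (illustrative of the definitions just below): with +// SVE_OVERLOADED_FORMS defined, SVE_ACLE_FUNC(svnmsb,_f16,_z,) pastes A1##A3 +// and names the overloaded form 'svnmsb_z'; otherwise the full pasting +// A1##A2##A3##A4 names the explicitly-suffixed form 'svnmsb_f16_z'.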
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svnmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmsb_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmsb_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmsb_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %[[SEL]], %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmsb_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmsb_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmsb_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svnmsb_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) +{ + // CHECK-LABEL: test_svnmsb_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) +{ + // CHECK-LABEL: test_svnmsb_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_f64,_x,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f16,_z,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f32,_z,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %[[SEL]], %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f64,_z,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f16,_m,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f32,_m,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f64,_m,)(pg, op1, op2, op3); +} + +svfloat16_t test_svnmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv8f16( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f16,_x,)(pg, op1, op2, op3); +} + +svfloat32_t test_svnmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv4f32( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f32,_x,)(pg, op1, op2, op3); +} + +svfloat64_t test_svnmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) +{ + // CHECK-LABEL: test_svnmsb_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fnmsb.nxv2f64( %[[PG]], %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnmsb,_n_f64,_x,)(pg, op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c new file mode 100644 index 0000000000000..b22075b0b77de --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
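+// Both RUN lines above feed the same default FileCheck prefix, so the +// overloaded and explicitly-suffixed spellings are expected to lower to +// identical IR.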
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svnor_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svnor_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.nor.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnor,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c new file mode 100644 index 0000000000000..45703d895d3c9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c @@ -0,0 +1,229 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svnot_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svnot_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s8,_z,)(pg, op); +} + +svint16_t test_svnot_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svnot_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s16,_z,)(pg, op); +} + +svint32_t test_svnot_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svnot_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s32,_z,)(pg, op); +} + +svint64_t test_svnot_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svnot_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s64,_z,)(pg, op); +} + +svuint8_t test_svnot_u8_z(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svnot_u8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u8,_z,)(pg, op); +} + +svuint16_t test_svnot_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svnot_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u16,_z,)(pg, op); +} + +svuint32_t test_svnot_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svnot_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( zeroinitializer, %[[PG]], %op) + 
// CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u32,_z,)(pg, op); +} + +svuint64_t test_svnot_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svnot_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u64,_z,)(pg, op); +} + +svint8_t test_svnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svnot_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s8,_m,)(inactive, pg, op); +} + +svint16_t test_svnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svnot_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s16,_m,)(inactive, pg, op); +} + +svint32_t test_svnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svnot_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svnot_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s64,_m,)(inactive, pg, op); +} + +svuint8_t test_svnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svnot_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u8,_m,)(inactive, pg, op); +} + +svuint16_t test_svnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svnot_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svnot_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svnot_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u64,_m,)(inactive, pg, op); +} + +svint8_t test_svnot_s8_x(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svnot_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svnot,_s8,_x,)(pg, op); +} + +svint16_t test_svnot_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svnot_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s16,_x,)(pg, op); +} + +svint32_t test_svnot_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svnot_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s32,_x,)(pg, op); +} + +svint64_t test_svnot_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svnot_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_s64,_x,)(pg, op); +} + +svuint8_t test_svnot_u8_x(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svnot_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u8,_x,)(pg, op); +} + +svuint16_t test_svnot_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svnot_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u16,_x,)(pg, op); +} + +svuint32_t test_svnot_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svnot_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u32,_x,)(pg, op); +} + +svuint64_t test_svnot_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svnot_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.not.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_u64,_x,)(pg, op); +} + +svbool_t test_svnot_b_z(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svnot_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.z.nxv16i1( %pg, %op, %pg) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svnot,_b,_z,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c new file mode 100644 index 0000000000000..6c227bc7c03dd --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
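+// Note: the _b (predicate) forms in this file operate on svbool_t directly, +// so the checks expect no convert.from.svbool narrowing of the predicate.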
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svorn_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svorn_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orn.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorn,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c new file mode 100644 index 0000000000000..2e5bfad0b80ec --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c @@ -0,0 +1,479 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svorr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svorr_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svorr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svorr_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svorr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svorr_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svorr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svorr_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svorr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svorr_u8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svorr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t 
op2) +{ + // CHECK-LABEL: test_svorr_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svorr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svorr_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svorr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svorr_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svorr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svorr_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svorr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svorr_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svorr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svorr_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svorr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svorr_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svorr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svorr_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svorr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svorr_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svorr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svorr_u32_m + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svorr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svorr_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svorr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svorr_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svorr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svorr_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svorr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svorr_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svorr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svorr_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svorr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svorr_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svorr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svorr_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svorr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svorr_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u32,_x,)(pg, op1, op2); +} + +svuint64_t test_svorr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svorr_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_u64,_x,)(pg, op1, op2); +} + +svint8_t test_svorr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svorr_n_s8_z + // CHECK-DAG: %[[DUP:.*]] = call 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svorr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svorr_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svorr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svorr_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svorr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svorr_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s64,_z,)(pg, op1, op2); +} + +svuint8_t test_svorr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svorr_n_u8_z + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u8,_z,)(pg, op1, op2); +} + +svuint16_t test_svorr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svorr_n_u16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u16,_z,)(pg, op1, op2); +} + +svuint32_t test_svorr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svorr_n_u32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svorr,_n_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svorr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svorr_n_u64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u64,_z,)(pg, op1, op2); +} + +svint8_t test_svorr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svorr_n_s8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svorr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svorr_n_s16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s16,_m,)(pg, op1, op2); +} + +svint32_t test_svorr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svorr_n_s32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s32,_m,)(pg, op1, op2); +} + +svint64_t test_svorr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svorr_n_s64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s64,_m,)(pg, op1, op2); +} + +svuint8_t test_svorr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svorr_n_u8_m + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u8,_m,)(pg, op1, op2); +} + +svuint16_t test_svorr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svorr_n_u16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u16,_m,)(pg, op1, op2); +} + +svuint32_t test_svorr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svorr_n_u32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret 
%[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u32,_m,)(pg, op1, op2); +} + +svuint64_t test_svorr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svorr_n_u64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u64,_m,)(pg, op1, op2); +} + +svint8_t test_svorr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svorr_n_s8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s8,_x,)(pg, op1, op2); +} + +svint16_t test_svorr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svorr_n_s16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s16,_x,)(pg, op1, op2); +} + +svint32_t test_svorr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svorr_n_s32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s32,_x,)(pg, op1, op2); +} + +svint64_t test_svorr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svorr_n_s64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_s64,_x,)(pg, op1, op2); +} + +svuint8_t test_svorr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svorr_n_u8_x + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv16i8( %pg, %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u8,_x,)(pg, op1, op2); +} + +svuint16_t test_svorr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svorr_n_u16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv8i16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u16,_x,)(pg, op1, op2); +} + +svuint32_t test_svorr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svorr_n_u32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv4i32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u32,_x,)(pg, op1, op2); +} + 
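+// Predication pattern used throughout this file: the _z forms first select +// the active lanes of op1 against zeroinitializer, the _m and _x forms pass +// op1 straight through to the merging intrinsic, and the _n forms splat the +// scalar operand via llvm.aarch64.sve.dup.x before the bitwise operation.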
+svuint64_t test_svorr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svorr_n_u64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.nxv2i64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_n_u64,_x,)(pg, op1, op2); +} + +svbool_t test_svorr_b_z(svbool_t pg, svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svorr_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.orr.z.nxv16i1( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorr,_b,_z,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c new file mode 100644 index 0000000000000..8a512ef8ffe2a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int8_t test_svorv_s8(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svorv_s8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_s8,,)(pg, op); +} + +int16_t test_svorv_s16(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svorv_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_s16,,)(pg, op); +} + +int32_t test_svorv_s32(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svorv_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_s32,,)(pg, op); +} + +int64_t test_svorv_s64(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svorv_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_s64,,)(pg, op); +} + +uint8_t test_svorv_u8(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svorv_u8 + // CHECK: %[[INTRINSIC:.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, %op) + // CHECK: ret i8 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_u8,,)(pg, op); +} + +uint16_t test_svorv_u16(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svorv_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16( %[[PG]], %op) + // CHECK: ret i16 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_u16,,)(pg, op); +} + +uint32_t test_svorv_u32(svbool_t pg, 
svuint32_t op) +{ + // CHECK-LABEL: test_svorv_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32( %[[PG]], %op) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_u32,,)(pg, op); +} + +uint64_t test_svorv_u64(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svorv_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64( %[[PG]], %op) + // CHECK: ret i64 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svorv,_u64,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c new file mode 100644 index 0000000000000..b8b2b2d725d02 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svpfirst_b(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svpfirst_b + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.pfirst.nxv16i1( %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svpfirst,_b,,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c new file mode 100644 index 0000000000000..2bc17afe12334 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +svbool_t test_svpnext_b8(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svpnext_b8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.pnext.nxv16i1( %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return svpnext_b8(pg, op); +} + +svbool_t test_svpnext_b16(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svpnext_b16 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.pnext.nxv8i1( %[[PG]], %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svpnext_b16(pg, op); +} + +svbool_t test_svpnext_b32(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svpnext_b32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.pnext.nxv4i1( %[[PG]], %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svpnext_b32(pg, op); +} + +svbool_t 
test_svpnext_b64(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svpnext_b64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.pnext.nxv2i1( %[[PG]], %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svpnext_b64(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c index f29cb995230ce..a361d3ab7d084 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c @@ -102,3 +102,75 @@ void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum) // CHECK: @llvm.aarch64.sve.prf.nxv16i1( %pg, i8* %[[GEP]], i32 0) return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfb_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfb_gather_u32base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfb_gather_u64base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_s32offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32( %[[PG]], i8* %base, %offsets, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather_,s32,offset,)(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_s64offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64( %[[PG]], i8* %base, %offsets, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather_,s64,offset,)(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_u32offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32( %[[PG]], i8* %base, %offsets, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather_,u32,offset,)(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_u64offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64( %[[PG]], i8* %base, %offsets, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather_,u64,offset,)(pg, base, offsets, SV_PLDL1KEEP); +} + +void 
test_svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_u32base_offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 %offset, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather,_u32base,_offset,)(pg, bases, offset, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_u64base_offset + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 %offset, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfb_gather,_u64base,_offset,)(pg, bases, offset, SV_PLDL1KEEP); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c index 59d3fbfb7e1b3..b7dde06e397fa 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c @@ -116,3 +116,77 @@ void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum) // CHECK: @llvm.aarch64.sve.prf.nxv2i1( %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfd_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfd_gather_u32base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfd_gather_u64base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_s32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_s64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather_,s64,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_u32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather_,u32,index,)(pg, base, 
indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_u64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfd_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c index a6290f3f2f0c8..e988448ad93e2 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c @@ -116,3 +116,75 @@ void test_svprfh_vnum(svbool_t pg, const void *base, int64_t vnum) // CHECK: @llvm.aarch64.sve.prf.nxv8i1( %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfh_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfh_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfh_gather_u32base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfh_gather_u64base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 0, i32 0) + return SVE_ACLE_FUNC(svprfh_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfh_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_s32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_s64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + return SVE_ACLE_FUNC(svprfh_gather_,s64,index,)(pg, 
base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_u32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather_,u32,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_u64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfh_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfh_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfh_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c index e6b1b603e475d..d71767cad882c 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c @@ -116,3 +116,77 @@ void test_svprfw_vnum(svbool_t pg, const void *base, int64_t vnum) // CHECK: @llvm.aarch64.sve.prf.nxv4i1( %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfw_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfw_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfw_gather_u32base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather,_u32base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfw_gather_u64base + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 0, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather,_u64base,,)(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_s32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void 
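+  // Note (illustrative observation, not part of the checked output): in these
+  // gather-prefetch tests the 32-bit offset/index vectors map onto the
+  // extending intrinsic forms -- .sxtw.index for signed and .uxtw.index for
+  // unsigned sources -- while 64-bit vectors use the plain .index variant.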
+ return SVE_ACLE_FUNC(svprfw_gather_,s32,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_s64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather_,s64,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_u32index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather_,u32,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_u64index + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64( %[[PG]], i8* %base, %indices, i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather_,u64,index,)(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather,_u32base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64( %[[PG]], %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return SVE_ACLE_FUNC(svprfw_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c new file mode 100644 index 0000000000000..92df211e3d74d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +bool test_svptest_any(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svptest_any + // CHECK: %[[INTRINSIC:.*]] = call i1 @llvm.aarch64.sve.ptest.any{{(.nxv16i1)?}}( %pg, %op) + // CHECK: ret i1 %[[INTRINSIC]] + return svptest_any(pg, op); +} + +bool test_svptest_first(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svptest_first + // CHECK: %[[INTRINSIC:.*]] = call i1 @llvm.aarch64.sve.ptest.first{{(.nxv16i1)?}}( %pg, %op) + // CHECK: ret i1 %[[INTRINSIC]] + return svptest_first(pg, op); +} + +bool test_svptest_last(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svptest_last + // CHECK: %[[INTRINSIC:.*]] = call i1 @llvm.aarch64.sve.ptest.last{{(.nxv16i1)?}}( %pg, %op) + // 
CHECK: ret i1 %[[INTRINSIC]] + return svptest_last(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c new file mode 100644 index 0000000000000..907360a5e7c5b --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c @@ -0,0 +1,221 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svrbit_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svrbit_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s8,_z,)(pg, op); +} + +svint16_t test_svrbit_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrbit_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s16,_z,)(pg, op); +} + +svint32_t test_svrbit_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrbit_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s32,_z,)(pg, op); +} + +svint64_t test_svrbit_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrbit_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s64,_z,)(pg, op); +} + +svuint8_t test_svrbit_u8_z(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svrbit_u8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u8,_z,)(pg, op); +} + +svuint16_t test_svrbit_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrbit_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u16,_z,)(pg, op); +} + +svuint32_t test_svrbit_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrbit_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u32,_z,)(pg, op); +} + +svuint64_t test_svrbit_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrbit_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.rbit.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u64,_z,)(pg, op); +} + +svint8_t test_svrbit_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svrbit_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s8,_m,)(inactive, pg, op); +} + +svint16_t test_svrbit_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrbit_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s16,_m,)(inactive, pg, op); +} + +svint32_t test_svrbit_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrbit_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svrbit_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrbit_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s64,_m,)(inactive, pg, op); +} + +svuint8_t test_svrbit_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svrbit_u8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u8,_m,)(inactive, pg, op); +} + +svuint16_t test_svrbit_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrbit_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svrbit_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrbit_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svrbit_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrbit_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u64,_m,)(inactive, pg, op); +} + +svint8_t test_svrbit_s8_x(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svrbit_s8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s8,_x,)(pg, op); +} + +svint16_t test_svrbit_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrbit_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.rbit.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s16,_x,)(pg, op); +} + +svint32_t test_svrbit_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrbit_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s32,_x,)(pg, op); +} + +svint64_t test_svrbit_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrbit_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_s64,_x,)(pg, op); +} + +svuint8_t test_svrbit_u8_x(svbool_t pg, svuint8_t op) +{ + // CHECK-LABEL: test_svrbit_u8_x + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv16i8( undef, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u8,_x,)(pg, op); +} + +svuint16_t test_svrbit_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrbit_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u16,_x,)(pg, op); +} + +svuint32_t test_svrbit_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrbit_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u32,_x,)(pg, op); +} + +svuint64_t test_svrbit_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrbit_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rbit.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrbit,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c new file mode 100644 index 0000000000000..f3ee6f4769ae9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
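+// For example (illustrative): under SVE_OVERLOADED_FORMS,
+// SVE_ACLE_FUNC(svrecpe,_f16,,) pastes A1##A3 and becomes the overloaded
+// call svrecpe(op); otherwise A1##A2##A3##A4 yields the mangled svrecpe_f16(op).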
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrecpe_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svrecpe_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpe.x.nxv8f16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpe,_f16,,)(op); +} + +svfloat32_t test_svrecpe_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svrecpe_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpe.x.nxv4f32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpe,_f32,,)(op); +} + +svfloat64_t test_svrecpe_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svrecpe_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpe.x.nxv2f64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpe,_f64,,)(op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c new file mode 100644 index 0000000000000..de13a02c64d4a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrecps_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svrecps_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecps.x.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecps,_f16,,)(op1, op2); +} + +svfloat32_t test_svrecps_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svrecps_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecps.x.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecps,_f32,,)(op1, op2); +} + +svfloat64_t test_svrecps_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svrecps_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecps.x.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecps,_f64,,)(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c new file mode 100644 index 0000000000000..7666db7a0d2a2 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
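+// The _z, _m and _x forms exercised below differ only in the merge operand
+// handed to the underlying intrinsic: zeroinitializer for _z, the %inactive
+// argument for _m, and undef for _x (see the CHECK lines in each test).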
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrecpx_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrecpx_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrecpx_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrecpx_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrecpx_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrecpx_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrecpx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrecpx_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrecpx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrecpx_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrecpx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrecpx_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrecpx_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrecpx_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrecpx_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrecpx_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrecpx_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrecpx_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frecpx.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrecpx,_f64,_x,)(pg, op); +} diff --git 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c new file mode 100644 index 0000000000000..e33a7e0a8504a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c @@ -0,0 +1,960 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svreinterpret_s8_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s8,_s8,,)(op); +} + +svint8_t test_svreinterpret_s8_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s16,,)(op); +} + +svint8_t test_svreinterpret_s8_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s32,,)(op); +} + +svint8_t test_svreinterpret_s8_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s64,,)(op); +} + +svint8_t test_svreinterpret_s8_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s8,_u8,,)(op); +} + +svint8_t test_svreinterpret_s8_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_u16,,)(op); +} + +svint8_t test_svreinterpret_s8_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_u32,,)(op); +} + +svint8_t test_svreinterpret_s8_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_u64,,)(op); +} + +svint8_t test_svreinterpret_s8_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_f16,,)(op); +} + +svint8_t test_svreinterpret_s8_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_f32,,)(op); +} + +svint8_t test_svreinterpret_s8_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_f64,,)(op); +} + +svint16_t test_svreinterpret_s16_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_s8,,)(op); +} + +svint16_t test_svreinterpret_s16_s16(svint16_t 
op) +{ + // CHECK-LABEL: test_svreinterpret_s16_s16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s16,_s16,,)(op); +} + +svint16_t test_svreinterpret_s16_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_s32,,)(op); +} + +svint16_t test_svreinterpret_s16_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_s64,,)(op); +} + +svint16_t test_svreinterpret_s16_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_u8,,)(op); +} + +svint16_t test_svreinterpret_s16_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_u16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s16,_u16,,)(op); +} + +svint16_t test_svreinterpret_s16_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_u32,,)(op); +} + +svint16_t test_svreinterpret_s16_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_u64,,)(op); +} + +svint16_t test_svreinterpret_s16_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_f16,,)(op); +} + +svint16_t test_svreinterpret_s16_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_f32,,)(op); +} + +svint16_t test_svreinterpret_s16_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s16_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16,_f64,,)(op); +} + +svint32_t test_svreinterpret_s32_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_s8,,)(op); +} + +svint32_t test_svreinterpret_s32_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_s16,,)(op); +} + +svint32_t test_svreinterpret_s32_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_s32 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s32,_s32,,)(op); +} + +svint32_t test_svreinterpret_s32_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_s64,,)(op); +} + +svint32_t test_svreinterpret_s32_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_u8,,)(op); +} + +svint32_t test_svreinterpret_s32_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_u16,,)(op); +} + +svint32_t test_svreinterpret_s32_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_u32 + // CHECK: 
ret %op + return SVE_ACLE_FUNC(svreinterpret_s32,_u32,,)(op); +} + +svint32_t test_svreinterpret_s32_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_u64,,)(op); +} + +svint32_t test_svreinterpret_s32_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_f16,,)(op); +} + +svint32_t test_svreinterpret_s32_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_f32,,)(op); +} + +svint32_t test_svreinterpret_s32_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s32_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32,_f64,,)(op); +} + +svint64_t test_svreinterpret_s64_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_s8,,)(op); +} + +svint64_t test_svreinterpret_s64_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_s16,,)(op); +} + +svint64_t test_svreinterpret_s64_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_s32,,)(op); +} + +svint64_t test_svreinterpret_s64_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_s64 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s64,_s64,,)(op); +} + +svint64_t test_svreinterpret_s64_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_u8,,)(op); +} + +svint64_t test_svreinterpret_s64_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_u16,,)(op); +} + +svint64_t test_svreinterpret_s64_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_u32,,)(op); +} + +svint64_t test_svreinterpret_s64_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_u64 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s64,_u64,,)(op); +} + +svint64_t test_svreinterpret_s64_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_f16,,)(op); +} + +svint64_t test_svreinterpret_s64_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_f32,,)(op); +} + +svint64_t test_svreinterpret_s64_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s64_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64,_f64,,)(op); +} + +svuint8_t test_svreinterpret_u8_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_s8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u8,_s8,,)(op); +} 
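+
+// As the checks in this file illustrate, svreinterpret between distinct
+// element types is expected to lower to a single IR bitcast, while a
+// reinterpret to the same element type folds away to a plain `ret %op`.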
+ +svuint8_t test_svreinterpret_u8_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_s16,,)(op); +} + +svuint8_t test_svreinterpret_u8_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_s32,,)(op); +} + +svuint8_t test_svreinterpret_u8_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_s64,,)(op); +} + +svuint8_t test_svreinterpret_u8_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_u8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u8,_u8,,)(op); +} + +svuint8_t test_svreinterpret_u8_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_u16,,)(op); +} + +svuint8_t test_svreinterpret_u8_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_u32,,)(op); +} + +svuint8_t test_svreinterpret_u8_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_u64,,)(op); +} + +svuint8_t test_svreinterpret_u8_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_f16,,)(op); +} + +svuint8_t test_svreinterpret_u8_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_f32,,)(op); +} + +svuint8_t test_svreinterpret_u8_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u8_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8,_f64,,)(op); +} + +svuint16_t test_svreinterpret_u16_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_s8,,)(op); +} + +svuint16_t test_svreinterpret_u16_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_s16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u16,_s16,,)(op); +} + +svuint16_t test_svreinterpret_u16_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_s32,,)(op); +} + +svuint16_t test_svreinterpret_u16_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_s64,,)(op); +} + +svuint16_t test_svreinterpret_u16_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_u8,,)(op); +} + +svuint16_t test_svreinterpret_u16_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_u16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u16,_u16,,)(op); +} + +svuint16_t test_svreinterpret_u16_u32(svuint32_t op) +{ + // CHECK-LABEL: 
test_svreinterpret_u16_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_u32,,)(op); +} + +svuint16_t test_svreinterpret_u16_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_u64,,)(op); +} + +svuint16_t test_svreinterpret_u16_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_f16,,)(op); +} + +svuint16_t test_svreinterpret_u16_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_f32,,)(op); +} + +svuint16_t test_svreinterpret_u16_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u16_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16,_f64,,)(op); +} + +svuint32_t test_svreinterpret_u32_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_s8,,)(op); +} + +svuint32_t test_svreinterpret_u32_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_s16,,)(op); +} + +svuint32_t test_svreinterpret_u32_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_s32 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u32,_s32,,)(op); +} + +svuint32_t test_svreinterpret_u32_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_s64,,)(op); +} + +svuint32_t test_svreinterpret_u32_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_u8,,)(op); +} + +svuint32_t test_svreinterpret_u32_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_u16,,)(op); +} + +svuint32_t test_svreinterpret_u32_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_u32 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u32,_u32,,)(op); +} + +svuint32_t test_svreinterpret_u32_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_u64,,)(op); +} + +svuint32_t test_svreinterpret_u32_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_f16,,)(op); +} + +svuint32_t test_svreinterpret_u32_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_f32,,)(op); +} + +svuint32_t test_svreinterpret_u32_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u32_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32,_f64,,)(op); +} + +svuint64_t test_svreinterpret_u64_s8(svint8_t op) +{ + // 
CHECK-LABEL: test_svreinterpret_u64_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_s8,,)(op); +} + +svuint64_t test_svreinterpret_u64_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_s16,,)(op); +} + +svuint64_t test_svreinterpret_u64_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_s32,,)(op); +} + +svuint64_t test_svreinterpret_u64_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_s64 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u64,_s64,,)(op); +} + +svuint64_t test_svreinterpret_u64_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_u8,,)(op); +} + +svuint64_t test_svreinterpret_u64_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_u16,,)(op); +} + +svuint64_t test_svreinterpret_u64_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_u32,,)(op); +} + +svuint64_t test_svreinterpret_u64_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_u64 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_u64,_u64,,)(op); +} + +svuint64_t test_svreinterpret_u64_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_f16,,)(op); +} + +svuint64_t test_svreinterpret_u64_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_f32,,)(op); +} + +svuint64_t test_svreinterpret_u64_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_u64_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64,_f64,,)(op); +} + +svfloat16_t test_svreinterpret_f16_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_s8,,)(op); +} + +svfloat16_t test_svreinterpret_f16_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_s16,,)(op); +} + +svfloat16_t test_svreinterpret_f16_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_s32,,)(op); +} + +svfloat16_t test_svreinterpret_f16_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_s64,,)(op); +} + +svfloat16_t test_svreinterpret_f16_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_u8,,)(op); +} + +svfloat16_t test_svreinterpret_f16_u16(svuint16_t op) +{ + 
// CHECK-LABEL: test_svreinterpret_f16_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_u16,,)(op); +} + +svfloat16_t test_svreinterpret_f16_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_u32,,)(op); +} + +svfloat16_t test_svreinterpret_f16_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_u64,,)(op); +} + +svfloat16_t test_svreinterpret_f16_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_f16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_f16,_f16,,)(op); +} + +svfloat16_t test_svreinterpret_f16_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_f32,,)(op); +} + +svfloat16_t test_svreinterpret_f16_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f16_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16,_f64,,)(op); +} + +svfloat32_t test_svreinterpret_f32_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_s8,,)(op); +} + +svfloat32_t test_svreinterpret_f32_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_s16,,)(op); +} + +svfloat32_t test_svreinterpret_f32_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_s32,,)(op); +} + +svfloat32_t test_svreinterpret_f32_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_s64,,)(op); +} + +svfloat32_t test_svreinterpret_f32_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_u8,,)(op); +} + +svfloat32_t test_svreinterpret_f32_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_u16,,)(op); +} + +svfloat32_t test_svreinterpret_f32_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_u32,,)(op); +} + +svfloat32_t test_svreinterpret_f32_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_u64,,)(op); +} + +svfloat32_t test_svreinterpret_f32_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_f16,,)(op); +} + +svfloat32_t test_svreinterpret_f32_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_f32 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_f32,_f32,,)(op); +} + +svfloat32_t 
test_svreinterpret_f32_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f32_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32,_f64,,)(op); +} + +svfloat64_t test_svreinterpret_f64_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_s8,,)(op); +} + +svfloat64_t test_svreinterpret_f64_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_s16,,)(op); +} + +svfloat64_t test_svreinterpret_f64_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_s32,,)(op); +} + +svfloat64_t test_svreinterpret_f64_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_s64,,)(op); +} + +svfloat64_t test_svreinterpret_f64_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_u8,,)(op); +} + +svfloat64_t test_svreinterpret_f64_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_u16,,)(op); +} + +svfloat64_t test_svreinterpret_f64_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_u32,,)(op); +} + +svfloat64_t test_svreinterpret_f64_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_u64,,)(op); +} + +svfloat64_t test_svreinterpret_f64_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_f16,,)(op); +} + +svfloat64_t test_svreinterpret_f64_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64,_f32,,)(op); +} + +svfloat64_t test_svreinterpret_f64_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svreinterpret_f64_f64 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_f64,_f64,,)(op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c new file mode 100644 index 0000000000000..fa275a8fab4ff --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
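+// Note for the predicate forms at the end of this file: only svrev_b8 acts
+// on the full 16 x i1 svbool_t directly; the b16/b32/b64 variants first
+// narrow the predicate with convert.from.svbool and widen the result back
+// via convert.to.svbool, which is what their CHECK lines verify.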
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svrev_s8(svint8_t op) +{ + // CHECK-LABEL: test_svrev_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv16i8( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_s8,,)(op); +} + +svint16_t test_svrev_s16(svint16_t op) +{ + // CHECK-LABEL: test_svrev_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv8i16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_s16,,)(op); +} + +svint32_t test_svrev_s32(svint32_t op) +{ + // CHECK-LABEL: test_svrev_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv4i32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_s32,,)(op); +} + +svint64_t test_svrev_s64(svint64_t op) +{ + // CHECK-LABEL: test_svrev_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv2i64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_s64,,)(op); +} + +svuint8_t test_svrev_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svrev_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv16i8( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_u8,,)(op); +} + +svuint16_t test_svrev_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svrev_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv8i16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_u16,,)(op); +} + +svuint32_t test_svrev_u32(svuint32_t op) +{ + // CHECK-LABEL: test_svrev_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv4i32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_u32,,)(op); +} + +svuint64_t test_svrev_u64(svuint64_t op) +{ + // CHECK-LABEL: test_svrev_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv2i64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_u64,,)(op); +} + +svfloat16_t test_svrev_f16(svfloat16_t op) +{ + // CHECK-LABEL: test_svrev_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv8f16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_f16,,)(op); +} + +svfloat32_t test_svrev_f32(svfloat32_t op) +{ + // CHECK-LABEL: test_svrev_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv4f32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_f32,,)(op); +} + +svfloat64_t test_svrev_f64(svfloat64_t op) +{ + // CHECK-LABEL: test_svrev_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv2f64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrev,_f64,,)(op); +} + +svbool_t test_svrev_b8(svbool_t op) +{ + // CHECK-LABEL: test_svrev_b8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv16i1( %op) + // CHECK: ret %[[INTRINSIC]] + return svrev_b8(op); +} + +svbool_t test_svrev_b16(svbool_t op) +{ + // CHECK-LABEL: test_svrev_b16 + // CHECK: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv8i1( %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svrev_b16(op); +} + +svbool_t test_svrev_b32(svbool_t op) +{ + // CHECK-LABEL: test_svrev_b32 + // CHECK: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv4i1( %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret 
%[[CAST]] + return svrev_b32(op); +} + +svbool_t test_svrev_b64(svbool_t op) +{ + // CHECK-LABEL: test_svrev_b64 + // CHECK: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rev.nxv2i1( %[[OP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svrev_b64(op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c new file mode 100644 index 0000000000000..2aaf2dc8644fa --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c @@ -0,0 +1,173 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svrevb_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrevb_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s16,_z,)(pg, op); +} + +svint32_t test_svrevb_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevb_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s32,_z,)(pg, op); +} + +svint64_t test_svrevb_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevb_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s64,_z,)(pg, op); +} + +svuint16_t test_svrevb_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrevb_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u16,_z,)(pg, op); +} + +svuint32_t test_svrevb_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevb_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u32,_z,)(pg, op); +} + +svuint64_t test_svrevb_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevb_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u64,_z,)(pg, op); +} + +svint16_t test_svrevb_s16_m(svint16_t inactive, 
svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrevb_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s16,_m,)(inactive, pg, op); +} + +svint32_t test_svrevb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevb_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svrevb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevb_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s64,_m,)(inactive, pg, op); +} + +svuint16_t test_svrevb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrevb_u16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u16,_m,)(inactive, pg, op); +} + +svuint32_t test_svrevb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevb_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svrevb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevb_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u64,_m,)(inactive, pg, op); +} + +svint16_t test_svrevb_s16_x(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svrevb_s16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s16,_x,)(pg, op); +} + +svint32_t test_svrevb_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevb_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s32,_x,)(pg, op); +} + +svint64_t test_svrevb_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevb_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_s64,_x,)(pg, op); +} + +svuint16_t test_svrevb_u16_x(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svrevb_u16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.revb.nxv8i16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u16,_x,)(pg, op); +} + +svuint32_t test_svrevb_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevb_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u32,_x,)(pg, op); +} + +svuint64_t test_svrevb_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevb_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revb.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevb,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c new file mode 100644 index 0000000000000..ac7d5c0b2545c --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c @@ -0,0 +1,119 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint32_t test_svrevh_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevh_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_s32,_z,)(pg, op); +} + +svint64_t test_svrevh_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevh_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_s64,_z,)(pg, op); +} + +svuint32_t test_svrevh_u32_z(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevh_u32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u32,_z,)(pg, op); +} + +svuint64_t test_svrevh_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevh_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u64,_z,)(pg, op); +} + +svint32_t test_svrevh_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevh_s32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svrevh,_s32,_m,)(inactive, pg, op); +} + +svint64_t test_svrevh_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevh_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_s64,_m,)(inactive, pg, op); +} + +svuint32_t test_svrevh_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevh_u32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u32,_m,)(inactive, pg, op); +} + +svuint64_t test_svrevh_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevh_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u64,_m,)(inactive, pg, op); +} + +svint32_t test_svrevh_s32_x(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svrevh_s32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_s32,_x,)(pg, op); +} + +svint64_t test_svrevh_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevh_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_s64,_x,)(pg, op); +} + +svuint32_t test_svrevh_u32_x(svbool_t pg, svuint32_t op) +{ + // CHECK-LABEL: test_svrevh_u32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv4i32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u32,_x,)(pg, op); +} + +svuint64_t test_svrevh_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevh_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revh.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevh,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c new file mode 100644 index 0000000000000..1d6d414b85b6e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c @@ -0,0 +1,65 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint64_t test_svrevw_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevw_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_s64,_z,)(pg, op); +} + +svuint64_t test_svrevw_u64_z(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevw_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_u64,_z,)(pg, op); +} + +svint64_t test_svrevw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevw_s64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_s64,_m,)(inactive, pg, op); +} + +svuint64_t test_svrevw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevw_u64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_u64,_m,)(inactive, pg, op); +} + +svint64_t test_svrevw_s64_x(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svrevw_s64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_s64,_x,)(pg, op); +} + +svuint64_t test_svrevw_u64_x(svbool_t pg, svuint64_t op) +{ + // CHECK-LABEL: test_svrevw_u64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.revw.nxv2i64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrevw,_u64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c new file mode 100644 index 0000000000000..cae1f3093a850 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
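The _z, _m and _x suffixes exercised in this file differ only in the first operand handed to the merging intrinsic, which the CHECK lines make visible: zeroinitializer, the inactive vector, or undef respectively. A hedged usage sketch with the overloaded names from arm_sve.h:

//   svfloat32_t a = svrinta_z(pg, x);            // inactive lanes become 0.0
//   svfloat32_t b = svrinta_m(fallback, pg, x);  // inactive lanes keep fallback
//   svfloat32_t c = svrinta_x(pg, x);            // inactive lanes unspecified,
//                                                // freeing the backend to use
//                                                // the cheapest instruction form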
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrinta_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinta_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrinta_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinta_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrinta_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinta_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrinta_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinta_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrinta_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinta_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrinta_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinta_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrinta_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinta_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrinta_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinta_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrinta_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinta_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinta.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinta,_f64,_x,)(pg, op); +} diff --git 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c new file mode 100644 index 0000000000000..a4bf4f97bf4de --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrinti_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinti_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrinti_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinti_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrinti_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinti_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrinti_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinti_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrinti_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinti_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrinti_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinti_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrinti_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrinti_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f16,_x,)(pg, 
op); +} + +svfloat32_t test_svrinti_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrinti_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrinti_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrinti_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frinti.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrinti,_f64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c new file mode 100644 index 0000000000000..bf04d944f2107 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrintm_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintm_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrintm_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintm_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrintm_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintm_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrintm_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintm_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrintm_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintm_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f32,_m,)(inactive, pg, op); +} + 
+svfloat64_t test_svrintm_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintm_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrintm_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintm_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrintm_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintm_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrintm_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintm_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintm.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintm,_f64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c new file mode 100644 index 0000000000000..772a32bdd20dc --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
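A pattern repeated throughout this patch: svbool_t carries one predicate bit per byte and lowers to <vscale x 16 x i1>, so tests on wider elements first narrow the predicate. A sketch of the IR shape the %[[PG]] captures stand for, with the vector types (stripped during extraction elsewhere in this document) written out for the half-precision case:

//   %pg8 = call <vscale x 8 x i1>
//       @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
// Byte-element tests (the b8/s8/u8 cases earlier in this patch) use %pg
// directly, which is why no conversion call appears in their CHECK lines.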
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrintn_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintn_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrintn_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintn_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrintn_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintn_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrintn_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintn_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrintn_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintn_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrintn_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintn_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrintn_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintn_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrintn_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintn_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrintn_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintn_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintn.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintn,_f64,_x,)(pg, op); +} diff --git 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c new file mode 100644 index 0000000000000..f7a1bac06f786 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrintp_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintp_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrintp_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintp_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrintp_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintp_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrintp_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintp_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrintp_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintp_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrintp_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintp_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrintp_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintp_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f16,_x,)(pg, 
op); +} + +svfloat32_t test_svrintp_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintp_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrintp_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintp_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintp.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintp,_f64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c new file mode 100644 index 0000000000000..c2504fddbd788 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrintx_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintx_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrintx_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintx_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrintx_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintx_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrintx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintx_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrintx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintx_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f32,_m,)(inactive, pg, op); +} + 
+svfloat64_t test_svrintx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintx_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrintx_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintx_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrintx_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintx_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrintx_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintx_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintx.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintx,_f64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c new file mode 100644 index 0000000000000..bf780ccc220eb --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
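One FileCheck convention used by every test here is worth spelling out: a pattern of the form [[PG:.*]] both matches text and binds it to a variable, and each later [[PG]] must match the identical text, so the checks prove the intrinsic consumes exactly the predicate produced by the conversion.

//   // CHECK: %[[PG:.*]] = call ...   <- binds the matched SSA name to PG
//   // CHECK: ... %[[PG]], %op)       <- must reuse that same name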
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrintz_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintz_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f16,_z,)(pg, op); +} + +svfloat32_t test_svrintz_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintz_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f32,_z,)(pg, op); +} + +svfloat64_t test_svrintz_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintz_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f64,_z,)(pg, op); +} + +svfloat16_t test_svrintz_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintz_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svrintz_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintz_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svrintz_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintz_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svrintz_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svrintz_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f16,_x,)(pg, op); +} + +svfloat32_t test_svrintz_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svrintz_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f32,_x,)(pg, op); +} + +svfloat64_t test_svrintz_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svrintz_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frintz.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrintz,_f64,_x,)(pg, op); +} diff --git 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c
new file mode 100644
index 0000000000000..466d2d527a3a6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svrsqrte_f16(svfloat16_t op)
+{
+  // CHECK-LABEL: test_svrsqrte_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half> %op)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svrsqrte,_f16,,)(op);
+}
+
+svfloat32_t test_svrsqrte_f32(svfloat32_t op)
+{
+  // CHECK-LABEL: test_svrsqrte_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrte.x.nxv4f32(<vscale x 4 x float> %op)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svrsqrte,_f32,,)(op);
+}
+
+svfloat64_t test_svrsqrte_f64(svfloat64_t op)
+{
+  // CHECK-LABEL: test_svrsqrte_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrte.x.nxv2f64(<vscale x 2 x double> %op)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svrsqrte,_f64,,)(op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c
new file mode 100644
index 0000000000000..19f7d943ac196
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
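The estimate/step pair covered by these two files is meant to be composed. A hedged sketch of one Newton-Raphson refinement using the overloaded names (svmul_x and svptrue_b32 come from arm_sve.h and are assumptions here, not part of this patch); svrsqrts(a, b) computes (3 - a*b)/2, matching the FRSQRTS instruction:

// One refinement of y ~= 1/sqrt(x):  y' = y * (3 - x*y*y)/2
svfloat32_t rsqrt_refined(svfloat32_t x) {
  svbool_t all = svptrue_b32();                        // all lanes active
  svfloat32_t y = svrsqrte(x);                         // initial estimate
  svfloat32_t step = svrsqrts(svmul_x(all, x, y), y);  // (3 - x*y*y)/2
  return svmul_x(all, y, step);                        // refined estimate
}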
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svrsqrts_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svrsqrts_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frsqrts.x.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrsqrts,_f16,,)(op1, op2); +} + +svfloat32_t test_svrsqrts_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svrsqrts_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frsqrts.x.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrsqrts,_f32,,)(op1, op2); +} + +svfloat64_t test_svrsqrts_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svrsqrts_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.frsqrts.x.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svrsqrts,_f64,,)(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c new file mode 100644 index 0000000000000..3b5d3b5502f4b --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svscale_f16_z(svbool_t pg, svfloat16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svscale_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svscale_f32_z(svbool_t pg, svfloat32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svscale_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svscale_f64_z(svbool_t pg, svfloat64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svscale_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svscale_f16_m(svbool_t pg, svfloat16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svscale_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svscale_f32_m(svbool_t pg, svfloat32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svscale_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svscale_f64_m(svbool_t pg, svfloat64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svscale_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svscale_f16_x(svbool_t pg, svfloat16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svscale_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svscale_f32_x(svbool_t pg, svfloat32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svscale_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svscale_f64_x(svbool_t pg, svfloat64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svscale_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svscale_n_f16_z(svbool_t pg, svfloat16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svscale_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svscale_n_f32_z(svbool_t pg, svfloat32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svscale_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svscale_n_f64_z(svbool_t pg, svfloat64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svscale_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK-DAG: %[[SEL:.*]] = call 
@llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svscale_n_f16_m(svbool_t pg, svfloat16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svscale_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svscale_n_f32_m(svbool_t pg, svfloat32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svscale_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svscale_n_f64_m(svbool_t pg, svfloat64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svscale_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svscale_n_f16_x(svbool_t pg, svfloat16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svscale_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svscale_n_f32_x(svbool_t pg, svfloat32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svscale_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svscale_n_f64_x(svbool_t pg, svfloat64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svscale_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fscale.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svscale,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c new file mode 100644 index 0000000000000..97acd393f4d03 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c @@ -0,0 +1,116 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: 
%clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svsel_s8(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svsel_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_s8,,)(pg, op1, op2); +} + +svint16_t test_svsel_s16(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svsel_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_s16,,)(pg, op1, op2); +} + +svint32_t test_svsel_s32(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svsel_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_s32,,)(pg, op1, op2); +} + +svint64_t test_svsel_s64(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svsel_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_s64,,)(pg, op1, op2); +} + +svuint8_t test_svsel_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svsel_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_u8,,)(pg, op1, op2); +} + +svuint16_t test_svsel_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svsel_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_u16,,)(pg, op1, op2); +} + +svuint32_t test_svsel_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svsel_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_u32,,)(pg, op1, op2); +} + +svuint64_t test_svsel_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svsel_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_u64,,)(pg, op1, op2); +} + +svfloat16_t test_svsel_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsel_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsel,_f16,,)(pg, op1, op2); +} + 
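+// Note: svbool_t is always a full <vscale x 16 x i1> predicate. The
+// 8-bit tests above use %pg directly, while the 16/32/64-bit tests first
+// narrow it through @llvm.aarch64.sve.convert.from.svbool.nxv{8,4,2}i1,
+// which is why those tests check for that conversion before the sel call
+// itself.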
+svfloat32_t test_svsel_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // CHECK-LABEL: test_svsel_f32
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.sel.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svsel,_f32,,)(pg, op1, op2);
+}
+
+svfloat64_t test_svsel_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svsel_f64
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svsel,_f64,,)(pg, op1, op2);
+}
+
+svbool_t test_svsel_b(svbool_t pg, svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svsel_b
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svsel,_b,,)(pg, op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c
new file mode 100644
index 0000000000000..795fb70426744
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
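+// For example, without SVE_OVERLOADED_FORMS
+//   SVE_ACLE_FUNC(svsplice,_s8,,) expands to svsplice_s8,
+// and with SVE_OVERLOADED_FORMS defined it expands to the overloaded
+// form svsplice.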
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svsplice_s8(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svsplice_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_s8,,)(pg, op1, op2); +} + +svint16_t test_svsplice_s16(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svsplice_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_s16,,)(pg, op1, op2); +} + +svint32_t test_svsplice_s32(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svsplice_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_s32,,)(pg, op1, op2); +} + +svint64_t test_svsplice_s64(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svsplice_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_s64,,)(pg, op1, op2); +} + +svuint8_t test_svsplice_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svsplice_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_u8,,)(pg, op1, op2); +} + +svuint16_t test_svsplice_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svsplice_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_u16,,)(pg, op1, op2); +} + +svuint32_t test_svsplice_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svsplice_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_u32,,)(pg, op1, op2); +} + +svuint64_t test_svsplice_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svsplice_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_u64,,)(pg, op1, op2); +} + +svfloat16_t test_svsplice_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsplice_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_f16,,)(pg, op1, op2); +} + +svfloat32_t test_svsplice_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsplice_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: 
%[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_f32,,)(pg, op1, op2); +} + +svfloat64_t test_svsplice_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsplice_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.splice.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsplice,_f64,,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c new file mode 100644 index 0000000000000..3f0cf96cb7a65 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svsqrt_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svsqrt_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f16,_z,)(pg, op); +} + +svfloat32_t test_svsqrt_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svsqrt_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f32,_z,)(pg, op); +} + +svfloat64_t test_svsqrt_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svsqrt_f64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f64,_z,)(pg, op); +} + +svfloat16_t test_svsqrt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svsqrt_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv8f16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f16,_m,)(inactive, pg, op); +} + +svfloat32_t test_svsqrt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svsqrt_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv4f32( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f32,_m,)(inactive, pg, op); +} + +svfloat64_t test_svsqrt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svsqrt_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv2f64( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f64,_m,)(inactive, pg, op); +} + +svfloat16_t test_svsqrt_f16_x(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svsqrt_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv8f16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f16,_x,)(pg, op); +} + +svfloat32_t test_svsqrt_f32_x(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svsqrt_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv4f32( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f32,_x,)(pg, op); +} + +svfloat64_t test_svsqrt_f64_x(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svsqrt_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsqrt.nxv2f64( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsqrt,_f64,_x,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c index 63d6396eb4eb0..fdd9767cc7659 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c @@ -469,3 +469,180 @@ svuint64_t test_svsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svsub,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svsub_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsub_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svsub_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsub_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svsub_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsub_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svsub_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsub_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f16,_m,)(pg, op1, op2); +} + 
+svfloat32_t test_svsub_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsub_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svsub_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsub_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svsub_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsub_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svsub_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsub_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svsub_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsub_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t test_svsub_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsub_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svsub_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsub_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svsub_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsub_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svsub,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svsub_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsub_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svsub_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsub_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svsub_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsub_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svsub_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsub_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svsub_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsub_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svsub_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsub_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsub.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsub,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c index 6931ce309c287..0d8eb5eb98ab0 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c @@ -469,3 +469,180 @@ svuint64_t test_svsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svsubr,_n_u64,_x,)(pg, op1, op2); } + +svfloat16_t test_svsubr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsubr_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: 
%[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsubr_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsubr_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svsubr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsubr_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsubr_f32_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsubr_f64_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svsubr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svsubr_f16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svsubr_f32_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svsubr_f64_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_f64,_x,)(pg, op1, op2); +} + +svfloat16_t 
test_svsubr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8f16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f16,_z,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4f32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f32,_z,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2f64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %[[SEL]], %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f64,_z,)(pg, op1, op2); +} + +svfloat16_t test_svsubr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f16_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f16,_m,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f32_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f32,_m,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f64_m + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f64,_m,)(pg, op1, op2); +} + +svfloat16_t test_svsubr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f16_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv8f16( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svsubr,_n_f16,_x,)(pg, op1, op2); +} + +svfloat32_t test_svsubr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f32_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f32,_x,)(pg, op1, op2); +} + +svfloat64_t test_svsubr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svsubr_n_f64_x + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fsubr.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svsubr,_n_f64,_x,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c new file mode 100644 index 0000000000000..a2b7ee5f7495f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
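+// Note: svtbl is a table lookup; each result element is data[indices[i]],
+// and an out-of-range index yields zero under SVE TBL semantics.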
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svtbl_s8(svint8_t data, svuint8_t indices) +{ + // CHECK-LABEL: test_svtbl_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv16i8( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_s8,,)(data, indices); +} + +svint16_t test_svtbl_s16(svint16_t data, svuint16_t indices) +{ + // CHECK-LABEL: test_svtbl_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv8i16( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_s16,,)(data, indices); +} + +svint32_t test_svtbl_s32(svint32_t data, svuint32_t indices) +{ + // CHECK-LABEL: test_svtbl_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv4i32( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_s32,,)(data, indices); +} + +svint64_t test_svtbl_s64(svint64_t data, svuint64_t indices) +{ + // CHECK-LABEL: test_svtbl_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv2i64( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_s64,,)(data, indices); +} + +svuint8_t test_svtbl_u8(svuint8_t data, svuint8_t indices) +{ + // CHECK-LABEL: test_svtbl_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv16i8( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_u8,,)(data, indices); +} + +svuint16_t test_svtbl_u16(svuint16_t data, svuint16_t indices) +{ + // CHECK-LABEL: test_svtbl_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv8i16( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_u16,,)(data, indices); +} + +svuint32_t test_svtbl_u32(svuint32_t data, svuint32_t indices) +{ + // CHECK-LABEL: test_svtbl_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv4i32( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_u32,,)(data, indices); +} + +svuint64_t test_svtbl_u64(svuint64_t data, svuint64_t indices) +{ + // CHECK-LABEL: test_svtbl_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv2i64( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_u64,,)(data, indices); +} + +svfloat16_t test_svtbl_f16(svfloat16_t data, svuint16_t indices) +{ + // CHECK-LABEL: test_svtbl_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv8f16( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_f16,,)(data, indices); +} + +svfloat32_t test_svtbl_f32(svfloat32_t data, svuint32_t indices) +{ + // CHECK-LABEL: test_svtbl_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv4f32( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_f32,,)(data, indices); +} + +svfloat64_t test_svtbl_f64(svfloat64_t data, svuint64_t indices) +{ + // CHECK-LABEL: test_svtbl_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv2f64( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtbl,_f64,,)(data, indices); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c new file mode 100644 index 0000000000000..6e7cbed5350e9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c @@ -0,0 +1,140 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall 
-emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svtrn1_s8(svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svtrn1_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_s8,,)(op1, op2); +} + +svint16_t test_svtrn1_s16(svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svtrn1_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_s16,,)(op1, op2); +} + +svint32_t test_svtrn1_s32(svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svtrn1_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_s32,,)(op1, op2); +} + +svint64_t test_svtrn1_s64(svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svtrn1_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_s64,,)(op1, op2); +} + +svuint8_t test_svtrn1_u8(svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svtrn1_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_u8,,)(op1, op2); +} + +svuint16_t test_svtrn1_u16(svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svtrn1_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_u16,,)(op1, op2); +} + +svuint32_t test_svtrn1_u32(svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svtrn1_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_u32,,)(op1, op2); +} + +svuint64_t test_svtrn1_u64(svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svtrn1_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_u64,,)(op1, op2); +} + +svfloat16_t test_svtrn1_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svtrn1_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_f16,,)(op1, op2); +} + +svfloat32_t test_svtrn1_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svtrn1_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_f32,,)(op1, op2); +} + +svfloat64_t test_svtrn1_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svtrn1_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn1,_f64,,)(op1, op2); +} + +svbool_t test_svtrn1_b8(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn1_b8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv16i1( %op1, %op2) + // CHECK: ret 
%[[INTRINSIC]] + return svtrn1_b8(op1, op2); +} + +svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn1_b16 + // CHECK-DAG: %[[OP1:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op1) + // CHECK-DAG: %[[OP2:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv8i1( %[[OP1]], %[[OP2]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svtrn1_b16(op1, op2); +} + +svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn1_b32 + // CHECK-DAG: %[[OP1:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %op1) + // CHECK-DAG: %[[OP2:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv4i1( %[[OP1]], %[[OP2]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svtrn1_b32(op1, op2); +} + +svbool_t test_svtrn1_b64(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn1_b64 + // CHECK-DAG: %[[OP1:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %op1) + // CHECK-DAG: %[[OP2:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn1.nxv2i1( %[[OP1]], %[[OP2]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return svtrn1_b64(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c new file mode 100644 index 0000000000000..432370bb459f9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c @@ -0,0 +1,140 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svtrn2_s8(svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svtrn2_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_s8,,)(op1, op2); +} + +svint16_t test_svtrn2_s16(svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svtrn2_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_s16,,)(op1, op2); +} + +svint32_t test_svtrn2_s32(svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svtrn2_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_s32,,)(op1, op2); +} + +svint64_t test_svtrn2_s64(svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svtrn2_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_s64,,)(op1, op2); +} + +svuint8_t test_svtrn2_u8(svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svtrn2_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_u8,,)(op1, op2); +} + +svuint16_t test_svtrn2_u16(svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svtrn2_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_u16,,)(op1, op2); +} + +svuint32_t test_svtrn2_u32(svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svtrn2_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_u32,,)(op1, op2); +} + +svuint64_t test_svtrn2_u64(svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svtrn2_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_u64,,)(op1, op2); +} + +svfloat16_t test_svtrn2_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svtrn2_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_f16,,)(op1, op2); +} + +svfloat32_t test_svtrn2_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svtrn2_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_f32,,)(op1, op2); +} + +svfloat64_t test_svtrn2_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svtrn2_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtrn2,_f64,,)(op1, op2); +} + +svbool_t test_svtrn2_b8(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn2_b8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.trn2.nxv16i1( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return svtrn2_b8(op1, op2); +} + +svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svtrn2_b16 + // CHECK-DAG: %[[OP1:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op1) + // CHECK-DAG: %[[OP2:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op2) + // CHECK: %[[INTRINSIC:.*]] = call 
<vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %[[OP1]], <vscale x 8 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svtrn2_b16(op1, op2);
+}
+
+svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svtrn2_b32
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %[[OP1]], <vscale x 4 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svtrn2_b32(op1, op2);
+}
+
+svbool_t test_svtrn2_b64(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svtrn2_b64
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %[[OP1]], <vscale x 2 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svtrn2_b64(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tsmul.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tsmul.c
new file mode 100644
index 0000000000000..1049f8482932b
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tsmul.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svtsmul_f16(svfloat16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svtsmul_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftsmul.x.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtsmul,_f16,,)(op1, op2); +} + +svfloat32_t test_svtsmul_f32(svfloat32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svtsmul_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftsmul.x.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtsmul,_f32,,)(op1, op2); +} + +svfloat64_t test_svtsmul_f64(svfloat64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svtsmul_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftsmul.x.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtsmul,_f64,,)(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tssel.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tssel.c new file mode 100644 index 0000000000000..1c028feb3cfa3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tssel.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svfloat16_t test_svtssel_f16(svfloat16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svtssel_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftssel.x.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtssel,_f16,,)(op1, op2); +} + +svfloat32_t test_svtssel_f32(svfloat32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svtssel_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftssel.x.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtssel,_f32,,)(op1, op2); +} + +svfloat64_t test_svtssel_f64(svfloat64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svtssel_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ftssel.x.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svtssel,_f64,,)(op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpkhi.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpkhi.c new file mode 100644 index 0000000000000..5bab85658561f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpkhi.c @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
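+// Note: sunpkhi sign-extends and uunpkhi zero-extends the elements of the
+// high half of the input vector, while punpkhi unpacks the high half of a
+// predicate; the *lo variants in acle_sve_unpklo.c do the same for the
+// low half.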
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint16_t test_svunpkhi_s16(svint8_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_s16,,)(op);
+}
+
+svint32_t test_svunpkhi_s32(svint16_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_s32,,)(op);
+}
+
+svint64_t test_svunpkhi_s64(svint32_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_s64,,)(op);
+}
+
+svuint16_t test_svunpkhi_u16(svuint8_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_u16,,)(op);
+}
+
+svuint32_t test_svunpkhi_u32(svuint16_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %op)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_u32,,)(op);
+}
+
+svuint64_t test_svunpkhi_u64(svuint32_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %op)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svunpkhi,_u64,,)(op);
+}
+
+svbool_t test_svunpkhi_b(svbool_t op)
+{
+  // CHECK-LABEL: test_svunpkhi_b
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.punpkhi.nxv16i1(<vscale x 16 x i1> %op)
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return SVE_ACLE_FUNC(svunpkhi,_b,,)(op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c
new file mode 100644
index 0000000000000..343f61c5257f6
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svunpklo_s16(svint8_t op) +{ + // CHECK-LABEL: test_svunpklo_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sunpklo.nxv8i16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_s16,,)(op); +} + +svint32_t test_svunpklo_s32(svint16_t op) +{ + // CHECK-LABEL: test_svunpklo_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sunpklo.nxv4i32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_s32,,)(op); +} + +svint64_t test_svunpklo_s64(svint32_t op) +{ + // CHECK-LABEL: test_svunpklo_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sunpklo.nxv2i64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_s64,,)(op); +} + +svuint16_t test_svunpklo_u16(svuint8_t op) +{ + // CHECK-LABEL: test_svunpklo_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uunpklo.nxv8i16( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_u16,,)(op); +} + +svuint32_t test_svunpklo_u32(svuint16_t op) +{ + // CHECK-LABEL: test_svunpklo_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uunpklo.nxv4i32( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_u32,,)(op); +} + +svuint64_t test_svunpklo_u64(svuint32_t op) +{ + // CHECK-LABEL: test_svunpklo_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uunpklo.nxv2i64( %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svunpklo,_u64,,)(op); +} + +svbool_t test_svunpklo_b(svbool_t op) +{ + // CHECK-LABEL: test_svunpklo_b + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.punpklo.nxv16i1( %op) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svunpklo,_b,,)(op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c new file mode 100644 index 0000000000000..d95cb5ec9bf89 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c @@ -0,0 +1,140 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
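+// Note: svuzp1 concatenates op1:op2 and keeps the even-numbered elements;
+// svuzp2 (see acle_sve_uzp2.c) keeps the odd-numbered ones.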
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svuzp1_s8(svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svuzp1_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_s8,,)(op1, op2); +} + +svint16_t test_svuzp1_s16(svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svuzp1_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_s16,,)(op1, op2); +} + +svint32_t test_svuzp1_s32(svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svuzp1_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_s32,,)(op1, op2); +} + +svint64_t test_svuzp1_s64(svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svuzp1_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_s64,,)(op1, op2); +} + +svuint8_t test_svuzp1_u8(svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svuzp1_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv16i8( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_u8,,)(op1, op2); +} + +svuint16_t test_svuzp1_u16(svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svuzp1_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv8i16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_u16,,)(op1, op2); +} + +svuint32_t test_svuzp1_u32(svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svuzp1_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv4i32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_u32,,)(op1, op2); +} + +svuint64_t test_svuzp1_u64(svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svuzp1_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv2i64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_u64,,)(op1, op2); +} + +svfloat16_t test_svuzp1_f16(svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svuzp1_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv8f16( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_f16,,)(op1, op2); +} + +svfloat32_t test_svuzp1_f32(svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svuzp1_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv4f32( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_f32,,)(op1, op2); +} + +svfloat64_t test_svuzp1_f64(svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svuzp1_f64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv2f64( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svuzp1,_f64,,)(op1, op2); +} + +svbool_t test_svuzp1_b8(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svuzp1_b8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uzp1.nxv16i1( %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + return svuzp1_b8(op1, op2); +} + +svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2) +{ + // CHECK-LABEL: test_svuzp1_b16 + // CHECK-DAG: %[[OP1:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op1) + // CHECK-DAG: %[[OP2:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op2) + // CHECK: %[[INTRINSIC:.*]] = call 
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %[[OP1]], <vscale x 8 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp1_b16(op1, op2);
+}
+
+svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp1_b32
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %[[OP1]], <vscale x 4 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp1_b32(op1, op2);
+}
+
+svbool_t test_svuzp1_b64(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp1_b64
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %[[OP1]], <vscale x 2 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp1_b64(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
new file mode 100644
index 0000000000000..b359aa68e8c06
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
@@ -0,0 +1,140 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svuzp2_s8(svint8_t op1, svint8_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_s8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_s8,,)(op1, op2);
+}
+
+svint16_t test_svuzp2_s16(svint16_t op1, svint16_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_s16,,)(op1, op2);
+}
+
+svint32_t test_svuzp2_s32(svint32_t op1, svint32_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_s32,,)(op1, op2);
+}
+
+svint64_t test_svuzp2_s64(svint64_t op1, svint64_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_s64,,)(op1, op2);
+}
+
+svuint8_t test_svuzp2_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_u8,,)(op1, op2);
+}
+
+svuint16_t test_svuzp2_u16(svuint16_t op1, svuint16_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_u16,,)(op1, op2);
+}
+
+svuint32_t test_svuzp2_u32(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_u32,,)(op1, op2);
+}
+
+svuint64_t test_svuzp2_u64(svuint64_t op1, svuint64_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_u64,,)(op1, op2);
+}
+
+svfloat16_t test_svuzp2_f16(svfloat16_t op1, svfloat16_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_f16,,)(op1, op2);
+}
+
+svfloat32_t test_svuzp2_f32(svfloat32_t op1, svfloat32_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_f32,,)(op1, op2);
+}
+
+svfloat64_t test_svuzp2_f64(svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svuzp2,_f64,,)(op1, op2);
+}
+
+svbool_t test_svuzp2_b8(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_b8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return svuzp2_b8(op1, op2);
+}
+
+svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_b16
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %[[OP1]], <vscale x 8 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp2_b16(op1, op2);
+}
+
+svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_b32
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %[[OP1]], <vscale x 4 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp2_b32(op1, op2);
+}
+
+svbool_t test_svuzp2_b64(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svuzp2_b64
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %[[OP1]], <vscale x 2 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svuzp2_b64(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
new file mode 100644
index 0000000000000..3444ac9aa1abb
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
@@ -0,0 +1,140 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svzip1_s8(svint8_t op1, svint8_t op2)
+{
+  // CHECK-LABEL: test_svzip1_s8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_s8,,)(op1, op2);
+}
+
+svint16_t test_svzip1_s16(svint16_t op1, svint16_t op2)
+{
+  // CHECK-LABEL: test_svzip1_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_s16,,)(op1, op2);
+}
+
+svint32_t test_svzip1_s32(svint32_t op1, svint32_t op2)
+{
+  // CHECK-LABEL: test_svzip1_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_s32,,)(op1, op2);
+}
+
+svint64_t test_svzip1_s64(svint64_t op1, svint64_t op2)
+{
+  // CHECK-LABEL: test_svzip1_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_s64,,)(op1, op2);
+}
+
+svuint8_t test_svzip1_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svzip1_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_u8,,)(op1, op2);
+}
+
+svuint16_t test_svzip1_u16(svuint16_t op1, svuint16_t op2)
+{
+  // CHECK-LABEL: test_svzip1_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_u16,,)(op1, op2);
+}
+
+svuint32_t test_svzip1_u32(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svzip1_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_u32,,)(op1, op2);
+}
+
+svuint64_t test_svzip1_u64(svuint64_t op1, svuint64_t op2)
+{
+  // CHECK-LABEL: test_svzip1_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_u64,,)(op1, op2);
+}
+
+svfloat16_t test_svzip1_f16(svfloat16_t op1, svfloat16_t op2)
+{
+  // CHECK-LABEL: test_svzip1_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_f16,,)(op1, op2);
+}
+
+svfloat32_t test_svzip1_f32(svfloat32_t op1, svfloat32_t op2)
+{
+  // CHECK-LABEL: test_svzip1_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_f32,,)(op1, op2);
+}
+
+svfloat64_t test_svzip1_f64(svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svzip1_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip1,_f64,,)(op1, op2);
+}
+
+svbool_t test_svzip1_b8(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip1_b8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return svzip1_b8(op1, op2);
+}
+
+svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip1_b16
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %[[OP1]], <vscale x 8 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip1_b16(op1, op2);
+}
+
+svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip1_b32
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %[[OP1]], <vscale x 4 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip1_b32(op1, op2);
+}
+
+svbool_t test_svzip1_b64(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip1_b64
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %[[OP1]], <vscale x 2 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip1_b64(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
new file mode 100644
index 0000000000000..bba3aca2502a3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
@@ -0,0 +1,140 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint8_t test_svzip2_s8(svint8_t op1, svint8_t op2)
+{
+  // CHECK-LABEL: test_svzip2_s8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_s8,,)(op1, op2);
+}
+
+svint16_t test_svzip2_s16(svint16_t op1, svint16_t op2)
+{
+  // CHECK-LABEL: test_svzip2_s16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_s16,,)(op1, op2);
+}
+
+svint32_t test_svzip2_s32(svint32_t op1, svint32_t op2)
+{
+  // CHECK-LABEL: test_svzip2_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_s32,,)(op1, op2);
+}
+
+svint64_t test_svzip2_s64(svint64_t op1, svint64_t op2)
+{
+  // CHECK-LABEL: test_svzip2_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_s64,,)(op1, op2);
+}
+
+svuint8_t test_svzip2_u8(svuint8_t op1, svuint8_t op2)
+{
+  // CHECK-LABEL: test_svzip2_u8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
+  // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_u8,,)(op1, op2);
+}
+
+svuint16_t test_svzip2_u16(svuint16_t op1, svuint16_t op2)
+{
+  // CHECK-LABEL: test_svzip2_u16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_u16,,)(op1, op2);
+}
+
+svuint32_t test_svzip2_u32(svuint32_t op1, svuint32_t op2)
+{
+  // CHECK-LABEL: test_svzip2_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_u32,,)(op1, op2);
+}
+
+svuint64_t test_svzip2_u64(svuint64_t op1, svuint64_t op2)
+{
+  // CHECK-LABEL: test_svzip2_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_u64,,)(op1, op2);
+}
+
+svfloat16_t test_svzip2_f16(svfloat16_t op1, svfloat16_t op2)
+{
+  // CHECK-LABEL: test_svzip2_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_f16,,)(op1, op2);
+}
+
+svfloat32_t test_svzip2_f32(svfloat32_t op1, svfloat32_t op2)
+{
+  // CHECK-LABEL: test_svzip2_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_f32,,)(op1, op2);
+}
+
+svfloat64_t test_svzip2_f64(svfloat64_t op1, svfloat64_t op2)
+{
+  // CHECK-LABEL: test_svzip2_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svzip2,_f64,,)(op1, op2);
+}
+
+svbool_t test_svzip2_b8(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip2_b8
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %op1, <vscale x 16 x i1> %op2)
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
+  return svzip2_b8(op1, op2);
+}
+
+svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip2_b16
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %[[OP1]], <vscale x 8 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip2_b16(op1, op2);
+}
+
+svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip2_b32
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %[[OP1]], <vscale x 4 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip2_b32(op1, op2);
+}
+
+svbool_t test_svzip2_b64(svbool_t op1, svbool_t op2)
+{
+  // CHECK-LABEL: test_svzip2_b64
+  // CHECK-DAG: %[[OP1:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op1)
+  // CHECK-DAG: %[[OP2:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %op2)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %[[OP1]], <vscale x 2 x i1> %[[OP2]])
+  // CHECK: %[[CAST:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[CAST]]
+  return svzip2_b64(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
index 1f68f6b19438f..39b1ac318ffd7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
@@ -10,6 +10,48 @@
 
 #include <arm_sve.h>
 
+svfloat16_t test_svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 0);
+}
+
+svfloat16_t test_svcadd_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 272);
+}
+
+svfloat16_t test_svcadd_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 91);
+}
+
+svfloat16_t test_svcadd_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 180);
+}
+
+svfloat16_t test_svcadd_f16_z_4(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 271);
+}
+
+svfloat32_t test_svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f32,_z,)(pg, op1, op2, 0);
+}
+
+svfloat64_t test_svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f64,_z,)(pg, op1, op2, 0);
+}
+
 svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 {
   // expected-error@+1 {{argument should be the value 90 or 270}}
@@ -27,3 +69,21 @@ svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
   // expected-error@+1 {{argument should be the value 90 or 270}}
   return SVE_ACLE_FUNC(svcadd,_f64,_m,)(pg, op1, op2, 0);
 }
+
+svfloat16_t test_svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_x,)(pg, op1, op2, 0);
+}
+
+svfloat32_t test_svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f32,_x,)(pg, op1, op2, 0);
+}
+
+svfloat64_t test_svcadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f64,_x,)(pg, op1, op2, 0);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
index 94f36260ba37b..ae507be463560 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c
@@ -10,6 +10,48 @@
 
 #include <arm_sve.h>
 
+svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 19);
+}
+
+svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 1);
+}
+
+svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 18);
+}
+
+svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 91);
+}
+
+svfloat16_t test_svcmla_f16_z_4(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 181);
+}
+
+svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f32,_z,)(pg, op1, op2, op3, 19);
+}
+
+svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f64,_z,)(pg, op1, op2, op3, 19);
+}
+
 svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
 {
   // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
@@ -28,6 +70,24 @@ svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
   return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 19);
 }
 
+svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f16,_x,)(pg, op1, op2, op3, 19);
+}
+
+svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f32,_x,)(pg, op1, op2, op3, 19);
+}
+
+svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // expected-error@+1 {{argument should be the value 0, 90, 180 or 270}}
+  return SVE_ACLE_FUNC(svcmla,_f64,_x,)(pg, op1, op2, op3, 19);
+}
+
 svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
 {
   // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
@@ -105,3 +165,9 @@ svfloat32_t test_svcmla_lane_f32_3(svfloat32_t op1, svfloat32_t op2, svfloat32_t
   // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}}
   return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 2, 0);
 }
+
+svfloat32_t test_svcmla_lane_f32_4(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 3, 180);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mul.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mul.c
new file mode 100644
index 0000000000000..b8cb87dab9990
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mul.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svfloat16_t test_svmul_lane_f16(svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 7]}}
+  return SVE_ACLE_FUNC(svmul_lane,_f16,,)(op1, op2, 8);
+}
+
+svfloat32_t test_svmul_lane_f32(svfloat32_t op1, svfloat32_t op2)
+{
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 3]}}
+  return SVE_ACLE_FUNC(svmul_lane,_f32,,)(op1, op2, -1);
+}
+
+svfloat64_t test_svmul_lane_f64(svfloat64_t op1, svfloat64_t op2)
+{
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}}
+  return SVE_ACLE_FUNC(svmul_lane,_f64,,)(op1, op2, 2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/big_endian.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/big_endian.c
new file mode 100644
index 0000000000000..fc44481c94c70
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/big_endian.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64_be-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+// expected-error@* {{Big endian is currently not supported for arm_sve.h}}
+#include <arm_sve.h>
diff --git a/clang/test/CodeGen/builtin-movdir.c b/clang/test/CodeGen/builtin-movdir.c
index 8d4edfbf8f774..e13cc776edd6d 100644
--- a/clang/test/CodeGen/builtin-movdir.c
+++ b/clang/test/CodeGen/builtin-movdir.c
@@ -24,11 +24,6 @@ void test_directstore64(void *dst, uint64_t value) {
 
 void test_dir64b(void *dst, const void *src) {
   // CHECK-LABEL: test_dir64b
-  // CHECK: [[PTRINT1:%.+]] = ptrtoint
-  // X86: [[MASKEDPTR1:%.+]] = and i32 [[PTRINT1]], 63
-  // X86: [[MASKCOND1:%.+]] = icmp eq i32 [[MASKEDPTR1]], 0
-  // X86_64: [[MASKEDPTR1:%.+]] = and i64 [[PTRINT1]], 63
-  // X86_64: [[MASKCOND1:%.+]] = icmp eq i64 [[MASKEDPTR1]], 0
   // CHECK: call void @llvm.x86.movdir64b
   _movdir64b(dst, src);
 }
diff --git
a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 8b497d95298cb..7e823c92a2562 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -645,18 +645,6 @@ i32x4 trunc_saturate_u_i32x4_f32x4(f32x4 f) { // WEBASSEMBLY-NEXT: ret } -i64x2 trunc_saturate_s_i64x2_f64x2(f64x2 f) { - return __builtin_wasm_trunc_saturate_s_i64x2_f64x2(f); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.signed.v2i64.v2f64(<2 x double> %f) - // WEBASSEMBLY-NEXT: ret -} - -i64x2 trunc_saturate_u_i64x2_f64x2(f64x2 f) { - return __builtin_wasm_trunc_saturate_u_i64x2_f64x2(f); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double> %f) - // WEBASSEMBLY-NEXT: ret -} - i8x16 narrow_s_i8x16_i16x8(i16x8 low, i16x8 high) { return __builtin_wasm_narrow_s_i8x16_i16x8(low, high); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16( diff --git a/clang/test/CodeGen/cmse-clear-arg.c b/clang/test/CodeGen/cmse-clear-arg.c new file mode 100644 index 0000000000000..1447eb61b66e5 --- /dev/null +++ b/clang/test/CodeGen/cmse-clear-arg.c @@ -0,0 +1,189 @@ +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-SOFTFP +// RUN: %clang_cc1 -triple thumbebv8m.main -O0 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE,CHECK-SOFTFP +// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-SOFTFP +// RUN: %clang_cc1 -triple thumbebv8m.main -O2 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE,CHECK-SOFTFP +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -mfloat-abi hard \ +// RUN: -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-HARDFP + +// We don't really need to repeat *all* the test cases from cmse-clear-return.c +// as it won't increase test coverage. + +// : Memory layout | Mask +// LE: .......1 ........ ........ ........ | 0x00000001/1 +// BE: 1....... ........ ........ ........ | 0x80000000/-2147483648 +typedef struct T0 { + int a : 1, : 31; +} T0; + +void __attribute__((cmse_nonsecure_call)) (*g0)(T0); + +T0 t0; +void f0() { g0(t0); } +// CHECK: define {{.*}} @f0() +// CHECK-LE: %[[V0:.*]] = and i32 {{.*}}, 1 +// CHECK-BE: %[[V0:.*]] = and i32 {{.*}}, -2147483648 +// CHECK: %[[V1:.*]] = insertvalue [1 x i32] undef, i32 %[[V0]], 0 +// CHECK: call {{.*}} void %0([1 x i32] %[[V1]]) + +// LE: 11111111 111111.. 11111111 11111111 0xfffffcff/-769 +// BE: 11111111 ..111111 11111111 11111111 0xff3fffff/-12582913 +typedef struct T8 { + struct T80 { + char a; + char : 2, b : 6; + } a; + short b; +} T8; + +T8 t8; +void __attribute__((cmse_nonsecure_call)) (*g8)(T8); +void f8() { g8(t8); } +// CHECK: define {{.*}} @f8() +// CHECK-LE: %[[V0:.*]] = and i32 {{.*}}, -769 +// CHECK-BE: %[[V0:.*]] = and i32 {{.*}}, -12582913 +// CHECK: %[[V1:.*]] = insertvalue [1 x i32] undef, i32 %[[V0]], 0 +// CHECK: call {{.*}} void %0([1 x i32] %[[V1]]) + +// LE(0): 11111111 ........ 11111111 11111111 0xffff00ff/-65281 +// LE(4): ...111.. 11111... 11111111 .....111 0x7fff81c/134215708 +// BE(0): 11111111 ........ 11111111 11111111 0xff00ffff/-16711681 +// BE(4): ..111... ...11111 11111111 111..... 
0x381fffe0/941621216 +typedef struct T15 { + char a; + short b; + int : 2, c : 3, : 6, d : 16; +} T15; + +T15 t15; + +void __attribute__((cmse_nonsecure_call)) (*g15_0)(T15); +void f15_0() { + g15_0(t15); +} +// CHECK: define {{.*}}@f15_0() +// CHECK: %[[FN:.*]] = load {{.*}} @g15_0 +// CHECK-LE: %cmse.clear = and i32 {{.*}}, -65281 +// CHECK-BE: %cmse.clear = and i32 {{.*}}, -16711681 +// CHECK: %[[R0:.*]] = insertvalue [2 x i32] undef, i32 %cmse.clear, 0 +// CHECK-LE: %cmse.clear1 = and i32 {{.*}}, 134215708 +// CHECK-BE: %cmse.clear1 = and i32 {{.*}}, 941621216 +// CHECK: %[[R1:.*]] = insertvalue [2 x i32] %[[R0]], i32 %cmse.clear1, 1 +// CHECK: call {{.*}} void %[[FN]]([2 x i32] %[[R1]]) + +void __attribute__((cmse_nonsecure_call)) (*g15_1)(int, int, int, T15); +void f15_1() { + g15_1(0, 1, 2, t15); +} +// CHECK: define {{.*}}@f15_1() +// CHECK: %[[FN:.*]] = load {{.*}} @g15_1 +// CHECK-LE: %cmse.clear = and i32 {{.*}}, -65281 +// CHECK-BE: %cmse.clear = and i32 {{.*}}, -16711681 +// CHECK: %[[R0:.*]] = insertvalue [2 x i32] undef, i32 %cmse.clear, 0 +// CHECK-LE: %cmse.clear1 = and i32 {{.*}}, 134215708 +// CHECK-BE: %cmse.clear1 = and i32 {{.*}}, 941621216 +// CHECK: %[[R1:.*]] = insertvalue [2 x i32] %[[R0]], i32 %cmse.clear1, 1 +// CHECK: call {{.*}} void %[[FN]](i32 0, i32 1, i32 2, [2 x i32] %[[R1]]) + +// LE: 11111111 ........ 11111111 11111111 1111.... ...11111 ........ .111111. +// LE: 0xff00fffff01f007e/9079291968726434047 +// BE: 11111111 ........ 11111111 11111111 ....1111 11111... ........ .111111. +// BE: 0xff00ffff0ff8007e/-71776123088273282 + +typedef struct T16 { + char a; + short b; + long long : 4, c : 9, : 12, d : 6; +} T16; + +T16 t16; + +void __attribute__((cmse_nonsecure_call)) (*g16_0)(T16); +void f16_0() { + g16_0(t16); +} +// CHECK: define {{.*}} @f16_0() +// CHECK: %[[FN:.*]] = load {{.*}} @g16_0 +// CHECK-LE: %cmse.clear = and i64 {{.*}}, 9079291968726434047 +// CHECK-BE: %cmse.clear = and i64 {{.*}}, -71776123088273282 +// CHECK: %[[R:.*]] = insertvalue [1 x i64] undef, i64 %cmse.clear, 0 +// CHECK: call {{.*}} void %0([1 x i64] %[[R]]) + + +// LE0: 1111..11 .......1 1111..11 .......1 1111..11 .......1 1111..11 .......1 +// LE4: 1111..11 .......1 1111..11 .......1 11111111 11111111 11111111 ........ +// LE : 0x01f301f3/32702963 * 3 + 0x00ffffff/16777215 +// BE0: 11..1111 1....... 11..1111 1....... 11..1111 1....... 11..1111 1....... +// BE4: 11..1111 1....... 11..1111 1....... 11111111 11111111 11111111 ........ 
+// BE : 0xcf80cf80/-813641856 * 3 + 0xffffff00/-256 + +typedef struct T18 { + struct T180 { + short a : 2; + short : 2, b : 5; + } a[2][3]; + char b[3]; + char c[]; +} T18; + +T18 t18; + +void __attribute__((cmse_nonsecure_call)) (*g18)(T18); +void f18() { + g18(t18); +} +// CHECK: define {{.*}} @f18() +// CHECK: %[[FN:.*]] = load {{.*}} @g18 +// CHECK-LE: %cmse.clear = and i32 {{.*}}, 32702963 +// CHECK-BE: %cmse.clear = and i32 {{.*}}, -813641856 +// CHECK: %[[R0:.*]] = insertvalue [4 x i32] undef, i32 %cmse.clear, 0 +// CHECK-LE: %cmse.clear1 = and i32 {{.*}}, 32702963 +// CHECK-BE: %cmse.clear1 = and i32 {{.*}}, -813641856 +// CHECK: %[[R1:.*]] = insertvalue [4 x i32] %[[R0]], i32 %cmse.clear1, 1 +// CHECK-LE: %cmse.clear2 = and i32 {{.*}}, 32702963 +// CHECK-BE: %cmse.clear2 = and i32 {{.*}}, -813641856 +// CHECK: %[[R2:.*]] = insertvalue [4 x i32] %[[R1]], i32 %cmse.clear2, 2 +// CHECK-LE: %cmse.clear3 = and i32 {{.*}}, 16777215 +// CHECK-BE: %cmse.clear3 = and i32 {{.*}}, -256 +// CHECK: %[[R3:.*]] = insertvalue [4 x i32] %[[R2]], i32 %cmse.clear3, 3 +// CHECK: call {{.*}} void %[[FN]]([4 x i32] %[[R3]]) + +// LE: 11111111 11111111 ..111... ..111... 0x3838ffff/943259647 +// BE: 11111111 11111111 ...111.. ...111.. 0xffff1c1c/-58340 +typedef union T19 { + short a; + struct T190 { + char : 3, a : 3; + } b[4]; +} T19; + +T19 t19; +void __attribute__((cmse_nonsecure_call)) (*g19)(T19); +void f19() { + g19(t19); +} +// CHECK: define {{.*}} @f19() +// CHECK: %[[FN:.*]] = load {{.*}} @g19 +// CHECK-LE: %cmse.clear = and i32 {{.*}}, 943259647 +// CHECK-BE: %cmse.clear = and i32 {{.*}}, -58340 +// CHECK: %[[R:.*]] = insertvalue [1 x i32] undef, i32 %cmse.clear, 0 +// CHECK: call {{.*}} void %[[FN]]([1 x i32] %[[R]]) + + +typedef struct T20 { + float a[2]; +} T20; + +T20 t20; +void __attribute__((cmse_nonsecure_call)) (*g20)(T20); +void f20() { + g20(t20); +} +// CHECK: define {{.*}} @f20() +// CHECK: %[[FN:.*]] = load {{.*}} @g20 +// CHECK-SOFTFP: call arm_aapcscc void %[[FN]]([2 x i32] +// CHECK-HARDFP: call arm_aapcs_vfpcc void %[[FN]](%struct.T20 diff --git a/clang/test/CodeGen/cmse-clear-fp16.c b/clang/test/CodeGen/cmse-clear-fp16.c new file mode 100644 index 0000000000000..a52190d39c306 --- /dev/null +++ b/clang/test/CodeGen/cmse-clear-fp16.c @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \ +// RUN: -fallow-half-arguments-and-returns %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-SOFT +// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \ +// RUN: -fallow-half-arguments-and-returns %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-SOFT +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm \ +// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOPT-HARD +// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm \ +// RUN: -fallow-half-arguments-and-returns -mfloat-abi hard %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-OPT-HARD + +__fp16 g0(); +__attribute__((cmse_nonsecure_entry)) __fp16 f0() { + return g0(); +} +// CHECK: define {{.*}}@f0() + +// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32 +// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-NOPT-SOFT: ret i32 %[[V1]] + +// CHECK-OPT-SOFT: %[[V0:.*]] = tail call {{.*}} @g0 +// CHECK-OPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-OPT-SOFT: ret i32 %[[V1]] + +// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32 +// 
CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float +// CHECK-NOPT-HARD: ret float %[[V2]] + +// CHECK-OPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32 +// CHECK-OPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-OPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float +// CHECK-OPT-HARD: ret float %[[V2]] + +void __attribute__((cmse_nonsecure_call)) (*g1)(__fp16); +__fp16 x; +void f1() { + g1(x); +} +// CHECK: define {{.*}}@f1() + +// CHECK-NOPT-SOFT: %[[V0:.*]] = load i32 +// CHECK-NOPT-SOFT: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-NOPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]]) + +// CHECK-OPT-SOFT: %[[V1:.*]] = zext i16 {{.*}} to i32 +// CHECK-OPT-SOFT: call {{.*}} void {{.*}}(i32 %[[V1]]) + +// CHECK-NOPT-HARD: %[[V0:.*]] = bitcast float {{.*}} to i32 +// CHECK-NOPT-HARD: %[[V1:.*]] = and i32 %[[V0]], 65535 +// CHECK-NOPT-HARD: %[[V2:.*]] = bitcast i32 %[[V1]] to float +// CHECK-NOPT-HARD: call {{.*}}(float %[[V2]]) + +// CHECK-OPT-HARD: %[[V0:.*]] = zext i16 {{.*}} to i32 +// CHECK-OPT-HARD: %[[V1:.*]] = bitcast i32 %[[V0]] to float +// CHECK-OPT-HARD: call {{.*}}(float %[[V1]]) diff --git a/clang/test/CodeGen/cmse-clear-return.c b/clang/test/CodeGen/cmse-clear-return.c new file mode 100644 index 0000000000000..0174e2475ec09 --- /dev/null +++ b/clang/test/CodeGen/cmse-clear-return.c @@ -0,0 +1,265 @@ +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-LE-NOPT,CHECK-SOFT +// RUN: %clang_cc1 -triple thumbebv8m.main -O0 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE,CHECK-BE-NOPT,CHECK-SOFT +// RUN: %clang_cc1 -triple thumbv8m.main -O2 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-LE-OPT,CHECK-SOFT +// RUN: %clang_cc1 -triple thumbebv8m.main -O2 -mcmse -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE,CHECK-BE-OPT,CHECK-SOFT +// RUN: %clang_cc1 -triple thumbv8m.main -O0 -mcmse -S -emit-llvm %s -o - \ +// RUN: -mfloat-abi hard | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE,CHECK-LE-NOPT,CHECK-HARD + + +// : Memory layout | Mask +// LE: .......1 ........ ........ ........ | 0x00000001/1 +// BE: 1....... ........ ........ ........ | 0x80000000/-2147483648 +typedef struct T0 { + int a : 1, : 31; +} T0; + +T0 t0; +__attribute__((cmse_nonsecure_entry)) T0 f0() { return t0; } +// CHECK: define {{.*}} @f0() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 1 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -2147483648 +// CHECK: ret i32 %[[R]] + +// LE: ......1. ........ ........ ........ 0x00000002/2 +// BE: .1...... ........ ........ ........ 0x40000000/1073741824 +typedef struct T1 { + int : 1, a : 1, : 30; +} T1; + +T1 t1; +__attribute__((cmse_nonsecure_entry)) T1 f1() { return t1; } +// CHECK: define {{.*}} @f1() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 2 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 1073741824 +// CHECK: ret i32 %[[R]] + +// LE: ........ .......1 ........ ........ 0x00000100/256 +// BE: ........ 1....... ........ ........ 0x00800000/8388608 +typedef struct T2 { + int : 8, a : 1, : 23; +} T2; + +T2 t2; +__attribute__((cmse_nonsecure_entry)) T2 f2() { return t2; } +// CHECK: define {{.*}} @f2() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 256 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 8388608 +// CHECK: ret i32 %[[R]] + +// LE: ........ .....1.. ........ ........ 0x00000400/1024 +// BE: ........ ..1..... 
........ ........ 0x00200000/2097152 +typedef struct T3 { + int : 10, a : 1; +} T3; + +T3 t3; +__attribute__((cmse_nonsecure_entry)) T3 f3() { return t3; } +// CHECK: define {{.*}} @f3() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 1024 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 2097152 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 ........ ........ ........ 0x000000ff/255 +// BE: 11111111 ........ ........ ........ 0xff000000/-16777216 +typedef struct T4 { + int a : 8, : 24; +} T4; + +T4 t4; +__attribute__((cmse_nonsecure_entry)) T4 f4() { return t4; } +// CHECK: define {{.*}} @f4() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 255 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -16777216 +// CHECK: ret i32 %[[R]] + +// LE: 1111111. .......1 ........ ........ 0x000001fe/510 +// BE: .1111111 1....... ........ ........ 0x7f800000/2139095040 +typedef struct T5 { + int : 1, a : 8, : 23; +} T5; + +T5 t5; +__attribute__((cmse_nonsecure_entry)) T5 f5() { return t5; } +// CHECK: define {{.*}} @f5() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 510 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 2139095040 +// CHECK: ret i32 %[[R]] + +// LE: 1111111. 11111111 ........ ........ 0x0000fffe/65534 +// BE: .1111111 11111111 ........ ........ 0x7fff0000/2147418112 +typedef struct T6 { + int : 1, a : 15, : 16; +} T6; + +T6 t6; +__attribute__((cmse_nonsecure_entry)) T6 f6() { return t6; } +// CHECK: define {{.*}} @f6() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 65534 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 2147418112 +// CHECK: ret i32 %[[R]] + +// LE: 1111111. 11111111 .......1 ........ 0x0001fffe/131070 +// BE: .1111111 11111111 1....... ........ 0x7fff8000/2147450880 +typedef struct T7 { + int : 1, a : 16, : 15; +} T7; + +T7 t7; +__attribute__((cmse_nonsecure_entry)) T7 f7() { return t7; } +// CHECK: define {{.*}} @f7() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 131070 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, 2147450880 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 111111.. 11111111 11111111 0xfffffcff/-769 +// BE: 11111111 ..111111 11111111 11111111 0xff3fffff/-12582913 +typedef struct T8 { + struct T80 { + char a; + char : 2, b : 6; + } a; + short b; +} T8; + +T8 t8; +__attribute__((cmse_nonsecure_entry)) T8 f8() { return t8; } +// CHECK: define {{.*}} @f8() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, -769 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -12582913 +// CHECK: ret i32 %[[R]] + +// LE: ......11 ..111111 ...11111 ........ 0x001f3f03/2047747 +// BE: 11...... 111111.. 11111... ........ 0xc0fcf800/-1057163264 +typedef struct T9 { + struct T90 { + char a : 2; + char : 0; + short b : 6; + } a; + int b : 5; +} T9; + +T9 t9; +__attribute__((cmse_nonsecure_entry)) T9 f9() { return t9; } +// CHECK: define {{.*}} @f9() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 2047747 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -1057163264 +// CHECK: ret i32 %[[R]] + +T9 f91() { return t9; } +// CHECK: define {{.*}} @f91() +// CHECK: %[[R:.*]] = load i32 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 ........ 11111111 11111111 0xffff00ff/-65281 +// BE: 11111111 ........ 11111111 11111111 0xff00ffff/16711681 +typedef struct T10 { + char a; + short b; +} T10; + +T10 t10; +__attribute__((cmse_nonsecure_entry)) T10 f10() { return t10; } +// CHECK: define {{.*}} @f10() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, -65281 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -16711681 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 11111111 11111111 ........ 0x00ffffff/16777215 +// BE: 11111111 11111111 11111111 ........ 
0xffffff00/-256 +typedef struct T11 { + short a; + char b; +} T11; + +T11 t11; +__attribute__((cmse_nonsecure_entry)) T11 f11() { return t11; } +// CHECK: define {{.*}} @f11() +// CHECK-LE: %[[R:.*]] = and i32 %{{.*}}, 16777215 +// CHECK-BE: %[[R:.*]] = and i32 %{{.*}}, -256 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 11111111 11111111 ........ 0x00ffffff/16777215 +// BE: 11111111 11111111 11111111 ........ 0xffffff00/-256 +typedef struct T12 { + char a[3]; +} T12; + +T12 t12; +__attribute__((cmse_nonsecure_entry)) T12 f12() { return t12; } +// CHECK: define {{.*}} @f12() +// CHECK-LE-OPT: %[[V0:.*]] = load i24, i24* bitcast (%struct.T12* @t12 +// CHECK-LE-OPT: %[[R:.*]] = zext i24 %[[V0]] to i32 +// CHECK-LE-NOPT: %[[R:.*]] = and i32 %{{.*}}, 16777215 + +// CHECK-BE-OPT: %[[V0:.*]] = load i24, i24* bitcast (%struct.T12* @t12 +// CHECK-BE-OPT: %[[V1:.*]] = zext i24 %[[V0]] to i32 +// CHECK-BE-OPT: %[[R:.*]] = shl nuw i32 %[[V1]], 8 +// CHECK: ret i32 %[[R]] + +// LE: 11111111 11111111 11111111 ........ 0x00ffffff/16777215 +// BE: 11111111 11111111 11111111 ........ 0xffffff00/-256 +typedef struct __attribute__((packed)) T13 { + char a; + short b; +} T13; + +T13 t13; +__attribute__((cmse_nonsecure_entry)) T13 f13() { return t13; } +// CHECK: define {{.*}} @f13() +// CHECK-LE-OPT: %[[V0:.*]] = load i24, i24* bitcast (%struct.T13* @t13 +// CHECK-LE-OPT: %[[R:.*]] = zext i24 %[[V0]] to i32 +// CHECK-LE-NOPT: %[[R:.*]] = and i32 %{{.*}}, 16777215 + +// CHECK-BE-OPT: %[[V0:.*]] = load i24, i24* bitcast (%struct.T13* @t13 +// CHECK-BE-OPT: %[[V1:.*]] = zext i24 %[[V0]] to i32 +// CHECK-BE-OPT: %[[R:.*]] = shl nuw i32 %[[V1]], 8 +// CHECK: ret i32 %[[R]] + +typedef struct __attribute__((packed)) T14 { + short a; + short b; +} T14; + +T14 t14; +__attribute__((cmse_nonsecure_entry)) T14 f14() { return t14; } +// CHECK: define {{.*}} @f14() +// CHECK: %[[R:.*]] = load +// CHECK: ret i32 %[[R]] + +// LE: 1111..11 1111..11 11111111 11111111 0xfffff3f3/-3085 +// BE: 11..1111 11..1111 11111111 11111111 0xcfcfffff/-808452097 +typedef struct T17 { + struct T170 { + char a : 2; + char : 2, b : 4; + } a[2]; + char b[2]; + char c[]; +} T17; + +T17 t17; +__attribute__((cmse_nonsecure_entry)) T17 f17() { return t17; } +// CHECK: define {{.*}} @f17() +// CHECK-LE: %[[R:.*]] = and i32 {{.*}}, -3085 +// CHECK-BE: %[[R:.*]] = and i32 {{.*}}, -808452097 +// CHECK: ret i32 %[[R]] + +typedef struct T21 { + float a; +} T21; + +T21 t21; +__attribute__((cmse_nonsecure_entry)) T21 f21() { return t21; } +// CHECK: define {{.*}} @f21() +// CHECK-SOFT: ret i32 +// CHECK-HARD: ret %struct.T21 + +__attribute__((cmse_nonsecure_entry)) float f22() { return 1.0f; } +// CHECK: define {{.*}} @f22() +// CHECK: ret float diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c new file mode 100644 index 0000000000000..41410be654976 --- /dev/null +++ b/clang/test/CodeGen/ext-int-cc.c @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LIN64 +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=WIN64 +// RUN: %clang_cc1 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=LIN32 +// RUN: %clang_cc1 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=WIN32 + +// Make sure 128 and 64 bit versions are passed like integers, and that >128 +// is passed indirectly. 
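+// (Note: the CHECK lines below suggest the rule being exercised: on LIN64 a
+// value of at most 128 bits still travels as up to two i64 register values,
+// while 129 bits and wider goes indirectly, byval on the stack. This is a
+// reading of the tests, not normative ABI text.)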
+void ParamPassing(_ExtInt(129) a, _ExtInt(128) b, _ExtInt(64) c) {} +// LIN64: define void @ParamPassing(i129* byval(i129) align 8 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) +// WIN64: define dso_local void @ParamPassing(i129* %{{.+}}, i128* %{{.+}}, i64 %{{.+}}) +// LIN32: define void @ParamPassing(i129* %{{.+}}, i128* %{{.+}}, i64 %{{.+}}) +// WIN32: define dso_local void @ParamPassing(i129* %{{.+}}, i128* %{{.+}}, i64 %{{.+}}) +void ParamPassing2(_ExtInt(129) a, _ExtInt(127) b, _ExtInt(63) c) {} +// LIN64: define void @ParamPassing2(i129* byval(i129) align 8 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) +// WIN64: define dso_local void @ParamPassing2(i129* %{{.+}}, i127* %{{.+}}, i63 %{{.+}}) +// LIN32: define void @ParamPassing2(i129* %{{.+}}, i127* %{{.+}}, i63 %{{.+}}) +// WIN32: define dso_local void @ParamPassing2(i129* %{{.+}}, i127* %{{.+}}, i63 %{{.+}}) +_ExtInt(63) ReturnPassing(){} +// LIN64: define i64 @ReturnPassing( +// WIN64: define dso_local i63 @ReturnPassing( +// LIN32: define i63 @ReturnPassing( +// WIN32: define dso_local i63 @ReturnPassing( +_ExtInt(64) ReturnPassing2(){} +// LIN64: define i64 @ReturnPassing2( +// WIN64: define dso_local i64 @ReturnPassing2( +// LIN32: define i64 @ReturnPassing2( +// WIN32: define dso_local i64 @ReturnPassing2( +_ExtInt(127) ReturnPassing3(){} +// LIN64: define { i64, i64 } @ReturnPassing3( +// WIN64: define dso_local void @ReturnPassing3(i127* noalias sret +// LIN32: define i127 @ReturnPassing3( +// WIN32: define dso_local i127 @ReturnPassing3( +_ExtInt(128) ReturnPassing4(){} +// LIN64: define { i64, i64 } @ReturnPassing4( +// WIN64: define dso_local void @ReturnPassing4(i128* noalias sret +// LIN32: define i128 @ReturnPassing4( +// WIN32: define dso_local i128 @ReturnPassing4( +_ExtInt(129) ReturnPassing5(){} +// LIN64: define void @ReturnPassing5(i129* noalias sret +// WIN64: define dso_local void @ReturnPassing5(i129* noalias sret +// LIN32: define i129 @ReturnPassing5( +// WIN32: define dso_local i129 @ReturnPassing5( diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp index ddf3180e1a1b6..eaaab1b56c34c 100644 --- a/clang/test/CodeGen/ext-int-sanitizer.cpp +++ b/clang/test/CodeGen/ext-int-sanitizer.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-gnu-linux -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-gnu-linux -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // CHECK: define void @_Z6BoundsRA10_KiU7_ExtIntILi15EEi @@ -56,9 +56,11 @@ void UIntTruncation(unsigned _ExtInt(35) E, unsigned int i, unsigned long long l i = E; // CHECK: %[[LOADE:.+]] = load i35 - // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: store i35 %[[LOADE]], i35* %[[EADDR:.+]] + // CHECK: %[[LOADE2:.+]] = load i35, i35* %[[EADDR]] + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE2]] to i32 // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 - // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: %[[CHECK:.+]] = icmp eq i35 
%[[EXT]], %[[LOADE2]] // CHECK: br i1 %[[CHECK]] // CHECK: call void @__ubsan_handle_implicit_conversion_abort @@ -76,9 +78,11 @@ void IntTruncation(_ExtInt(35) E, unsigned _ExtInt(42) UE, int i, unsigned j) { j = E; // CHECK: %[[LOADE:.+]] = load i35 - // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: store i35 %[[LOADE]], i35* %[[EADDR:.+]] + // CHECK: %[[LOADE2:.+]] = load i35, i35* %[[EADDR]] + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE2]] to i32 // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 - // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE2]] // CHECK: br i1 %[[CHECK]] // CHECK: call void @__ubsan_handle_implicit_conversion_abort @@ -118,16 +122,19 @@ void IntTruncation(_ExtInt(35) E, unsigned _ExtInt(42) UE, int i, unsigned j) { // CHECK: define void @_Z15SignChangeCheckU7_ExtIntILi39EEjU7_ExtIntILi39EEi void SignChangeCheck(unsigned _ExtInt(39) UE, _ExtInt(39) E) { UE = E; + // CHECK: %[[LOADEU:.+]] = load i39 // CHECK: %[[LOADE:.+]] = load i39 - // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE]], 0 + // CHECK: store i39 %[[LOADE]], i39* %[[EADDR:.+]] + // CHECK: %[[LOADE2:.+]] = load i39, i39* %[[EADDR]] + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE2]], 0 // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 %[[NEG]], false // CHECK: br i1 %[[SIGNCHECK]] // CHECK: call void @__ubsan_handle_implicit_conversion_abort - E = UE; - // CHECK: %[[LOADUE:.+]] = load i39 - // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE]], 0 + // CHECK: store i39 %[[LOADE2]], i39* %[[UEADDR:.+]] + // CHECK: %[[LOADUE2:.+]] = load i39, i39* %[[UEADDR]] + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE2]], 0 // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] // CHECK: br i1 %[[SIGNCHECK]] // CHECK: call void @__ubsan_handle_implicit_conversion_abort @@ -138,8 +145,10 @@ void DivByZero(_ExtInt(11) E, int i) { // Also triggers signed integer overflow. 
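   // (Note: for i11 the overflowing case is -1024 / -1, i.e. INT_MIN / -1;
   // the NEMIN/NENEG1 checks below guard exactly that pair alongside the
   // zero-divisor check.)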
E / E; - // CHECK: %[[E:.+]] = load i11, i11* - // CHECK: %[[E2:.+]] = load i11, i11* + // CHECK: %[[E1LOAD:.+]] = load i11 + // CHECK: store i11 %[[E1LOAD]], i11* %[[EADDR:.+]] + // CHECK: %[[E:.+]] = load i11, i11* %[[EADDR]] + // CHECK: %[[E2:.+]] = load i11, i11* %[[EADDR]] // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[E2]], 0 // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[E]], -1024 // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[E2]], -1 @@ -154,8 +163,10 @@ void DivByZero(_ExtInt(11) E, int i) { // CHECK: define void @_Z6ShiftsU7_ExtIntILi9EEi void Shifts(_ExtInt(9) E) { E >> E; - // CHECK: %[[LHSE:.+]] = load i9, i9* - // CHECK: %[[RHSE:.+]] = load i9, i9* + // CHECK: %[[E1LOAD:.+]] = load i9, i9* + // CHECK: store i9 %[[E1LOAD]], i9* %[[EADDR:.+]] + // CHECK: %[[LHSE:.+]] = load i9, i9* %[[EADDR]] + // CHECK: %[[RHSE:.+]] = load i9, i9* %[[EADDR]] // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 // CHECK: br i1 %[[CMP]] // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort @@ -179,8 +190,10 @@ void SignedIntegerOverflow(_ExtInt(93) BiggestE, _ExtInt(4) SmallestE, _ExtInt(31) JustRightE) { BiggestE + BiggestE; - // CHECK: %[[LOAD1:.+]] = load i93, i93* - // CHECK: %[[LOAD2:.+]] = load i93, i93* + // CHECK: %[[LOADBIGGESTE2:.+]] = load i93 + // CHECK: store i93 %[[LOADBIGGESTE2]], i93* %[[BIGGESTEADDR:.+]] + // CHECK: %[[LOAD1:.+]] = load i93, i93* %[[BIGGESTEADDR]] + // CHECK: %[[LOAD2:.+]] = load i93, i93* %[[BIGGESTEADDR]] // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOAD1]], i93 %[[LOAD2]]) // CHECK: %[[EXRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 0 // CHECK: %[[OFRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 1 @@ -214,8 +227,10 @@ void UnsignedIntegerOverflow(unsigned u, unsigned _ExtInt(23) SmallE, unsigned _ExtInt(35) BigE) { u = SmallE + SmallE; - // CHECK: %[[LOADE1:.+]] = load i23, i23* - // CHECK: %[[LOADE2:.+]] = load i23, i23* + // CHECK: %[[LOADBIGGESTE2:.+]] = load i23 + // CHECK: store i23 %[[LOADBIGGESTE2]], i23* %[[BIGGESTEADDR:.+]] + // CHECK: %[[LOADE1:.+]] = load i23, i23* %[[BIGGESTEADDR]] + // CHECK: %[[LOADE2:.+]] = load i23, i23* %[[BIGGESTEADDR]] // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c index ef48dd331652b..196bb810b61ab 100644 --- a/clang/test/CodeGen/ext-int.c +++ b/clang/test/CodeGen/ext-int.c @@ -1,6 +1,7 @@ -// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK - +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32 +// RUN: %clang_cc1 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32 void GenericTest(_ExtInt(3) a, unsigned _ExtInt(3) b, _ExtInt(4) c) { // CHECK: define {{.*}}void @GenericTest @@ -15,14 +16,14 @@ void GenericTest(_ExtInt(3) 
a, unsigned _ExtInt(3) b, _ExtInt(4) c) { void VLATest(_ExtInt(3) A, _ExtInt(99) B, _ExtInt(123456) C) { // CHECK: define {{.*}}void @VLATest int AR1[A]; - // CHECK: %[[A:.+]] = zext i3 %{{.+}} to i64 - // CHECK: %[[VLA1:.+]] = alloca i32, i64 %[[A]] + // CHECK: %[[A:.+]] = zext i3 %{{.+}} to i[[INDXSIZE:[0-9]+]] + // CHECK: %[[VLA1:.+]] = alloca i32, i[[INDXSIZE]] %[[A]] int AR2[B]; - // CHECK: %[[B:.+]] = trunc i99 %{{.+}} to i64 - // CHECK: %[[VLA2:.+]] = alloca i32, i64 %[[B]] + // CHECK: %[[B:.+]] = trunc i99 %{{.+}} to i[[INDXSIZE]] + // CHECK: %[[VLA2:.+]] = alloca i32, i[[INDXSIZE]] %[[B]] int AR3[C]; - // CHECK: %[[C:.+]] = trunc i123456 %{{.+}} to i64 - // CHECK: %[[VLA3:.+]] = alloca i32, i64 %[[C]] + // CHECK: %[[C:.+]] = trunc i123456 %{{.+}} to i[[INDXSIZE]] + // CHECK: %[[VLA3:.+]] = alloca i32, i[[INDXSIZE]] %[[C]] } struct S { @@ -32,13 +33,15 @@ struct S { }; void OffsetOfTest() { - // CHECK: define {{.*}}void @OffsetOfTest + // CHECK: define {{.*}}void @OffsetOfTest int A = __builtin_offsetof(struct S,A); // CHECK: store i32 0, i32* %{{.+}} int B = __builtin_offsetof(struct S,B); - // CHECK: store i32 8, i32* %{{.+}} + // CHECK64: store i32 8, i32* %{{.+}} + // LIN32: store i32 4, i32* %{{.+}} + // WINCHECK32: store i32 8, i32* %{{.+}} int C = __builtin_offsetof(struct S,C); - // CHECK: store i32 2097160, i32* %{{.+}} + // CHECK64: store i32 2097160, i32* %{{.+}} + // LIN32: store i32 2097156, i32* %{{.+}} + // WIN32: store i32 2097160, i32* %{{.+}} } - - diff --git a/clang/test/CodeGen/windows-seh-abnormal-exits.c b/clang/test/CodeGen/windows-seh-abnormal-exits.c new file mode 100644 index 0000000000000..971e4f008a412 --- /dev/null +++ b/clang/test/CodeGen/windows-seh-abnormal-exits.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-windows -fms-extensions -Wno-implicit-function-declaration -S -emit-llvm %s -o - | FileCheck %s + +// CHECK: %[[src:[0-9-]+]] = call i8* @llvm.localaddress() +// CHECK-NEXT: %cleanup.dest = load i32, i32* %cleanup.dest.slot, align 4 +// CHECK-NEXT: %[[src2:[0-9-]+]] = icmp ne i32 %cleanup.dest, 0 +// CHECK-NEXT: %[[src3:[0-9-]+]] = zext i1 %[[src2]] to i8 +// CHECK-NEXT: call void @"?fin$0@0@seh_abnormal_exits@@"(i8 %[[src3]], i8* %[[src]]) + +void seh_abnormal_exits(int *Counter) { + for (int i = 0; i < 5; i++) { + __try { + if (i == 0) + continue; // abnormal termination + else if (i == 1) + goto t10; // abnormal termination + else if (i == 2) + __leave; // normal execution + else if (i == 4) + return; // abnormal termination + } + __finally { + if (AbnormalTermination()) { + *Counter += 1; + } + } + t10:; + } + return; // *Counter == 3 +} + diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index 6e4de1f0f5c3a..8aeb0f759e6c1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -67,3 +67,10 @@ __global__ void kernel6(struct T t) { t.x[0][0] += 1.f; t.x[1][0] += 2.f; } + +// Check that coerced pointers retain the noalias attribute when qualified with __restrict. 
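+// (Note: it is the __restrict qualifier on the kernel parameter that is
+// expected to surface as noalias on both the coerced device argument and
+// the host stub argument checked below.)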
+// CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
+// HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
+__global__ void kernel7(int *__restrict x) {
+  x[0]++;
+}
diff --git a/clang/test/CodeGenCUDA/hip-pinned-shadow.hip b/clang/test/CodeGenCUDA/hip-pinned-shadow.hip
deleted file mode 100644
index 7f0e7544d8284..0000000000000
--- a/clang/test/CodeGenCUDA/hip-pinned-shadow.hip
+++ /dev/null
@@ -1,27 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 -fvisibility hidden -fapply-global-visibility-to-externs \
-// RUN:   -emit-llvm -o - %s | FileCheck -check-prefixes=HIPDEV %s
-// RUN: %clang_cc1 -triple x86_64 -std=c++11 \
-// RUN:   -emit-llvm -o - %s | FileCheck -check-prefixes=HIPHOST %s
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 -fvisibility hidden -fapply-global-visibility-to-externs \
-// RUN:   -O3 -emit-llvm -o - %s | FileCheck -check-prefixes=HIPDEVUNSED %s
-
-struct textureReference {
-  int a;
-};
-
-template <class T, int texType, int hipTextureReadMode>
-struct texture : public textureReference {
-texture() { a = 1; }
-};
-
-__attribute__((hip_pinned_shadow)) texture<float, 2, 1> tex;
-// CUDADEV-NOT: @tex
-// CUDAHOST-NOT: call i32 @__hipRegisterVar{{.*}}@tex
-// HIPDEV: @tex = external addrspace(1) global %struct.texture
-// HIPDEV-NOT: declare{{.*}}void @_ZN7textureIfLi2ELi1EEC1Ev
-// HIPHOST: define{{.*}}@_ZN7textureIfLi2ELi1EEC1Ev
-// HIPHOST: call i32 @__hipRegisterVar{{.*}}@tex{{.*}}i32 0, i32 4, i32 0, i32 0)
-// HIPDEVUNSED: @tex = external addrspace(1) global %struct.texture
-// HIPDEVUNSED-NOT: declare{{.*}}void @_ZN7textureIfLi2ELi1EEC1Ev
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index 4e0c58fe1e40d..3e72d3ab66c19 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -98,7 +98,7 @@ enum AsEnumUnderlyingType : _ExtInt(9) {
 };
 
 void UnderlyingTypeUsage(AsEnumUnderlyingType Param) {
-  // LIN: define void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i9 %
+  // LIN: define void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i16 %
   // WIN: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 %
   AsEnumUnderlyingType Var;
   // CHECK: alloca i9, align 2
@@ -106,13 +106,13 @@ void UnderlyingTypeUsage(AsEnumUnderlyingType Param) {
 }
 
 unsigned _ExtInt(33) ManglingTestRetParam(unsigned _ExtInt(33) Param) {
-// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEj(i33 %
+// LIN: define i64 @_Z20ManglingTestRetParamU7_ExtIntILi33EEj(i64 %
// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_UExtInt@$0CB@@__clang@@U12@@Z"(i33
   return 0;
 }
 
 _ExtInt(33) ManglingTestRetParam(_ExtInt(33) Param) {
-// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEi(i33 %
+// LIN: define i64 @_Z20ManglingTestRetParamU7_ExtIntILi33EEi(i64 %
 // WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_ExtInt@$0CB@@__clang@@U12@@Z"(i33
   return 0;
 }
@@ -155,13 +155,14 @@ void TakesVarargs(int i, ...)
{ _ExtInt(92) A = __builtin_va_arg(args, _ExtInt(92)); // LIN: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P1:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD1]], i32 0, i32 2 - // LIN: %[[OFA1:.+]] = load i8*, i8** %[[OFA_P1]] - // LIN: %[[BC1:.+]] = bitcast i8* %[[OFA1]] to i92* - // LIN: %[[OFANEXT1:.+]] = getelementptr i8, i8* %[[OFA1]], i32 16 - // LIN: store i8* %[[OFANEXT1]], i8** %[[OFA_P1]] + // LIN: %[[OFA_P1:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD1]], i32 0, i32 0 + // LIN: %[[GPOFFSET:.+]] = load i32, i32* %[[OFA_P1]] + // LIN: %[[FITSINGP:.+]] = icmp ule i32 %[[GPOFFSET]], 32 + // LIN: br i1 %[[FITSINGP]] + // LIN: %[[BC1:.+]] = phi i92* // LIN: %[[LOAD1:.+]] = load i92, i92* %[[BC1]] // LIN: store i92 %[[LOAD1]], i92* + // WIN: %[[CUR1:.+]] = load i8*, i8** %[[ARGS]] // WIN: %[[NEXT1:.+]] = getelementptr inbounds i8, i8* %[[CUR1]], i64 16 // WIN: store i8* %[[NEXT1]], i8** %[[ARGS]] @@ -171,15 +172,16 @@ void TakesVarargs(int i, ...) { _ExtInt(31) B = __builtin_va_arg(args, _ExtInt(31)); // LIN: %[[AD2:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P2:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD2]], i32 0, i32 2 - // LIN: %[[OFA2:.+]] = load i8*, i8** %[[OFA_P2]] - // LIN: %[[BC2:.+]] = bitcast i8* %[[OFA2]] to i31* - // LIN: %[[OFANEXT2:.+]] = getelementptr i8, i8* %[[OFA2]], i32 8 - // LIN: store i8* %[[OFANEXT2]], i8** %[[OFA_P2]] - // LIN: %[[LOAD2:.+]] = load i31, i31* %[[BC2]] - // LIN: store i31 %[[LOAD2]], i31* + // LIN: %[[OFA_P2:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD2]], i32 0, i32 0 + // LIN: %[[GPOFFSET:.+]] = load i32, i32* %[[OFA_P2]] + // LIN: %[[FITSINGP:.+]] = icmp ule i32 %[[GPOFFSET]], 40 + // LIN: br i1 %[[FITSINGP]] + // LIN: %[[BC1:.+]] = phi i31* + // LIN: %[[LOAD1:.+]] = load i31, i31* %[[BC1]] + // LIN: store i31 %[[LOAD1]], i31* + // WIN: %[[CUR2:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT2:.+]] = getelementptr inbounds i8, i8* %[[CUR2]], i64 8 + // WIN: %[[NEXT2:.+]] = getelementptr inbounds i8, i8* %[[CUR2]], i64 8 // WIN: store i8* %[[NEXT2]], i8** %[[ARGS]] // WIN: %[[BC2:.+]] = bitcast i8* %[[CUR2]] to i31* // WIN: %[[LOADV2:.+]] = load i31, i31* %[[BC2]] @@ -187,13 +189,14 @@ void TakesVarargs(int i, ...) 
{ _ExtInt(16) C = __builtin_va_arg(args, _ExtInt(16)); // LIN: %[[AD3:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P3:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD3]], i32 0, i32 2 - // LIN: %[[OFA3:.+]] = load i8*, i8** %[[OFA_P3]] - // LIN: %[[BC3:.+]] = bitcast i8* %[[OFA3]] to i16* - // LIN: %[[OFANEXT3:.+]] = getelementptr i8, i8* %[[OFA3]], i32 8 - // LIN: store i8* %[[OFANEXT3]], i8** %[[OFA_P3]] - // LIN: %[[LOAD3:.+]] = load i16, i16* %[[BC3]] - // LIN: store i16 %[[LOAD3]], i16* + // LIN: %[[OFA_P3:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD3]], i32 0, i32 0 + // LIN: %[[GPOFFSET:.+]] = load i32, i32* %[[OFA_P3]] + // LIN: %[[FITSINGP:.+]] = icmp ule i32 %[[GPOFFSET]], 40 + // LIN: br i1 %[[FITSINGP]] + // LIN: %[[BC1:.+]] = phi i16* + // LIN: %[[LOAD1:.+]] = load i16, i16* %[[BC1]] + // LIN: store i16 %[[LOAD1]], i16* + // WIN: %[[CUR3:.+]] = load i8*, i8** %[[ARGS]] // WIN: %[[NEXT3:.+]] = getelementptr inbounds i8, i8* %[[CUR3]], i64 8 // WIN: store i8* %[[NEXT3]], i8** %[[ARGS]] @@ -210,8 +213,9 @@ void TakesVarargs(int i, ...) { // LIN: store i8* %[[OFANEXT4]], i8** %[[OFA_P4]] // LIN: %[[LOAD4:.+]] = load i129, i129* %[[BC4]] // LIN: store i129 %[[LOAD4]], i129* + // WIN: %[[CUR4:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT4:.+]] = getelementptr inbounds i8, i8* %[[CUR4]], i64 24 + // WIN: %[[NEXT4:.+]] = getelementptr inbounds i8, i8* %[[CUR4]], i64 24 // WIN: store i8* %[[NEXT4]], i8** %[[ARGS]] // WIN: %[[BC4:.+]] = bitcast i8* %[[CUR4]] to i129* // WIN: %[[LOADV4:.+]] = load i129, i129* %[[BC4]] @@ -226,6 +230,7 @@ void TakesVarargs(int i, ...) { // LIN: store i8* %[[OFANEXT5]], i8** %[[OFA_P5]] // LIN: %[[LOAD5:.+]] = load i16777200, i16777200* %[[BC5]] // LIN: store i16777200 %[[LOAD5]], i16777200* + // WIN: %[[CUR5:.+]] = load i8*, i8** %[[ARGS]] // WIN: %[[NEXT5:.+]] = getelementptr inbounds i8, i8* %[[CUR5]], i64 2097152 // WIN: store i8* %[[NEXT5]], i8** %[[ARGS]] @@ -268,7 +273,7 @@ void typeid_tests() { } void ExplicitCasts() { - // LIN: define void @_Z13ExplicitCastsv() + // LIN: define void @_Z13ExplicitCastsv() // WIN: define dso_local void @"?ExplicitCasts@@YAXXZ"() _ExtInt(33) a; @@ -292,7 +297,7 @@ struct S { }; void OffsetOfTest() { - // LIN: define void @_Z12OffsetOfTestv() + // LIN: define void @_Z12OffsetOfTestv() // WIN: define dso_local void @"?OffsetOfTest@@YAXXZ"() auto A = __builtin_offsetof(S,A); @@ -318,7 +323,7 @@ void ShiftExtIntByConstant(_ExtInt(28) Ext) { // UB in C/C++, Defined in OpenCL. Ext << 29; - // CHECK: shl i28 %{{.+}}, 29 + // CHECK: shl i28 %{{.+}}, 29 Ext >> 29; // CHECK: ashr i28 %{{.+}}, 29 } diff --git a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll new file mode 100644 index 0000000000000..597fe94885e34 --- /dev/null +++ b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll @@ -0,0 +1,31 @@ +; REQUIRES: amdgpu-registered-target +; RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S -o - %s 2>&1 | FileCheck %s + +; Check that a DiagnosticUnsupported reported as a warning works +; correctly, and is not emitted as an error. 
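+;
+; Sketch of the mechanism under test, as implied by the CHECK line below: the
+; AMDGPU backend reports the LDS (addrspace(3)) global used from a non-kernel
+; function as a warning-severity DiagnosticInfoUnsupported. The source
+; location in the message comes from the !dbg attachment on the load, and the
+; "in function use_lds_global_in_func i32 ()" text from the enclosing
+; function. The expectation is that this surfaces as the frontend warning
+; (warn_fe_backend_unsupported, also added to warning-flags.c in this patch)
+; instead of being promoted to an error.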
+ +; CHECK: warning: test.c:2:20: in function use_lds_global_in_func i32 (): local memory global used by non-kernel function + +target triple = "amdgcn-amd-amdhsa" + +@lds = external addrspace(3) global i32, align 4 + +define i32 @use_lds_global_in_func() !dbg !5 { + %load = load i32, i32 addrspace(3)* @lds, !dbg !9 + ret i32 %load, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !6, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !7) +!7 = !{!8} +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !DILocation(line: 2, column: 20, scope: !5) +!10 = !DILocation(line: 2, column: 13, scope: !5) diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/bin/as b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/bin/as new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/bin/ld new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/lib/.keep b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/i386-gnu/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbegin.o b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbegin.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbegin.o b/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbegin.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbeginS.o b/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbeginS.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbeginT.o b/clang/test/Driver/Inputs/basic_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/crtbeginT.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index cf12a51556892..d77ab3782838a 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -283,6 +283,20 @@ // ARM64-A64FX: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "a64fx" // ARM64-A64FX-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target aarch64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL %s +// RUN: %clang -target aarch64 -mlittle-endian -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL %s +// RUN: %clang -target aarch64 -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL-TUNE %s +// RUN: %clang -target aarch64 -mlittle-endian -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL-TUNE %s +// CARMEL: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "carmel" +// CARMEL-TUNE: 
"-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target arm64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL %s +// RUN: %clang -target arm64 -mlittle-endian -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL %s +// RUN: %clang -target arm64 -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL-TUNE %s +// RUN: %clang -target arm64 -mlittle-endian -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL-TUNE %s +// ARM64-CARMEL: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "carmel" +// ARM64-CARMEL-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" + // RUN: %clang -target aarch64_be -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s // RUN: %clang -target aarch64 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s // RUN: %clang -target aarch64_be -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s diff --git a/clang/test/Driver/aarch64-fixed-x-register.c b/clang/test/Driver/aarch64-fixed-x-register.c index ed8e7c2013dbf..52f62e68ef597 100644 --- a/clang/test/Driver/aarch64-fixed-x-register.c +++ b/clang/test/Driver/aarch64-fixed-x-register.c @@ -94,6 +94,10 @@ // RUN: FileCheck --check-prefix=CHECK-FIXED-X28 < %t %s // CHECK-FIXED-X28: "-target-feature" "+reserve-x28" +// RUN: %clang -target aarch64-none-gnu -ffixed-x30 -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-FIXED-X30 < %t %s +// CHECK-FIXED-X30: "-target-feature" "+reserve-x30" + // Test multiple of reserve-x# options together. // RUN: %clang -target aarch64-none-gnu \ // RUN: -ffixed-x1 \ diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 67744d95e6e9c..848a532b24f93 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -67,11 +67,11 @@ // RUN: %clang_cl -### /FA -fprofile-instr-generate -- %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-INSTR-GENERATE %s // RUN: %clang_cl -### /FA -fprofile-instr-generate=/tmp/somefile.profraw -- %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-INSTR-GENERATE-FILE %s -// CHECK-PROFILE-INSTR-GENERATE: "-fprofile-instrument=clang" "--dependent-lib={{[^"]*}}clang_rt.profile-{{[^"]*}}.lib" +// CHECK-PROFILE-INSTR-GENERATE: "-fprofile-instrument=clang" "--dependent-lib=clang_rt.profile-{{[^"]*}}.lib" // CHECK-PROFILE-INSTR-GENERATE-FILE: "-fprofile-instrument-path=/tmp/somefile.profraw" // RUN: %clang_cl -### /FA -fprofile-generate -- %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-GENERATE %s -// CHECK-PROFILE-GENERATE: "-fprofile-instrument=llvm" "--dependent-lib={{[^"]*}}clang_rt.profile-{{[^"]*}}.lib" +// CHECK-PROFILE-GENERATE: "-fprofile-instrument=llvm" "--dependent-lib=clang_rt.profile-{{[^"]*}}.lib" // RUN: %clang_cl -### /FA -fprofile-instr-generate -fprofile-instr-use -- %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-GEN-USE %s // RUN: %clang_cl -### /FA -fprofile-instr-generate -fprofile-instr-use=file -- %s 2>&1 | FileCheck -check-prefix=CHECK-NO-MIX-GEN-USE %s diff --git a/clang/test/Driver/fsanitize-blacklist.c b/clang/test/Driver/fsanitize-blacklist.c index 2d3ef3f6eee70..254421cc34be9 100644 --- a/clang/test/Driver/fsanitize-blacklist.c +++ b/clang/test/Driver/fsanitize-blacklist.c @@ -59,8 +59,16 @@ // CHECK-ONLY-FIRST-DISABLED: -fsanitize-blacklist={{.*}}.second // CHECK-ONLY_FIRST-DISABLED-NOT: good +// -fno-sanitize-blacklist disables the system blacklists. 
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fno-sanitize-blacklist %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLED-SYSTEM --check-prefix=DELIMITERS +// CHECK-DISABLED-SYSTEM-NOT: -fsanitize-system-blacklist + // If cfi_blacklist.txt cannot be found in the resource dir, driver should fail. -// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi -resource-dir=/dev/null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING-CFI-BLACKLIST +// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi -flto -fvisibility=default -resource-dir=/dev/null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING-CFI-BLACKLIST // CHECK-MISSING-CFI-BLACKLIST: error: no such file or directory: '{{.*}}cfi_blacklist.txt' +// -fno-sanitize-blacklist disables checking for cfi_blacklist.txt in the resource dir. +// RUN: %clang -target x86_64-linux-gnu -fsanitize=cfi -flto -fvisibility=default -fno-sanitize-blacklist -resource-dir=/dev/null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING-CFI-NO-BLACKLIST +// CHECK-MISSING-CFI-NO-BLACKLIST-NOT: error: no such file or directory: '{{.*}}cfi_blacklist.txt' + // DELIMITERS: {{^ *"}} diff --git a/clang/test/Driver/hurd.c b/clang/test/Driver/hurd.c index a6ca8ea3379f6..7dbbe923557b7 100644 --- a/clang/test/Driver/hurd.c +++ b/clang/test/Driver/hurd.c @@ -11,7 +11,7 @@ // CHECK: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" // CHECK: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" // CHECK: "-dynamic-linker" "/lib/ld.so" -// CHECK: "crtbegin.o" +// CHECK: "{{.*}}/usr/lib/gcc/i386-gnu/4.6.0{{/|\\\\}}crtbegin.o" // CHECK: "-L[[SYSROOT]]/lib/i386-gnu" // CHECK: "-L[[SYSROOT]]/lib/../lib32" // CHECK: "-L[[SYSROOT]]/usr/lib/i386-gnu" @@ -33,7 +33,7 @@ // CHECK-STATIC: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" // CHECK-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" // CHECK-STATIC: "-static" -// CHECK-STATIC: "crtbeginT.o" +// CHECK-STATIC: "{{.*}}/usr/lib/gcc/i386-gnu/4.6.0{{/|\\\\}}crtbeginT.o" // CHECK-STATIC: "-L[[SYSROOT]]/lib/i386-gnu" // CHECK-STATIC: "-L[[SYSROOT]]/lib/../lib32" // CHECK-STATIC: "-L[[SYSROOT]]/usr/lib/i386-gnu" @@ -53,10 +53,21 @@ // CHECK-SHARED: "-internal-externc-isystem" "[[SYSROOT]]/include" // CHECK-SHARED: "-internal-externc-isystem" "[[SYSROOT]]/usr/include" // CHECK-SHARED: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-SHARED: "crtbeginS.o" +// CHECK-SHARED: "{{.*}}/usr/lib/gcc/i386-gnu/4.6.0{{/|\\\\}}crtbeginS.o" // CHECK-SHARED: "-L[[SYSROOT]]/lib/i386-gnu" // CHECK-SHARED: "-L[[SYSROOT]]/lib/../lib32" // CHECK-SHARED: "-L[[SYSROOT]]/usr/lib/i386-gnu" // CHECK-SHARED: "-L[[SYSROOT]]/usr/lib/../lib32" // CHECK-SHARED: "-L[[SYSROOT]]/lib" // CHECK-SHARED: "-L[[SYSROOT]]/usr/lib" + +// RUN: %clang -### -o %t %s 2>&1 -no-integrated-as -fuse-ld=ld \ +// RUN: --gcc-toolchain=%S/Inputs/basic_cross_hurd_tree/usr \ +// RUN: --target=i386-pc-gnu \ +// RUN: | FileCheck --check-prefix=CHECK-CROSS %s +// CHECK-CROSS-NOT: warning: +// CHECK-CROSS: "-cc1" "-triple" "i386-pc-hurd-gnu" +// CHECK-CROSS: "{{.*}}/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/../../../../i386-gnu/bin{{/|\\\\}}as" "--32" +// CHECK-CROSS: "{{.*}}/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/../../../../i386-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_i386" +// CHECK-CROSS: "{{.*}}/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0{{/|\\\\}}crtbegin.o" +// CHECK-CROSS: "-L{{.*}}/Inputs/basic_cross_hurd_tree/usr/lib/gcc/i386-gnu/4.6.0/../../../../i386-gnu/lib" diff --git 
a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c
index 82a033fda96f0..52a432ca18034 100644
--- a/clang/test/Driver/sanitizer-ld.c
+++ b/clang/test/Driver/sanitizer-ld.c
@@ -656,16 +656,16 @@
 // RUN:     -target x86_64-pc-windows \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-WIN64 %s
-// CHECK-CFI-STATS-WIN64: "--dependent-lib={{[^"]*}}clang_rt.stats_client-x86_64.lib"
-// CHECK-CFI-STATS-WIN64: "--dependent-lib={{[^"]*}}clang_rt.stats-x86_64.lib"
+// CHECK-CFI-STATS-WIN64: "--dependent-lib=clang_rt.stats_client-x86_64.lib"
+// CHECK-CFI-STATS-WIN64: "--dependent-lib=clang_rt.stats-x86_64.lib"
 // CHECK-CFI-STATS-WIN64: "--linker-option=/include:__sanitizer_stats_register"
 
 // RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \
 // RUN:     -target i686-pc-windows \
 // RUN:     --sysroot=%S/Inputs/basic_linux_tree \
 // RUN:   | FileCheck --check-prefix=CHECK-CFI-STATS-WIN32 %s
-// CHECK-CFI-STATS-WIN32: "--dependent-lib={{[^"]*}}clang_rt.stats_client-i386.lib"
-// CHECK-CFI-STATS-WIN32: "--dependent-lib={{[^"]*}}clang_rt.stats-i386.lib"
+// CHECK-CFI-STATS-WIN32: "--dependent-lib=clang_rt.stats_client-i386.lib"
+// CHECK-CFI-STATS-WIN32: "--dependent-lib=clang_rt.stats-i386.lib"
 // CHECK-CFI-STATS-WIN32: "--linker-option=/include:___sanitizer_stats_register"
 
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
diff --git a/clang/test/Headers/x86intrin-2.c b/clang/test/Headers/x86intrin-2.c
index 90475c658fceb..1bce603e7634a 100644
--- a/clang/test/Headers/x86intrin-2.c
+++ b/clang/test/Headers/x86intrin-2.c
@@ -1,10 +1,11 @@
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -flax-vector-conversions=none -Wcast-qual %s -verify
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -Wcast-qual -x c++ %s -verify
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsyntax-only -ffreestanding -Wcast-qual %s -verify
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -ffreestanding -Wcast-qual %s -verify
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsyntax-only -ffreestanding -flax-vector-conversions=none -Wcast-qual %s -verify
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -ffreestanding -flax-vector-conversions=none -Wcast-qual %s -verify
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsyntax-only -ffreestanding -Wcast-qual -x c++ %s -verify
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -ffreestanding -Wcast-qual -x c++ %s -verify
 // expected-no-diagnostics
 
-#if defined(i386) || defined(__x86_64__)
-
 // Include the metaheader that includes all x86 intrinsic headers.
 #include <x86intrin.h>
 
@@ -133,5 +134,3 @@ __m128 __attribute__((__target__("f16c"))) mm_cvtph_ps_wrap(__m128i a) {
 int __attribute__((__target__("rtm"))) xtest_wrap(void) {
   return _xtest();
 }
-
-#endif
diff --git a/clang/test/Index/cxx17-if-with-initializer.cpp b/clang/test/Index/cxx17-if-with-initializer.cpp
new file mode 100644
index 0000000000000..fb34434117d53
--- /dev/null
+++ b/clang/test/Index/cxx17-if-with-initializer.cpp
@@ -0,0 +1,17 @@
+// Test is line- and column-sensitive; see below.
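+// The init-statement of a C++17 `if` should be exposed through libclang as a
+// DeclStmt child of the IfStmt, with the condition following it as a
+// DeclRefExpr; the CHECK-LOAD lines below pin down the exact extents.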
+ +void foo() { + if (bool bar = true; bar) { + } +} + +// RUN: c-index-test -test-load-source all -std=c++17 %s | FileCheck -check-prefix=CHECK-LOAD %s +// CHECK-LOAD: cxx17-if-with-initializer.cpp:3:6: FunctionDecl=foo:3:6 (Definition) Extent=[3:1 - 6:2] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:3:12: CompoundStmt= Extent=[3:12 - 6:2] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:3: IfStmt= Extent=[4:3 - 5:4] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:7: DeclStmt= Extent=[4:7 - 4:23] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:12: VarDecl=bar:4:12 (Definition) Extent=[4:7 - 4:22] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:18: CXXBoolLiteralExpr= Extent=[4:18 - 4:22] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:24: UnexposedExpr=bar:4:12 Extent=[4:24 - 4:27] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:24: DeclRefExpr=bar:4:12 Extent=[4:24 - 4:27] +// CHECK-LOAD: cxx17-if-with-initializer.cpp:4:29: CompoundStmt= Extent=[4:29 - 5:4] diff --git a/clang/test/Index/cxx17-structured-binding.cpp b/clang/test/Index/cxx17-structured-binding.cpp new file mode 100644 index 0000000000000..3fbd262eadae6 --- /dev/null +++ b/clang/test/Index/cxx17-structured-binding.cpp @@ -0,0 +1,25 @@ +// Test is line- and column-sensitive; see below. +int main() { + int a[2] = {1, 2}; + auto [x, y] = a; +} + +// RUN: c-index-test -test-load-source all -std=c++17 %s | FileCheck -check-prefix=CHECK-LOAD %s +// CHECK-LOAD: cxx17-structured-binding.cpp:2:5: FunctionDecl=main:2:5 (Definition) Extent=[2:1 - 5:2] +// CHECK-LOAD: cxx17-structured-binding.cpp:2:12: CompoundStmt= Extent=[2:12 - 5:2] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:3: DeclStmt= Extent=[3:3 - 3:21] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:7: VarDecl=a:3:7 (Definition) Extent=[3:3 - 3:20] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:9: IntegerLiteral= Extent=[3:9 - 3:10] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:14: InitListExpr= Extent=[3:14 - 3:20] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:15: IntegerLiteral= Extent=[3:15 - 3:16] +// CHECK-LOAD: cxx17-structured-binding.cpp:3:18: IntegerLiteral= Extent=[3:18 - 3:19] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:3: DeclStmt= Extent=[4:3 - 4:19] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:8: UnexposedDecl=[x, y]:4:8 (Definition) Extent=[4:3 - 4:18] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:9: UnexposedDecl=x:4:9 (Definition) Extent=[4:9 - 4:10] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:12: UnexposedDecl=y:4:12 (Definition) Extent=[4:12 - 4:13] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: UnexposedExpr= Extent=[4:17 - 4:18] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: DeclRefExpr=a:3:7 Extent=[4:17 - 4:18] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: UnexposedExpr= Extent=[4:17 - 4:9] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: ArraySubscriptExpr= Extent=[4:17 - 4:9] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: UnexposedExpr=a:3:7 Extent=[4:17 - 4:18] +// CHECK-LOAD: cxx17-structured-binding.cpp:4:17: DeclRefExpr=a:3:7 Extent=[4:17 - 4:18] diff --git a/clang/test/Lexer/case-insensitive-include-ms.c b/clang/test/Lexer/case-insensitive-include-ms.c index 1726ba5730bd4..eff7309733eba 100644 --- a/clang/test/Lexer/case-insensitive-include-ms.c +++ b/clang/test/Lexer/case-insensitive-include-ms.c @@ -11,10 +11,14 @@ #include "..\Output\.\case-insensitive-include.h" #include "..\Output\.\Case-Insensitive-Include.h" // expected-warning {{non-portable path}} // CHECK: 
fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:50}:"\"..\\Output\\.\\case-insensitive-include.h\"" +#include "..\\Output\.\\Case-Insensitive-Include.h" // expected-warning {{non-portable path}} +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:52}:"\"..\\\\Output\\.\\\\case-insensitive-include.h\"" #include "..\output\.\case-insensitive-include.h" // expected-warning {{non-portable path}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:50}:"\"..\\Output\\.\\case-insensitive-include.h\"" #include "apath\..\.\case-insensitive-include.h" #include "apath\..\.\Case-Insensitive-Include.h" // expected-warning {{non-portable path}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:49}:"\"apath\\..\\.\\case-insensitive-include.h\"" +#include "apath\\..\\.\\Case-Insensitive-Include.h" // expected-warning {{non-portable path}} +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:52}:"\"apath\\\\..\\\\.\\\\case-insensitive-include.h\"" #include "APath\..\.\case-insensitive-include.h" // For the sake of efficiency, this case is not diagnosed. :-( diff --git a/clang/test/Lexer/case-insensitive-include-pr31836.sh b/clang/test/Lexer/case-insensitive-include-pr31836.sh index e842badc7f28e..b60e6ca6ff2bd 100644 --- a/clang/test/Lexer/case-insensitive-include-pr31836.sh +++ b/clang/test/Lexer/case-insensitive-include-pr31836.sh @@ -1,5 +1,4 @@ // REQUIRES: case-insensitive-filesystem -// UNSUPPORTED: system-windows // RUN: mkdir -p %t // RUN: touch %t/case-insensitive-include-pr31836.h diff --git a/clang/test/Lexer/case-insensitive-include.c b/clang/test/Lexer/case-insensitive-include.c index 236159bec5bb8..46f7085105378 100644 --- a/clang/test/Lexer/case-insensitive-include.c +++ b/clang/test/Lexer/case-insensitive-include.c @@ -18,11 +18,11 @@ #include "Case-Insensitive-Include.h" // expected-warning {{non-portable path}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:38}:"\"case-insensitive-include.h\"" -#include "../Output/./case-insensitive-include.h" -#include "../Output/./Case-Insensitive-Include.h" // expected-warning {{non-portable path}} -// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:50}:"\"../Output/./case-insensitive-include.h\"" -#include "../output/./case-insensitive-include.h" // expected-warning {{non-portable path}} -// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:50}:"\"../Output/./case-insensitive-include.h\"" +#include "../Output/.//case-insensitive-include.h" +#include "../Output/.//Case-Insensitive-Include.h" // expected-warning {{non-portable path}} +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:51}:"\"../Output/.//case-insensitive-include.h\"" +#include "../output/.//case-insensitive-include.h" // expected-warning {{non-portable path}} +// CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:51}:"\"../Output/.//case-insensitive-include.h\"" #include "apath/.././case-insensitive-include.h" #include "apath/.././Case-Insensitive-Include.h" // expected-warning {{non-portable path}} diff --git a/clang/test/Lexer/char8_t.cpp b/clang/test/Lexer/char8_t.cpp index 20f820e24015f..d65597c68d8bc 100644 --- a/clang/test/Lexer/char8_t.cpp +++ b/clang/test/Lexer/char8_t.cpp @@ -1,5 +1,14 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s -fchar8_t +// RUN: %clang_cc1 -std=c++20 -verify %s -DCHAR8_T +// RUN: %clang_cc1 -std=c++20 -verify %s -fchar8_t -DCHAR8_T +// RUN: %clang_cc1 -std=c++17 -verify %s -fchar8_t -DCHAR8_T + +// RUN: %clang_cc1 -std=c++17 -verify %s +// RUN: %clang_cc1 -std=c++17 -verify %s -fno-char8_t +// 
RUN: %clang_cc1 -std=c++20 -verify %s -fno-char8_t
+
+#if defined(__cpp_char8_t) != defined(CHAR8_T)
+#error wrong setting for __cpp_char8_t
+#endif
 
 #if defined(__cpp_char8_t) && __is_identifier(char8_t)
 #error char8_t is an identifier under -fchar8_t
diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c
index 05172b2208596..9c28d3adcf93d 100644
--- a/clang/test/Misc/warning-flags.c
+++ b/clang/test/Misc/warning-flags.c
@@ -18,7 +18,7 @@ This test serves two purposes:
 
 The list of warnings below should NEVER grow.  It should gradually shrink to 0.
 
-CHECK: Warnings without flags (74):
+CHECK: Warnings without flags (75):
 CHECK-NEXT: ext_excess_initializers
 CHECK-NEXT: ext_excess_initializers_in_char_array_initializer
 CHECK-NEXT: ext_expected_semi_decl_list
@@ -56,6 +56,7 @@ CHECK-NEXT: warn_dup_category_def
 CHECK-NEXT: warn_enum_value_overflow
 CHECK-NEXT: warn_expected_qualified_after_typename
 CHECK-NEXT: warn_extraneous_char_constant
+CHECK-NEXT: warn_fe_backend_unsupported
 CHECK-NEXT: warn_fe_cc_log_diagnostics_failure
 CHECK-NEXT: warn_fe_cc_print_header_failure
 CHECK-NEXT: warn_fe_macro_contains_embedded_newline
diff --git a/clang/test/Modules/auto-module-import.m b/clang/test/Modules/auto-module-import.m
index f6127adcbd895..7b90e867d4370 100644
--- a/clang/test/Modules/auto-module-import.m
+++ b/clang/test/Modules/auto-module-import.m
@@ -18,7 +18,7 @@
 #ifdef ERRORS
 Module *mod; // expected-error{{declaration of 'Module' must be imported from module 'Module' before it is required}}
-// expected-note@Inputs/Module.framework/Headers/Module.h:15 {{previous}}
+// expected-note@Inputs/Module.framework/Headers/Module.h:15 {{not visible}}
 #else
 #import <AlsoDependsOnModule/AlsoDependsOnModule.h> // expected-warning{{treating #import as an import of module 'AlsoDependsOnModule'}}
 #endif
@@ -29,7 +29,7 @@ void testSubframeworkOther() {
 #ifdef ERRORS
   double *sfo1 = sub_framework_other; // expected-error{{declaration of 'sub_framework_other' must be imported from module 'DependsOnModule.SubFramework.Other'}}
-  // expected-note@Inputs/DependsOnModule.framework/Frameworks/SubFramework.framework/Headers/Other.h:15 {{previous}}
+  // expected-note@Inputs/DependsOnModule.framework/Frameworks/SubFramework.framework/Headers/Other.h:15 {{not visible}}
 #endif
 }
@@ -73,7 +73,7 @@ void testModuleSubFrameworkAgain() {
 int getNoUmbrellaAPrivate() { return no_umbrella_A_private; }
 
 int getNoUmbrellaBPrivateFail() { return no_umbrella_B_private; } // expected-error{{declaration of 'no_umbrella_B_private' must be imported from module 'NoUmbrella.Private.B_Private'}}
-// expected-note@Inputs/NoUmbrella.framework/PrivateHeaders/B_Private.h:1 {{previous}}
+// expected-note@Inputs/NoUmbrella.framework/PrivateHeaders/B_Private.h:1 {{not visible}}
 
 // Test inclusion of headers that are under an umbrella directory but
 // not actually part of the module.
diff --git a/clang/test/Modules/cxx-templates.cpp b/clang/test/Modules/cxx-templates.cpp
index c085eb5f676cc..5bb3ca3380516 100644
--- a/clang/test/Modules/cxx-templates.cpp
+++ b/clang/test/Modules/cxx-templates.cpp
@@ -83,7 +83,7 @@ void g() {
 // a big spew of errors here.
// // expected-error@Inputs/cxx-templates-a.h:19 {{definition of 'DefinedInBImpl' must be imported}} - // expected-note@Inputs/cxx-templates-b-impl.h:1 +{{definition is here}} + // expected-note@Inputs/cxx-templates-b-impl.h:1 +{{definition here is not reachable}} // expected-error@Inputs/cxx-templates-a.h:19 +{{}} // expected-error@Inputs/cxx-templates-a.h:20 +{{}} PerformDelayedLookup(defined_in_b_impl); // expected-note {{in instantiation of}} @@ -106,8 +106,8 @@ void g() { TemplateInstantiationVisibility tiv1; TemplateInstantiationVisibility tiv2; TemplateInstantiationVisibility tiv3; // expected-error 5{{must be imported from module 'cxx_templates_b_impl'}} - // expected-note@cxx-templates-b-impl.h:10 3{{explicit specialization declared here}} - // expected-note@cxx-templates-b-impl.h:10 2{{previous definition is here}} + // expected-note@cxx-templates-b-impl.h:10 3{{explicit specialization declared here is not reachable}} + // expected-note@cxx-templates-b-impl.h:10 2{{definition here is not reachable}} TemplateInstantiationVisibility tiv4; int &p = WithPartialSpecializationUse().f(); diff --git a/clang/test/Modules/decldef.m b/clang/test/Modules/decldef.m index 784743ff6e79b..1fb9bdf584554 100644 --- a/clang/test/Modules/decldef.m +++ b/clang/test/Modules/decldef.m @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fobjc-arc -I %S/Inputs -fmodules-cache-path=%t %s -verify -DUSE_EARLY // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fobjc-arc -I %S/Inputs -fmodules-cache-path=%t %s -verify -// expected-note@Inputs/def.h:5 {{previous}} +// expected-note@Inputs/def.h:5 {{here}} @class Def; Def *def; @@ -16,7 +16,7 @@ // expected-error@-2{{must use 'struct' tag to refer to type 'B'}} #else // expected-error@-4{{declaration of 'B' must be imported from module 'decldef.Decl' before it is required}} -// expected-note@Inputs/decl.h:2 {{previous}} +// expected-note@Inputs/decl.h:2 {{not visible}} #endif @import decldef.Decl; diff --git a/clang/test/Modules/decldef.mm b/clang/test/Modules/decldef.mm index ab271fc2ba95c..e8f070b511591 100644 --- a/clang/test/Modules/decldef.mm +++ b/clang/test/Modules/decldef.mm @@ -4,9 +4,9 @@ // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fobjc-arc -I %S/Inputs -fmodules-cache-path=%t %s -verify -DUSE_3 -DUSE_4 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fobjc-arc -I %S/Inputs -fmodules-cache-path=%t %s -verify -DUSE_4 -// expected-note@Inputs/def.h:5 0-1{{previous}} -// expected-note@Inputs/def.h:16 0-1{{previous}} -// expected-note@Inputs/def-include.h:11 0-1{{previous}} +// expected-note@Inputs/def.h:5 0-1{{here}} +// expected-note@Inputs/def.h:16 0-1{{here}} +// expected-note@Inputs/def-include.h:11 0-1{{here}} @class Def; Def *def; diff --git a/clang/test/Modules/diagnose-missing-import.m b/clang/test/Modules/diagnose-missing-import.m index e0c0457e63572..2c67e01944a9b 100644 --- a/clang/test/Modules/diagnose-missing-import.m +++ b/clang/test/Modules/diagnose-missing-import.m @@ -8,7 +8,7 @@ void foo() { XYZLogEvent(xyzRiskyCloseOpenParam, xyzRiskyCloseOpenParam); // expected-error {{implicit declaration of function 'XYZLogEvent'}} expected-error {{declaration of 'XYZLogEvent' must be imported}} expected-error {{declaration of 'xyzRiskyCloseOpenParam' must be imported from module 'NCI.A'}} expected-error {{declaration of 'xyzRiskyCloseOpenParam' must be imported from module 'NCI.A'}} } -// expected-note@Inputs/diagnose-missing-import/a.h:5 {{previous declaration is here}} -// 
expected-note@Inputs/diagnose-missing-import/a.h:5 {{previous declaration is here}} -// expected-note@Inputs/diagnose-missing-import/a.h:6 {{previous declaration is here}} +// expected-note@Inputs/diagnose-missing-import/a.h:5 {{declaration here is not visible}} +// expected-note@Inputs/diagnose-missing-import/a.h:5 {{declaration here is not visible}} +// expected-note@Inputs/diagnose-missing-import/a.h:6 {{declaration here is not visible}} diff --git a/clang/test/Modules/interface-diagnose-missing-import.m b/clang/test/Modules/interface-diagnose-missing-import.m index 5bbac36423006..c455b1501c9cc 100644 --- a/clang/test/Modules/interface-diagnose-missing-import.m +++ b/clang/test/Modules/interface-diagnose-missing-import.m @@ -8,4 +8,4 @@ @interface Buggy @interface Buggy (MyExt) // expected-error {{definition of 'Buggy' must be imported from module 'Foo' before it is required}} @end -// expected-note@Foo/RandoPriv.h:3{{previous definition is here}} +// expected-note@Foo/RandoPriv.h:3{{definition here is not reachable}} diff --git a/clang/test/Modules/ms-enums.cpp b/clang/test/Modules/ms-enums.cpp index b3a377c6fa634..da75d152ffa40 100644 --- a/clang/test/Modules/ms-enums.cpp +++ b/clang/test/Modules/ms-enums.cpp @@ -2,8 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -fms-compatibility -x c++ -std=c++20 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/ms-enums %s -verify -fno-modules-error-recovery #include "B.h" -// expected-note@A.h:1 {{previous declaration is here}} -// expected-note@A.h:1 2 {{previous definition is here}} +// expected-note@A.h:1 {{declaration here is not visible}} +// expected-note@A.h:1 2{{definition here is not reachable}} fwd_enum gv_enum; // expected-error {{must be imported}} diff --git a/clang/test/Modules/no-module-map.cpp b/clang/test/Modules/no-module-map.cpp index 46ca38af7eed3..81533417e856a 100644 --- a/clang/test/Modules/no-module-map.cpp +++ b/clang/test/Modules/no-module-map.cpp @@ -30,7 +30,7 @@ #error A_H should not be defined #endif // expected-error@+3 {{must be imported from}} -// expected-note@* {{previous declaration}} +// expected-note@* {{declaration}} #endif void use_a() { a(); } @@ -43,6 +43,6 @@ void use_a() { a(); } #error B_H should not be defined #endif // expected-error@+3 {{must be imported from}} -// expected-note@* {{previous declaration}} +// expected-note@* {{declaration}} #endif void use_b() { b(); } diff --git a/clang/test/Modules/normal-module-map.cpp b/clang/test/Modules/normal-module-map.cpp index 5db1f3f33e947..6b8befb77293a 100644 --- a/clang/test/Modules/normal-module-map.cpp +++ b/clang/test/Modules/normal-module-map.cpp @@ -25,7 +25,7 @@ int testNestedUmbrellaA() { int testNestedUmbrellaBFail() { return nested_umbrella_b; // expected-error@-1{{declaration of 'nested_umbrella_b' must be imported from module 'nested_umbrella.b' before it is required}} - // expected-note@Inputs/normal-module-map/nested_umbrella/b.h:1{{previous}} + // expected-note@Inputs/normal-module-map/nested_umbrella/b.h:1{{here}} } @import nested_umbrella.b; diff --git a/clang/test/Modules/stddef.c b/clang/test/Modules/stddef.c index 16de854563fca..c33f3643112bb 100644 --- a/clang/test/Modules/stddef.c +++ b/clang/test/Modules/stddef.c @@ -5,8 +5,8 @@ ptrdiff_t pdt; -size_t st; // expected-error {{must be imported}} -// expected-note@stddef.h:* {{previous}} +size_t st; // expected-error {{missing '#include "include_again.h"'; 'size_t' must be declared before it is used}} +// expected-note@stddef.h:* {{here}} 
#include "include_again.h" diff --git a/clang/test/Modules/subframeworks.m b/clang/test/Modules/subframeworks.m index ce35415717d2c..c4cf85a2babfe 100644 --- a/clang/test/Modules/subframeworks.m +++ b/clang/test/Modules/subframeworks.m @@ -6,7 +6,7 @@ void testSubFramework() { float *sf1 = sub_framework; // expected-error{{declaration of 'sub_framework' must be imported from module 'DependsOnModule.SubFramework' before it is required}} - // expected-note@Inputs/DependsOnModule.framework/Frameworks/SubFramework.framework/Headers/SubFramework.h:2 {{previous}} + // expected-note@Inputs/DependsOnModule.framework/Frameworks/SubFramework.framework/Headers/SubFramework.h:2 {{here}} } @import DependsOnModule.SubFramework; diff --git a/clang/test/Modules/submodule-visibility-cycles.cpp b/clang/test/Modules/submodule-visibility-cycles.cpp index a01fe562b14f5..4fecdb98abd1f 100644 --- a/clang/test/Modules/submodule-visibility-cycles.cpp +++ b/clang/test/Modules/submodule-visibility-cycles.cpp @@ -3,7 +3,7 @@ #include "cycle1.h" C1 c1; -C2 c2; // expected-error {{must be imported}} +C2 c2; // expected-error {{missing '#include "cycle2.h"'; 'C2' must be declared}} // expected-note@cycle2.h:6 {{here}} #include "cycle2.h" diff --git a/clang/test/Modules/submodule-visibility.cpp b/clang/test/Modules/submodule-visibility.cpp index 4c066e6ab9b09..cae18d41ad7b5 100644 --- a/clang/test/Modules/submodule-visibility.cpp +++ b/clang/test/Modules/submodule-visibility.cpp @@ -22,7 +22,7 @@ // The use of -fmodule-name=x causes us to textually include the above headers. // The submodule visibility rules are still applied in this case. // -// expected-error@b.h:1 {{declaration of 'n' must be imported from module 'x.a'}} +// expected-error@b.h:1 {{missing '#include "a.h"'; 'n' must be declared}} // expected-note@a.h:1 {{here}} #endif diff --git a/clang/test/Modules/submodules-merge-defs.cpp b/clang/test/Modules/submodules-merge-defs.cpp index 9e1ac6ceef289..777fe6936a43b 100644 --- a/clang/test/Modules/submodules-merge-defs.cpp +++ b/clang/test/Modules/submodules-merge-defs.cpp @@ -15,67 +15,55 @@ #endif A pre_a; -#ifdef IMPORT_USE_2 -// expected-error-re@-2 {{must be imported from one of {{.*}}stuff.use{{.*}}stuff.use-2}} -#elif EARLY_INDIRECT_INCLUDE -// expected-error@-4 {{must be imported from module 'merged-defs'}} -#else -// expected-error@-6 {{must be imported from module 'stuff.use'}} -#endif +// expected-error-re@-1 {{missing '#include "{{.*}}-defs.h"'; 'A' must be declared}} // expected-note@defs.h:1 +{{here}} extern class A pre_a2; -int pre_use_a = use_a(pre_a2); // expected-error 2{{'A' must be imported}} expected-error {{'use_a' must be imported}} +int pre_use_a = use_a(pre_a2); // expected-error 2{{'A' must be defined}} expected-error {{'use_a' must be declared}} // expected-note@defs.h:2 +{{here}} -B::Inner2 pre_bi; // expected-error +{{must be imported}} +B::Inner2 pre_bi; // expected-error +{{must be declared}} expected-error +{{must be defined}} // expected-note@defs.h:4 +{{here}} // expected-note@defs.h:17 +{{here}} -void pre_bfi(B b) { // expected-error +{{must be imported}} +void pre_bfi(B b) { // expected-error +{{must be declared}} b.f(); } -C_Base<1> pre_cb1; // expected-error +{{must be imported}} +C_Base<1> pre_cb1; // expected-error +{{must be declared}} expected-error +{{must be defined}} // expected-note@defs.h:23 +{{here}} -C1 pre_c1; // expected-error +{{must be imported}} +C1 pre_c1; // expected-error +{{must be declared}} // expected-note@defs.h:25 +{{here}} -C2 pre_c2; // 
expected-error +{{must be imported}} +C2 pre_c2; // expected-error +{{must be declared}} // expected-note@defs.h:26 +{{here}} -D::X pre_dx; // expected-error +{{must be imported}} +D::X pre_dx; // expected-error +{{must be declared}} expected-error +{{must be defined}} // expected-note@defs.h:28 +{{here}} // expected-note@defs.h:29 +{{here}} int pre_use_dx = use_dx(pre_dx); // ignored; pre_dx is invalid -int pre_e = E(0); // expected-error {{must be imported}} +int pre_e = E(0); // expected-error {{must be declared}} // expected-note@defs.h:32 +{{here}} -int pre_ff = F().f(); // expected-error +{{must be imported}} -int pre_fg = F().g(); // expected-error +{{must be imported}} +int pre_ff = F().f(); // expected-error +{{must be declared}} +int pre_fg = F().g(); // expected-error +{{must be declared}} // expected-note@defs.h:34 +{{here}} -G::A pre_ga // expected-error +{{must be imported}} - = G::a; // expected-error +{{must be imported}} +G::A pre_ga // expected-error +{{must be declared}} + = G::a; // expected-error +{{must be declared}} // expected-note@defs.h:49 +{{here}} // expected-note@defs.h:50 +{{here}} -decltype(G::h) pre_gh = G::h; // expected-error +{{must be imported}} +decltype(G::h) pre_gh = G::h; // expected-error +{{must be declared}} expected-error +{{must be defined}} // expected-note@defs.h:51 +{{here}} -int pre_h = H(); // expected-error +{{must be imported}} +int pre_h = H(); // expected-error +{{must be declared}} // expected-note@defs.h:56 +{{here}} -using pre_i = I<>; // expected-error +{{must be imported}} +using pre_i = I<>; // expected-error +{{must be declared}} expected-error +{{default argument of 'I' must be defined}} // expected-note@defs.h:57 +{{here}} -J<> pre_j; // expected-error {{declaration of 'J' must be imported}} -#ifdef IMPORT_USE_2 -// expected-error-re@-2 {{default argument of 'J' must be imported from one of {{.*}}stuff.use-2{{.*}}stuff.use}} -#elif EARLY_INDIRECT_INCLUDE -// expected-error@-4 {{default argument of 'J' must be imported from module 'merged-defs'}} -#else -// expected-error@-6 {{default argument of 'J' must be imported from module 'stuff.use'}} -#endif +J<> pre_j; // expected-error {{'J' must be declared}} +// expected-error-re@-1 {{missing '#include "{{.*}}.h"'; default argument of 'J' must be defined before it is used}} // expected-note@defs.h:58 +{{here}} -ScopedEnum pre_scopedenum; // expected-error {{must be imported}} +ScopedEnum pre_scopedenum; // expected-error {{must be declared}} // expected-note@defs.h:105 0-1{{here}} // expected-note@defs.h:106 0-1{{here}} enum ScopedEnum : int; diff --git a/clang/test/Modules/submodules.cpp b/clang/test/Modules/submodules.cpp index a3dde23cb5b5e..e4a31db0e5339 100644 --- a/clang/test/Modules/submodules.cpp +++ b/clang/test/Modules/submodules.cpp @@ -8,8 +8,8 @@ vector vi; // Note: remove_reference is not visible yet. 
remove_reference<int>::type *int_ptr = 0; // expected-error{{declaration of 'remove_reference' must be imported from module 'std.type_traits' before it is required}}
-// expected-note@Inputs/submodules/type_traits.h:2{{previous}}
-// expected-note@Inputs/submodules/hash_map.h:1{{previous}}
+// expected-note@Inputs/submodules/type_traits.h:2{{not visible}}
+// expected-note@Inputs/submodules/hash_map.h:1{{not visible}}
 
 @import std.typetraits; // expected-error{{no submodule named 'typetraits' in module 'std'; did you mean 'type_traits'?}}
diff --git a/clang/test/Modules/suggest-include.cpp b/clang/test/Modules/suggest-include.cpp
index e10c3f38aba28..1e53ef05d5f83 100644
--- a/clang/test/Modules/suggest-include.cpp
+++ b/clang/test/Modules/suggest-include.cpp
@@ -3,23 +3,25 @@
 #include "empty.h" // import the module file
 
-// expected-note@usetextual1.h:2 {{previous}}
-// expected-note@textual2.h:1 {{previous}}
-// expected-note@textual3.h:1 {{previous}}
-// expected-note@textual4.h:1 {{previous}}
-// expected-note@textual5.h:1 {{previous}}
-// expected-note@private1.h:1 {{previous}}
-// expected-note@private2.h:1 {{previous}}
-// expected-note@private3.h:1 {{previous}}
+// expected-note@usetextual1.h:2 {{here}}
+// expected-note@textual2.h:1 {{here}}
+// expected-note@textual3.h:1 {{here}}
+// expected-note@textual4.h:1 {{here}}
+// expected-note@textual5.h:1 {{here}}
+// expected-note@private1.h:1 {{here}}
+// expected-note@private2.h:1 {{here}}
+// expected-note@private3.h:1 {{here}}
 
 void f() {
   (void)::usetextual1; // expected-error {{missing '#include "usetextual1.h"'}}
   (void)::usetextual2; // expected-error {{missing '#include "usetextual2.h"'}}
   (void)::textual3; // expected-error-re {{{{^}}missing '#include "usetextual3.h"'}}
-  // Don't suggest a #include that includes the entity via a path that leaves
-  // the module. In that case we can't be sure that we've picked the right header.
-  (void)::textual4; // expected-error-re {{{{^}}declaration of 'textual4'}}
-  (void)::textual5; // expected-error-re {{{{^}}declaration of 'textual5'}}
+  // If the declaration is in an include-guarded header, make sure we suggest
+  // including that rather than importing a module. In this case, there could
+  // be more than one module, and the module name we picked is almost certainly
+  // wrong.
+  (void)::textual4; // expected-error {{missing '#include "usetextual4.h"'; 'textual4' must be declared before it is used}}
+  (void)::textual5; // expected-error {{missing '#include "usetextual5.h"'; 'textual5' must be declared before it is used}}
   // Don't suggest #including a private header.
   // FIXME: We could suggest including "useprivate1.h" here, as it's the only
diff --git a/clang/test/Modules/tag-injection.c b/clang/test/Modules/tag-injection.c
index 5bb15477e2e24..727616f5d8621 100644
--- a/clang/test/Modules/tag-injection.c
+++ b/clang/test/Modules/tag-injection.c
@@ -14,5 +14,5 @@ void f(struct a *p);
 // "compatible types" rule.
void g(struct b *p); -struct b b; // expected-error {{definition of 'b' must be imported from module 'X.b' before it is required}} +struct b b; // expected-error {{'b' must be defined before it is used}} // expected-note@b.h:1 {{here}} diff --git a/clang/test/Modules/tag-injection.cpp b/clang/test/Modules/tag-injection.cpp index e55598b06202b..dca520e921a10 100644 --- a/clang/test/Modules/tag-injection.cpp +++ b/clang/test/Modules/tag-injection.cpp @@ -21,5 +21,5 @@ namespace N { }; } -X x; // expected-error {{definition of 'X' must be imported from module 'X.b' before it is required}} +X x; // expected-error {{'X' must be defined before it is used}} // expected-note@b.h:1 {{here}} diff --git a/clang/test/Modules/template-default-args.cpp b/clang/test/Modules/template-default-args.cpp index c51cb28408844..85b2a18d9e506 100644 --- a/clang/test/Modules/template-default-args.cpp +++ b/clang/test/Modules/template-default-args.cpp @@ -37,10 +37,10 @@ extern C<> c; D<> d; E<> e; F<> f; -G<> g; // expected-error {{default argument of 'G' must be imported from module 'X.A' before it is required}} -// expected-note@a.h:7 {{default argument declared here}} -H<> h; // expected-error {{default argument of 'H' must be imported from module 'X.A' before it is required}} -// expected-note@a.h:8 {{default argument declared here}} +G<> g; // expected-error {{missing '#include "a.h"'; default argument of 'G' must be defined before it is used}} +// expected-note@a.h:7 {{default argument declared here is not reachable}} +H<> h; // expected-error {{missing '#include "a.h"'; default argument of 'H' must be defined before it is used}} +// expected-note@a.h:8 {{default argument declared here is not reachable}} I<> i; L<> *l; END diff --git a/clang/test/Modules/undefined-type-fixit1.cpp b/clang/test/Modules/undefined-type-fixit1.cpp index 3b73457107265..8cb8a3c55de51 100644 --- a/clang/test/Modules/undefined-type-fixit1.cpp +++ b/clang/test/Modules/undefined-type-fixit1.cpp @@ -5,8 +5,8 @@ #include "public2.h" #include "public2sub.h" -use_this1 client_variable1; // expected-error{{declaration of 'use_this1' must be imported from module 'public1' before it is required}} +use_this1 client_variable1; // expected-error{{'use_this1' must be declared}} use_this2 client_variable2; use_this2sub client_variable2sub; -// expected-note@Inputs/undefined-type-fixit/public1.h:4 {{previous declaration is here}} +// expected-note@Inputs/undefined-type-fixit/public1.h:4 {{declaration here is not visible}} diff --git a/clang/test/Modules/visibility-in-instantiation.cpp b/clang/test/Modules/visibility-in-instantiation.cpp index 8689758a42ba2..81ddfd6baaf38 100644 --- a/clang/test/Modules/visibility-in-instantiation.cpp +++ b/clang/test/Modules/visibility-in-instantiation.cpp @@ -47,5 +47,5 @@ void g() { ST::f(); // expected-error {{must be imported from module 'M.B'}} foo(X()); // expected-error {{must be imported from module 'M.C'}} - // expected-note@* 2{{previous declaration is here}} + // expected-note@* 2{{declaration here is not visible}} } diff --git a/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c b/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c new file mode 100644 index 0000000000000..d65c4edaeea70 --- /dev/null +++ b/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fopenmp-simd -x c -emit-llvm 
%s -o - -femit-all-decls | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+// Note: -fopenmp and -fopenmp-simd behavior are expected to be the same.
+
+// This test checks the values of Narrowest Data Size (NDS), as defined in
+// https://github.com/ARM-software/abi-aa/tree/master/vfabia64
+//
+// NDS is used to compute the <vlen> token in the name of AdvSIMD
+// vector functions when no `simdlen` is specified, with the rule:
+//
+// if NDS(f) = 1, then VLEN = 16, 8;
+// if NDS(f) = 2, then VLEN = 8, 4;
+// if NDS(f) = 4, then VLEN = 4, 2;
+// if NDS(f) = 8 or NDS(f) = 16, then VLEN = 2.
+
+// NDS(NDS_is_sizeof_char) = 1
+#pragma omp declare simd notinbranch
+char NDS_is_sizeof_char(short in);
+// CHECK-DAG: _ZGVnN16v_NDS_is_sizeof_char
+// CHECK-DAG: _ZGVnN8v_NDS_is_sizeof_char
+// CHECK-NOT: _ZGV{{.*}}_NDS_is_sizeof_char
+
+// NDS(NDS_is_sizeof_short) = 2
+#pragma omp declare simd notinbranch
+int NDS_is_sizeof_short(short in);
+// CHECK-DAG: _ZGVnN8v_NDS_is_sizeof_short
+// CHECK-DAG: _ZGVnN4v_NDS_is_sizeof_short
+// CHECK-NOT: _ZGV{{.*}}_NDS_is_sizeof_short
+
+// NDS(NDS_is_sizeof_float_with_linear) = 4, and not 2, because the pointers are
+// marked as `linear` and therefore the size of the pointee realizes
+// the NDS.
+#pragma omp declare simd linear(sin) notinbranch
+void NDS_is_sizeof_float_with_linear(double in, float *sin);
+// Neon accepts only power of 2 values as <vlen>.
+// CHECK-DAG: _ZGVnN4vl4_NDS_is_sizeof_float_with_linear
+// CHECK-DAG: _ZGVnN2vl4_NDS_is_sizeof_float_with_linear
+// CHECK-NOT: _ZGV{{.*}}_NDS_is_sizeof_float_with_linear
+
+// NDS(NDS_is_size_of_float) = 4
+#pragma omp declare simd notinbranch
+double NDS_is_size_of_float(float in);
+// CHECK-DAG: _ZGVnN4v_NDS_is_size_of_float
+// CHECK-DAG: _ZGVnN2v_NDS_is_size_of_float
+// CHECK-NOT: _ZGV{{.*}}_NDS_is_size_of_float
+
+// NDS(NDS_is_sizeof_double) = 8
+#pragma omp declare simd linear(sin) notinbranch
+void NDS_is_sizeof_double(double in, double *sin);
+// CHECK-DAG: _ZGVnN2vl8_NDS_is_sizeof_double
+// CHECK-NOT: _ZGV{{.*}}_NDS_is_sizeof_double
+
+// NDS(double_complex) = 16
+#pragma omp declare simd notinbranch
+double _Complex double_complex(double _Complex);
+// CHECK-DAG: _ZGVnN2v_double_complex
+// CHECK-NOT: _ZGV{{.*}}_double_complex
+
+// NDS(double_complex_linear_char) = 1, because `x` is marked linear.
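+// Applying the rule above, NDS = 1 selects VLEN = 16 and VLEN = 8, giving the
+// two mangled names checked below; in the `vl` token, `v` marks the vector
+// parameter `y` and `l` the linear pointer `x` (stride 1, since the pointee
+// is `char`).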
+#pragma omp declare simd linear(x) notinbranch
+double _Complex double_complex_linear_char(double _Complex y, char *x);
+// CHECK-DAG: _ZGVnN8vl_double_complex_linear_char
+// CHECK-DAG: _ZGVnN16vl_double_complex_linear_char
+// CHECK-NOT: _ZGV{{.*}}_double_complex_linear_char
+
+static float *F;
+static double *D;
+static short S;
+static int I;
+static char C;
+static double _Complex DC;
+void do_something() {
+  C = NDS_is_sizeof_char(S);
+  I = NDS_is_sizeof_short(S);
+  NDS_is_sizeof_float_with_linear(*D, F);
+  *D = NDS_is_size_of_float(*F);
+  NDS_is_sizeof_double(*D, D);
+  DC = double_complex(DC);
+  DC = double_complex_linear_char(DC, &C);
+}
diff --git a/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c b/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c
new file mode 100644
index 0000000000000..841a64053e5e3
--- /dev/null
+++ b/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +sve -fopenmp-simd -x c -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+// Note: -fopenmp and -fopenmp-simd behavior are expected to be the same.
+
+// This test checks the values of Widest Data Size (WDS), as defined
+// in https://github.com/ARM-software/abi-aa/tree/master/vfabia64
+//
+// WDS is used to check the accepted values of `simdlen(<N>)` when
+// targeting fixed-length SVE vector function names. The values of
+// `<N>` that are accepted are such that for X = WDS * <N> * 8,
+// 128-bit <= X <= 2048-bit and X is a multiple of 128-bit.
+
+#pragma omp declare simd simdlen(8)
+#pragma omp declare simd simdlen(16)
+#pragma omp declare simd simdlen(256)
+#pragma omp declare simd simdlen(272)
+char WDS_is_sizeof_char(char in);
+// WDS = 1, simdlen(8) and simdlen(272) are not generated.
+// CHECK-DAG: _ZGVsM16v_WDS_is_sizeof_char
+// CHECK-DAG: _ZGVsM256v_WDS_is_sizeof_char
+// CHECK-NOT: _ZGV{{.*}}_WDS_is_sizeof_char
+
+#pragma omp declare simd simdlen(4)
+#pragma omp declare simd simdlen(8)
+#pragma omp declare simd simdlen(128)
+#pragma omp declare simd simdlen(136)
+char WDS_is_sizeof_short(short in);
+// WDS = 2, simdlen(4) and simdlen(136) are not generated.
+// CHECK-DAG: _ZGVsM8v_WDS_is_sizeof_short
+// CHECK-DAG: _ZGVsM128v_WDS_is_sizeof_short
+// CHECK-NOT: _ZGV{{.*}}_WDS_is_sizeof_short
+
+#pragma omp declare simd linear(sin) notinbranch simdlen(2)
+#pragma omp declare simd linear(sin) notinbranch simdlen(4)
+#pragma omp declare simd linear(sin) notinbranch simdlen(64)
+#pragma omp declare simd linear(sin) notinbranch simdlen(68)
+void WDS_is_sizeof_float_pointee(float in, float *sin);
+// WDS = 4, simdlen(2) and simdlen(68) are not generated.
+// CHECK-DAG: _ZGVsM4vl4_WDS_is_sizeof_float_pointee
+// CHECK-DAG: _ZGVsM64vl4_WDS_is_sizeof_float_pointee
+// CHECK-NOT: _ZGV{{.*}}_WDS_is_sizeof_float_pointee
+
+#pragma omp declare simd linear(sin) notinbranch simdlen(2)
+#pragma omp declare simd linear(sin) notinbranch simdlen(4)
+#pragma omp declare simd linear(sin) notinbranch simdlen(32)
+#pragma omp declare simd linear(sin) notinbranch simdlen(34)
+void WDS_is_sizeof_double_pointee(float in, double *sin);
+// WDS = 8 because of the linear clause, simdlen(34) is not generated.
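+// Worked out with the X = WDS * <N> * 8 rule: N = 2, 4, 32 give X = 128, 256,
+// 2048 bits (all in range and multiples of 128), while N = 34 gives 2176 bits
+// and is rejected.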
+// CHECK-DAG: _ZGVsM2vl8_WDS_is_sizeof_double_pointee +// CHECK-DAG: _ZGVsM4vl8_WDS_is_sizeof_double_pointee +// CHECK-DAG: _ZGVsM32vl8_WDS_is_sizeof_double_pointee +// CHECK-NOT: _ZGV{{.*}}_WDS_is_sizeof_double_pointee + +#pragma omp declare simd simdlen(2) +#pragma omp declare simd simdlen(4) +#pragma omp declare simd simdlen(32) +#pragma omp declare simd simdlen(34) +double WDS_is_sizeof_double(double in); +// WDS = 8, simdlen(34) is not generated. +// CHECK-DAG: _ZGVsM2v_WDS_is_sizeof_double +// CHECK-DAG: _ZGVsM4v_WDS_is_sizeof_double +// CHECK-DAG: _ZGVsM32v_WDS_is_sizeof_double +// CHECK-NOT: _ZGV{{.*}}_WDS_is_sizeof_double + +static char C; +static short S; +static float F; +static double D; + +void do_something() { + C = WDS_is_sizeof_char(C); + C = WDS_is_sizeof_short(S); + WDS_is_sizeof_float_pointee(F, &F); + WDS_is_sizeof_double_pointee(F, &D); + D = WDS_is_sizeof_double(D); +} diff --git a/clang/test/OpenMP/declare_simd_aarch64.c b/clang/test/OpenMP/declare_simd_aarch64.c index 4af2ad9bb6039..21c83c225963f 100644 --- a/clang/test/OpenMP/declare_simd_aarch64.c +++ b/clang/test/OpenMP/declare_simd_aarch64.c @@ -130,12 +130,12 @@ double constlinear(const int i); /*************************/ #pragma omp declare simd linear(sin) linear(cos) void sincos(double in, double *sin, double *cos); -// AARCH64: "_ZGVnN2vll_sincos" +// AARCH64: "_ZGVnN2vl8l8_sincos" // AARCH64-NOT: sincos #pragma omp declare simd linear(sin : 1) linear(cos : 2) void SinCos(double in, double *sin, double *cos); -// AARCH64: "_ZGVnN2vll2_SinCos" +// AARCH64: "_ZGVnN2vl8l16_SinCos" // AARCH64-NOT: SinCos // Selection of tests based on the examples provided in chapter 5 of @@ -158,7 +158,7 @@ int foo3(int *x, int i, unsigned char c); // Listing 6, p. 19 #pragma omp declare simd linear(x) aligned(x : 16) simdlen(4) int foo4(int *x, float y); -// AARCH64: "_ZGVnM4la16v_foo4" "_ZGVnN4la16v_foo4" +// AARCH64: "_ZGVnM4l4a16v_foo4" "_ZGVnN4l4a16v_foo4" // AARCH64-NOT: foo4 static int *I; diff --git a/clang/test/OpenMP/declare_simd_codegen.cpp b/clang/test/OpenMP/declare_simd_codegen.cpp index ef7ad8772e517..1967f3b248dc5 100644 --- a/clang/test/OpenMP/declare_simd_codegen.cpp +++ b/clang/test/OpenMP/declare_simd_codegen.cpp @@ -136,14 +136,14 @@ double constlinear(const int i) { return 0.0; } // CHECK-DAG: declare {{.+}}@_Z5add_2Pf( // CHECK-DAG: define {{.+}}@_Z11constlineari( -// CHECK-DAG: "_ZGVbM4l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVbN4l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVcM8l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVcN8l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVdM8l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVdN8l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVeM16l8__Z5add_1Pf" -// CHECK-DAG: "_ZGVeN16l8__Z5add_1Pf" +// CHECK-DAG: "_ZGVbM4l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVbN4l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVcM8l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVcN8l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVdM8l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVdN8l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVeM16l32__Z5add_1Pf" +// CHECK-DAG: "_ZGVeN16l32__Z5add_1Pf" // CHECK-DAG: "_ZGVbM32v__Z5add_1Pf" // CHECK-DAG: "_ZGVcM32v__Z5add_1Pf" // CHECK-DAG: "_ZGVdM32v__Z5add_1Pf" @@ -180,14 +180,14 @@ double constlinear(const int i) { return 0.0; } // CHECK-DAG: "_ZGVeM16uus1__ZN2VV3addEii" // CHECK-DAG: "_ZGVeN16uus1__ZN2VV3addEii" -// CHECK-DAG: "_ZGVbM4lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVbN4lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVcM8lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVcN8lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVdM8lla16l4a4__ZN2VV6taddpfEPfRS0_" 
-// CHECK-DAG: "_ZGVdN8lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVeM16lla16l4a4__ZN2VV6taddpfEPfRS0_" -// CHECK-DAG: "_ZGVeN16lla16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVbM4ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVbN4ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVcM8ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVcN8ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVdM8ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVdN8ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVeM16ll4a16l4a4__ZN2VV6taddpfEPfRS0_" +// CHECK-DAG: "_ZGVeN16ll4a16l4a4__ZN2VV6taddpfEPfRS0_" // CHECK-DAG: "_ZGVbM4vvl8__ZN2VV4taddERA_iRi" // CHECK-DAG: "_ZGVbN4vvl8__ZN2VV4taddERA_iRi" @@ -293,23 +293,23 @@ double constlinear(const int i) { return 0.0; } // CHECK-DAG: "_ZGVeM16vvv__Z3bax2VVPdi" // CHECK-DAG: "_ZGVeN16vvv__Z3bax2VVPdi" -// CHECK-DAG: "_ZGVbM4ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVbN4ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVcM8ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVcN8ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVdM8ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVdN8ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVeM16ua16vl1__Z3fooPffi" -// CHECK-DAG: "_ZGVeN16ua16vl1__Z3fooPffi" - -// CHECK-DAG: "_ZGVbM4l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVbN4l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVcM8l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVcN8l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVdM8l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVdN8l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVeM16l8__Z5add_2Pf" -// CHECK-DAG: "_ZGVeN16l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVbM4ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVbN4ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVcM8ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVcN8ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVdM8ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVdN8ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVeM16ua16vl__Z3fooPffi" +// CHECK-DAG: "_ZGVeN16ua16vl__Z3fooPffi" + +// CHECK-DAG: "_ZGVbM4l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVbN4l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVcM8l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVcN8l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVdM8l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVdN8l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVeM16l32__Z5add_2Pf" +// CHECK-DAG: "_ZGVeN16l32__Z5add_2Pf" // CHECK-DAG: "_ZGVbM32v__Z5add_2Pf" // CHECK-DAG: "_ZGVcM32v__Z5add_2Pf" // CHECK-DAG: "_ZGVdM32v__Z5add_2Pf" diff --git a/clang/test/OpenMP/distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/distribute_parallel_for_ast_print.cpp index 38f0d44315baf..ec8d595f46cb7 100644 --- a/clang/test/OpenMP/distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_ast_print.cpp @@ -163,12 +163,12 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams #ifdef OMP5 -#pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) shared(argc) order(concurrent) +#pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) shared(argc) order(concurrent) reduction(task,+:c) #else #pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) shared(argc) #endif // OMP5 // OMP45: #pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) shared(argc) - // OMP50: #pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) shared(argc) order(concurrent) + // OMP50: #pragma omp distribute parallel for schedule(guided, argc) default(none) copyin(g) 
dist_schedule(static, a) private(a) shared(argc) order(concurrent) reduction(task, +: c) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp index 0bdacd01ae5aa..828b31e00846a 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; @@ -457,6 +463,11 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..971e9be8534ba --- /dev/null +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp 
-fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target teams +#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp index 0cab19c60938f..84e79c84a2b27 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s
-Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -456,6 +462,11 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/distribute_simd_reduction_messages.cpp index b3a45655f3153..aba7b7133b0c5 100644 --- a/clang/test/OpenMP/distribute_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/distribute_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o 
- %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -262,6 +268,11 @@ T tmain(T argc) { #pragma omp distribute simd reduction(+ : fl) for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute simd reduction(task, + : fl) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error 3 {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + foo(); return T(); } diff --git a/clang/test/OpenMP/for_ast_print.cpp b/clang/test/OpenMP/for_ast_print.cpp index af170cacf8075..8cf839a120500 100644 --- a/clang/test/OpenMP/for_ast_print.cpp +++ b/clang/test/OpenMP/for_ast_print.cpp @@ -168,13 +168,13 @@ int main(int argc, char **argv) { // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: a = 2; #pragma omp parallel -#pragma omp for private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(3) schedule(auto) ordered nowait linear(g:-1) +#pragma omp for private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(3) schedule(auto) ordered nowait linear(g:-1) reduction(task, +:e) for (int i = 0; i < 10; ++i) for (int j = 0; j < 10; ++j) for (auto x : arr) foo(), (void)x; // CHECK-NEXT: #pragma omp parallel - // CHECK-NEXT: #pragma omp for private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(3) schedule(auto) ordered nowait linear(g: -1) + // CHECK-NEXT: #pragma omp for private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(3) schedule(auto) ordered nowait linear(g: -1) reduction(task, +: e) // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: for (int j = 0; j < 10; ++j) // CHECK-NEXT: for (auto x : arr) diff --git a/clang/test/OpenMP/for_reduction_messages.cpp b/clang/test/OpenMP/for_reduction_messages.cpp index 1ba3a604c6235..fd9a414b679ed 100644 --- a/clang/test/OpenMP/for_reduction_messages.cpp +++ b/clang/test/OpenMP/for_reduction_messages.cpp @@ -1,10 +1,16 @@ 
-// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -404,6 +410,9 @@ int main(int argc, char **argv) { #pragma omp for reduction(+:m) for (int i = 0; i < 10; ++i) m++; +#pragma omp for reduction(task, +:m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..ea8fc55d9cb2f --- /dev/null +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd
-fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel +#pragma omp for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/for_simd_reduction_messages.cpp b/clang/test/OpenMP/for_simd_reduction_messages.cpp index c7396084f3226..f61a75951bc68 100644 --- a/clang/test/OpenMP/for_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/for_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1
-verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -383,6 +389,9 @@ int main(int argc, char **argv) { #pragma omp for simd reduction(+ : m) for (int i = 0; i < 10; ++i) m++; +#pragma omp for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/master_taskloop_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp index 58be898a9f1e4..bc571e04b417d 100644 --- a/clang/test/OpenMP/master_taskloop_reduction_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 
-fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -347,6 +353,9 @@ int main(int argc, char **argv) { #pragma omp master taskloop nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; +#pragma omp master taskloop reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp index eabb913e53492..de5cec5b9a660 100644 --- a/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// 
RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -347,6 +353,9 @@ int main(int argc, char **argv) { #pragma omp master taskloop simd reduction(+ : m) nogroup // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; +#pragma omp master taskloop simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/parallel_ast_print.cpp b/clang/test/OpenMP/parallel_ast_print.cpp index 7ba40d8dc1cc3..3285e101134a4 100644 --- a/clang/test/OpenMP/parallel_ast_print.cpp +++ b/clang/test/OpenMP/parallel_ast_print.cpp @@ -152,7 +152,7 @@ T tmain(T argc, T *argv) { a=2; #pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S::TS, thrp) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) foo(); -#pragma omp parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(default, && : g) +#pragma omp parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(default, && : g) reduction(task,+:argc) foo(); return 0; } @@ -166,7 +166,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(default, &&: g) +// CHECK-NEXT: #pragma omp parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -177,7 +177,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(default, &&: g) +// CHECK-NEXT: #pragma omp parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -188,7 +188,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) 
if(parallel: argc > 0) num_threads(1) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(default, &&: g) +// CHECK-NEXT: #pragma omp parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(default, &&: g) reduction(task, +: argc) // CHECK-NEXT: foo() enum Enum { }; diff --git a/clang/test/OpenMP/parallel_for_ast_print.cpp b/clang/test/OpenMP/parallel_for_ast_print.cpp index 7ec4c76e3de6b..4006d79ff2cdb 100644 --- a/clang/test/OpenMP/parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/parallel_for_ast_print.cpp @@ -132,8 +132,8 @@ int main(int argc, char **argv) { // CHECK: static int a; static float g; #pragma omp threadprivate(g) -#pragma omp parallel for schedule(guided, argc) default(none) copyin(g) linear(a) shared(argc) - // CHECK: #pragma omp parallel for schedule(guided, argc) default(none) copyin(g) linear(a) shared(argc) +#pragma omp parallel for schedule(guided, argc) default(none) copyin(g) linear(a) shared(argc) reduction(task,&:d) + // CHECK: #pragma omp parallel for schedule(guided, argc) default(none) copyin(g) linear(a) shared(argc) reduction(task, &: d) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) diff --git a/clang/test/OpenMP/parallel_for_reduction_messages.cpp b/clang/test/OpenMP/parallel_for_reduction_messages.cpp index 50a93129b1fa2..e88aeed41d003 100644 --- a/clang/test/OpenMP/parallel_for_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_messages.cpp @@ -340,6 +340,9 @@ int main(int argc, char **argv) { #pragma omp parallel for reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp parallel for reduction(task, + : m) // OK + for (int i = 0; i < 10; ++i) + m++; #pragma omp parallel for reduction(inscan, + : m) reduction(*: fl) reduction(default, &&: j) // expected-error 2 {{expected 'reduction' clause with the 'inscan' modifier}} expected-note 2 {{'reduction' clause with 'inscan' modifier is used here}} for (int i = 0; i < 10; ++i) { diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..b4f4f83ec9554 --- /dev/null +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// 
expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. +// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = 
getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 
{{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
index d8c13e7d2c876..f8776713e20e8 100644
--- a/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
@@ -1,10 +1,16 @@
-// RUN: %clang_cc1 -verify -fopenmp -o - %s -Wuninitialized
-// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s -Wuninitialized
-// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -o - %s -Wuninitialized

-// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
-// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s -Wuninitialized
-// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -o - %s -Wuninitialized

 extern int omp_default_mem_alloc;
 void xxx(int argc) {
@@ -326,6 +332,9 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd reduction(+ : m) // OK
   for (int i = 0; i < 10; ++i)
     m++;
+#pragma omp parallel for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}}
+  for (int i = 0; i < 10; ++i)
+    m++;
   return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 
'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/parallel_master_ast_print.cpp b/clang/test/OpenMP/parallel_master_ast_print.cpp index 0e07258a73c1e..e521488fafc30 100644 --- a/clang/test/OpenMP/parallel_master_ast_print.cpp +++ b/clang/test/OpenMP/parallel_master_ast_print.cpp @@ -152,7 +152,7 @@ T tmain(T argc, T *argv) { a=2; #pragma omp parallel master default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S::TS, thrp) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) foo(); -#pragma omp parallel master if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(&& : g) +#pragma omp parallel master if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(task, && : g) foo(); return 0; } @@ -166,7 +166,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel master default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel master if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(&&: g) +// CHECK-NEXT: #pragma omp parallel master if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(task, &&: g) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -177,7 +177,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel master default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel master if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: g) +// CHECK-NEXT: #pragma omp parallel master if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(task, &&: g) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -188,7 +188,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp parallel master default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp parallel master if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(&&: g) +// CHECK-NEXT: #pragma omp parallel master if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(task, &&: g) // CHECK-NEXT: foo() enum Enum { }; diff --git a/clang/test/OpenMP/parallel_master_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_reduction_messages.cpp index 4a5f8d2cb9f0f..b74d3fe936e83 100644 --- a/clang/test/OpenMP/parallel_master_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 
-o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -393,6 +399,10 @@ int main(int argc, char **argv) { { foo(); } +#pragma omp parallel master reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + { + foo(); + } return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..0f8366fa95e33 --- /dev/null +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited 
-std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
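+//
+// The CHECK lines that follow walk the two reduction descriptors field by
+// field. As a reading aid, this is the layout the GEP indices 0..6 imply,
+// written as a C++ sketch; the field names follow the conventions of the
+// LLVM OpenMP runtime's kmp_taskred_input_t and are an assumption of this
+// sketch, not something the test itself verifies:
+//
+//   struct kmp_taskred_input_t {
+//     void *reduce_shar;  // idx 0: firstprivate copy used inside the region
+//     void *reduce_orig;  // idx 1: original shared variable
+//     size_t reduce_size; // idx 2: size in bytes (4 for the i32 argc)
+//     void *reduce_init;  // idx 3: initializer, e.g. [[ARGC_INIT]]
+//     void *reduce_fini;  // idx 4: finalizer, null for these trivial types
+//     void *reduce_comb;  // idx 5: combiner, e.g. [[ARGC_COMB]]
+//     unsigned flags;     // idx 6: 0 via memset for argc; 1 for the
+//                         //        variable-sized argv item (apparently
+//                         //        requesting lazy allocation)
+//   };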
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_master_taskloop_ast_print.cpp b/clang/test/OpenMP/parallel_master_taskloop_ast_print.cpp
index fa02c0b169f97..d58121e86667a 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_ast_print.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_ast_print.cpp
@@ -18,9 +18,9 @@ T tmain(T argc) {
   static T a;
 // CHECK: static T a;
 #pragma omp taskgroup allocate(d) task_reduction(+: d)
-#pragma omp parallel master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) allocate(g)
+#pragma omp parallel master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) allocate(g) reduction(task,^:c)
 // CHECK-NEXT: #pragma omp taskgroup allocate(d) task_reduction(+: d)
-
// CHECK-NEXT: #pragma omp parallel master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) allocate(g){{$}} + // CHECK-NEXT: #pragma omp parallel master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) allocate(g) reduction(task, ^: c){{$}} for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) diff --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp index 309cf71b8b9f7..477a0e79f65df 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -344,6 +350,9 @@ int main(int argc, char **argv) { #pragma omp parallel master taskloop reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp parallel master taskloop reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + m++; #pragma omp parallel master taskloop nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp 
b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp index 9fb65f37072dd..fe0bd5a65e3a7 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -344,6 +350,9 @@ int main(int argc, char **argv) { #pragma omp parallel master taskloop simd reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp parallel master taskloop simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; #pragma omp parallel master taskloop simd nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; diff --git a/clang/test/OpenMP/parallel_reduction_messages.c b/clang/test/OpenMP/parallel_reduction_messages.c index 5d6010d59d500..eef9a7d7d133a 100644 --- a/clang/test/OpenMP/parallel_reduction_messages.c +++ b/clang/test/OpenMP/parallel_reduction_messages.c @@ -10,7 +10,7 @@ void test(int *p) { ; #pragma omp parallel reduction(default, // expected-error {{expected identifier}} expected-error {{expected ')'}} expected-warning {{missing ':' after reduction identifier - ignoring}} 
expected-note {{to match this '('}} ; -#pragma omp parallel reduction(unknown, +: a) // expected-error {{expected 'default' or 'inscan' in OpenMP clause 'reduction'}} +#pragma omp parallel reduction(unknown, +: a) // expected-error {{expected 'default', 'inscan' or 'task' in OpenMP clause 'reduction'}} ; #pragma omp parallel reduction(default, + : a) ; diff --git a/clang/test/OpenMP/parallel_reduction_messages.cpp b/clang/test/OpenMP/parallel_reduction_messages.cpp index 881615684c550..b464bf5b96437 100644 --- a/clang/test/OpenMP/parallel_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -269,6 +275,8 @@ int main(int argc, char **argv) { static int m; #pragma omp parallel reduction(+ : m) // OK m++; +#pragma omp parallel reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..5e04aa8c1ec28 --- /dev/null +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ 
-triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
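+//
+// Taken together, the calls pinned down below form the runtime handshake
+// for a task reduction; in source-like pseudocode (argument names such as
+// is_ws are assumptions inferred from the argument positions checked here,
+// not verified by this test):
+//
+//   void *tg = __kmpc_taskred_modifier_init(loc, gtid, /*is_ws=*/0,
+//                                           /*num=*/2, descriptors);
+//   // ... tasks fetch their private copies via
+//   // __kmpc_task_reduction_get_th_data(gtid, tg, orig_ptr) ...
+//   __kmpc_task_reduction_modifier_fini(loc, gtid, /*is_ws=*/0);
+//   __kmpc_reduce_nowait(...); // usual parallel-reduction epilogue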
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_sections_ast_print.cpp b/clang/test/OpenMP/parallel_sections_ast_print.cpp
index 569288163433a..4c813c3c026b7 100644
--- a/clang/test/OpenMP/parallel_sections_ast_print.cpp
+++ b/clang/test/OpenMP/parallel_sections_ast_print.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp 
-fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -118,8 +118,8 @@ int main(int argc, char **argv) { #pragma omp threadprivate(a) Enum ee; // CHECK: Enum ee; -#pragma omp parallel sections - // CHECK-NEXT: #pragma omp parallel sections +#pragma omp parallel sections reduction(task,*:f) + // CHECK-NEXT: #pragma omp parallel sections reduction(task, *: f) { a = 2; } diff --git a/clang/test/OpenMP/parallel_sections_reduction_messages.cpp b/clang/test/OpenMP/parallel_sections_reduction_messages.cpp index 6b4f5f941525d..79ba5a3014f6b 100644 --- a/clang/test/OpenMP/parallel_sections_reduction_messages.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -393,6 +399,10 @@ int main(int argc, char **argv) { { foo(); } 
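+// The 'task' reduction modifier is an OpenMP 5.0 addition: the 4.5 runs do
+// not recognize it, so the clause fails to parse with the omp45 diagnostics
+// expected below, while the 5.0 runs accept it.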
+#pragma omp parallel sections reduction(task,+ : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + { + foo(); + } return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..867eb45a1332b --- /dev/null +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
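+//
+// Note the third argument of the modifier init/fini calls checked below is
+// i32 1 here, whereas the plain 'parallel' and 'parallel master' variants of
+// this test pass i32 0: sections is a worksharing construct, and the flag
+// presumably tells the runtime so (the parameter's meaning is an inference
+// from comparing the test variants, not something this test checks).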
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/sections_ast_print.cpp b/clang/test/OpenMP/sections_ast_print.cpp
index 1679b11311505..a5b4e34eab179 100644
--- a/clang/test/OpenMP/sections_ast_print.cpp
+++ b/clang/test/OpenMP/sections_ast_print.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck 
%s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -18,12 +18,12 @@ T tmain(T argc) { static T a; // CHECK: static T a; #pragma omp parallel -#pragma omp sections private(argc, b), firstprivate(c, d), lastprivate(d, f) reduction(- : g) nowait allocate(d) +#pragma omp sections private(argc, b), firstprivate(c, d), lastprivate(d, f) reduction(task, - : g) nowait allocate(d) { foo(); } // CHECK-NEXT: #pragma omp parallel - // CHECK-NEXT: #pragma omp sections private(argc,b) firstprivate(c,d) lastprivate(d,f) reduction(-: g) nowait allocate(d){{$}} + // CHECK-NEXT: #pragma omp sections private(argc,b) firstprivate(c,d) lastprivate(d,f) reduction(task, -: g) nowait allocate(d){{$}} // CHECK-NEXT: { // CHECK-NEXT: foo(); // CHECK-NEXT: } diff --git a/clang/test/OpenMP/sections_reduction_messages.cpp b/clang/test/OpenMP/sections_reduction_messages.cpp index 40e43d23d86ee..1ed9b491fc1d1 100644 --- a/clang/test/OpenMP/sections_reduction_messages.cpp +++ b/clang/test/OpenMP/sections_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd 
-fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -450,6 +456,10 @@ int main(int argc, char **argv) { { foo(); } +#pragma omp sections reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + { + foo(); + } return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..be67a2a174004 --- /dev/null +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp parallel +#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. +// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* 
[[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG:
[[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/simd_reduction_messages.cpp b/clang/test/OpenMP/simd_reduction_messages.cpp index e065419fbca35..17fbc60689818 100644 --- a/clang/test/OpenMP/simd_reduction_messages.cpp +++ b/clang/test/OpenMP/simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -325,6 +331,9 @@ int main(int argc, char **argv) { #pragma omp simd reduction(+ : fl) for (int i = 0; i < 10; ++i) foo(); +#pragma omp simd reduction(task, + : fl) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_ast_print.cpp b/clang/test/OpenMP/target_ast_print.cpp index 1ff0b2bc1e1a3..b2125ac3d77c2 100644 --- a/clang/test/OpenMP/target_ast_print.cpp +++ b/clang/test/OpenMP/target_ast_print.cpp @@ -341,6 +341,16 @@ int main (int argc, char **argv) { // RUN: %clang_cc1 -DOMP5 -fopenmp-simd
-fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -DOMP5 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s --check-prefix OMP5 +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} #pragma omp declare target @@ -1050,8 +1060,12 @@ int main (int argc, char **argv) { // OMP5-NEXT: bar(); #pragma omp target defaultmap(none) // OMP5-NEXT: #pragma omp target defaultmap(none) + // OMP5-NEXT: bar(); bar(); +#pragma omp target allocate(omp_default_mem_alloc:argv) uses_allocators(omp_default_mem_alloc,omp_large_cap_mem_alloc) allocate(omp_large_cap_mem_alloc:argc) private(argc, argv) + // OMP5-NEXT: #pragma omp target allocate(omp_default_mem_alloc: argv) uses_allocators(omp_default_mem_alloc,omp_large_cap_mem_alloc) allocate(omp_large_cap_mem_alloc: argc) private(argc,argv) // OMP5-NEXT: bar(); + bar(); return tmain(argc, &argc) + tmain(argv[0][0], argv[0]); } diff --git a/clang/test/OpenMP/target_firstprivate_messages.cpp b/clang/test/OpenMP/target_firstprivate_messages.cpp index 0a3079bbf83e7..ab68501517f9c 100644 --- a/clang/test/OpenMP/target_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_firstprivate_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized void xxx(int argc) { int fp, fp1; // expected-note {{initialize the variable 'fp' to silence this warning}} expected-note {{initialize the variable 'fp1' to silence this warning}} @@ -128,7 +128,7 @@ int foomain(I argc, C **argv) { {} #pragma omp target firstprivate(argv[1]) // expected-error {{expected variable name}} {} -#pragma omp target firstprivate(e, g) allocate(omp_thread_mem_alloc: e) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target' directive}} +#pragma omp target firstprivate(e, g) allocate(omp_thread_mem_alloc: e) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} {} #pragma omp target firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} {} diff --git a/clang/test/OpenMP/target_parallel_ast_print.cpp b/clang/test/OpenMP/target_parallel_ast_print.cpp index fb206343cc008..fd3b724cf5ea3 100644 --- a/clang/test/OpenMP/target_parallel_ast_print.cpp +++ b/clang/test/OpenMP/target_parallel_ast_print.cpp @@ -1,15 +1,25 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | FileCheck 
%s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} template @@ -41,7 +51,7 @@ T tmain(T argc, T *argv) { T i, j, a[20]; #pragma omp target parallel h=2; -#pragma omp target parallel allocate(argv) default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) +#pragma omp target parallel allocate(omp_large_cap_mem_alloc:argv) default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) uses_allocators(omp_large_cap_mem_alloc) foo(); #pragma omp target parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(&& : g) allocate(g) foo(); @@ -76,7 +86,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: T i, j, a[20] // CHECK-NEXT: #pragma omp target parallel{{$}} // CHECK-NEXT: h = 2; -// CHECK-NEXT: #pragma omp target parallel allocate(argv) default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) +// CHECK-NEXT: #pragma omp target parallel allocate(omp_large_cap_mem_alloc: argv) default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) uses_allocators(omp_large_cap_mem_alloc) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(&&: g) allocate(g) // CHECK-NEXT: foo() @@ -108,7 +118,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: int i, j, a[20] // CHECK-NEXT: #pragma omp target parallel // CHECK-NEXT: h = 2; -// CHECK-NEXT: #pragma omp target parallel allocate(argv) default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) +// CHECK-NEXT: #pragma omp target parallel allocate(omp_large_cap_mem_alloc: argv) default(none) private(argc,b) 
firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) uses_allocators(omp_large_cap_mem_alloc) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: g) allocate(g) // CHECK-NEXT: foo() @@ -140,7 +150,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: char i, j, a[20] // CHECK-NEXT: #pragma omp target parallel // CHECK-NEXT: h = 2; -// CHECK-NEXT: #pragma omp target parallel allocate(argv) default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) +// CHECK-NEXT: #pragma omp target parallel allocate(omp_large_cap_mem_alloc: argv) default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) uses_allocators(omp_large_cap_mem_alloc) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp target parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(&&: g) allocate(g) // CHECK-NEXT: foo() @@ -223,10 +233,18 @@ int main (int argc, char **argv) { foo(); // CHECK-NEXT: foo(); -#pragma omp target parallel defaultmap(tofrom: scalar) -// CHECK-NEXT: #pragma omp target parallel defaultmap(tofrom: scalar) +#pragma omp target parallel defaultmap(tofrom: scalar) reduction(task, +:argc) +// CHECK-NEXT: #pragma omp target parallel defaultmap(tofrom: scalar) reduction(task, +: argc) + { foo(); +#pragma omp cancellation point parallel +#pragma omp cancel parallel + } +// CHECK-NEXT: { // CHECK-NEXT: foo(); +// CHECK-NEXT: #pragma omp cancellation point parallel +// CHECK-NEXT: #pragma omp cancel parallel +// CHECK-NEXT: } return tmain(argc, &argc) + tmain(argv[0][0], argv[0]); } diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 12026696ba94a..2e094c294dfa0 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -134,6 +134,7 @@ int foo(int n) { #pragma omp target parallel if(target: 1) { aa += 1; +#pragma omp cancel parallel } // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 @@ -360,6 +361,12 @@ int foo(int n) { // CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align // CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* // CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: [[IS_CANCEL:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: [[CMP:%.+]] = icmp ne i32 [[IS_CANCEL]], 0 +// CHECK: br i1 [[CMP]], label %[[EXIT:.+]], label %[[CONTINUE:[^,]+]] +// CHECK: [[EXIT]]: +// CHECK: br label %[[CONTINUE]] +// CHECK: [[CONTINUE]]: // CHECK: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/target_parallel_firstprivate_messages.cpp b/clang/test/OpenMP/target_parallel_firstprivate_messages.cpp index 11be23cab8dcf..be7812ed71474 100644 --- a/clang/test/OpenMP/target_parallel_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_parallel_firstprivate_messages.cpp @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized +#pragma omp requires dynamic_allocators + typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git 
a/clang/test/OpenMP/target_parallel_for_ast_print.cpp b/clang/test/OpenMP/target_parallel_for_ast_print.cpp index 6e9b06703768e..82f1bf347460b 100644 --- a/clang/test/OpenMP/target_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/target_parallel_for_ast_print.cpp @@ -10,6 +10,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -195,8 +205,8 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp target parallel for default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) // CHECK-NEXT: for (int i = 0; i < 2; ++i) { // CHECK-NEXT: } -#pragma omp target parallel for if (5) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:5][:argc]) reduction(&& : h) -// CHECK-NEXT: #pragma omp target parallel for if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: h) +#pragma omp target parallel for if (5) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:5][:argc]) reduction(&& : h) allocate(omp_const_mem_alloc: h) uses_allocators(omp_const_mem_alloc) +// CHECK-NEXT: #pragma omp target parallel for if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: h) allocate(omp_const_mem_alloc: h) uses_allocators(omp_const_mem_alloc) for (int i = 0; i < 2; ++i) {} // CHECK-NEXT: for (int i = 0; i < 2; ++i) { // CHECK-NEXT: } @@ -245,8 +255,8 @@ int main(int argc, char **argv) { for (int i = 0; i < 2; ++i) {} // CHECK-NEXT: for (int i = 0; i < 2; ++i) { // CHECK-NEXT: } -#pragma omp target parallel for defaultmap(tofrom: scalar) -// CHECK-NEXT: #pragma omp target parallel for defaultmap(tofrom: scalar) +#pragma omp target parallel for defaultmap(tofrom: scalar) reduction(task, +: argc) +// CHECK-NEXT: #pragma omp target parallel for defaultmap(tofrom: scalar) reduction(task, +: argc) for (int i = 0; i < 2; ++i) {} // CHECK-NEXT: for (int i = 0; i < 2; ++i) { // CHECK-NEXT: } diff --git a/clang/test/OpenMP/target_parallel_for_firstprivate_messages.cpp b/clang/test/OpenMP/target_parallel_for_firstprivate_messages.cpp index 0a103edc5822a..b2744b66bd2a5 100644 --- a/clang/test/OpenMP/target_parallel_for_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_firstprivate_messages.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_parallel_for_lastprivate_messages.cpp b/clang/test/OpenMP/target_parallel_for_lastprivate_messages.cpp index 23ee0a6a6be34..73f8ae6247039 100644 --- a/clang/test/OpenMP/target_parallel_for_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_lastprivate_messages.cpp @@ -196,7 +196,7 @@ int main(int argc, char **argv) { #pragma omp target parallel for lastprivate(2 * 2) // 
expected-error {{expected variable name}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for lastprivate(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} +#pragma omp target parallel for lastprivate(ba) allocate(omp_thread_mem_alloc: ba) uses_allocators(omp_thread_mem_alloc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} omp45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp target parallel for'}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for lastprivate(ca) // expected-error {{const-qualified variable without mutable fields cannot be lastprivate}} diff --git a/clang/test/OpenMP/target_parallel_for_linear_messages.cpp b/clang/test/OpenMP/target_parallel_for_linear_messages.cpp index adb6c786a3b13..abd0b16a1bfcb 100644 --- a/clang/test/OpenMP/target_parallel_for_linear_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_linear_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -170,7 +170,7 @@ int foomain(I argc, C **argv) { #pragma omp target parallel for linear(argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target parallel for allocate(omp_thread_mem_alloc: e) linear(e, g) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} +#pragma omp target parallel for allocate(omp_thread_mem_alloc: e) linear(e, g) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target parallel for linear(z, h) // expected-error {{threadprivate or thread local variable cannot be linear}} diff --git a/clang/test/OpenMP/target_parallel_for_private_messages.cpp b/clang/test/OpenMP/target_parallel_for_private_messages.cpp index 124eadb61de7d..8db615cfa7b3f 100644 --- a/clang/test/OpenMP/target_parallel_for_private_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -138,7 +138,7 @@ int foomain(I argc, C **argv) { #pragma omp target parallel for private(argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target parallel for private(e, g) allocate(omp_thread_mem_alloc: e) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} +#pragma omp target parallel for private(e, g) allocate(omp_thread_mem_alloc: e) uses_allocators(omp_thread_mem_alloc) // expected-warning 
{{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target parallel for private(h) // expected-error {{threadprivate or thread local variable cannot be private}} diff --git a/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp index 101bef490e8ea..5019e24593c7a 100644 --- a/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -331,8 +337,11 @@ int main(int argc, char **argv) { #pragma omp target parallel for reduction(+ : fl) for (int i = 0; i < 10; ++i) foo(); +#pragma omp target parallel for reduction(task, + : fl) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); static int m; -#pragma omp target parallel for allocate(omp_thread_mem_alloc: m) reduction(+ : m) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} +#pragma omp target parallel for allocate(omp_thread_mem_alloc: m) reduction(+ : m) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for' directive}} omp50-error {{allocator must be specified in the 
'uses_allocators' clause}} for (int i = 0; i < 10; ++i) m++; diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..5c5ea6b90d529 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
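+// The GEP indices 0..6 checked below walk the fields of the runtime's
+// per-item reduction descriptor. A minimal sketch of the assumed layout,
+// following the kmp_taskred_input_t definition in the LLVM OpenMP runtime's
+// kmp.h (field names are illustrative and may differ across runtime versions):
+//
+//   typedef struct kmp_taskred_input {
+//     void *reduce_shar;         // field 0: shared item reduced into
+//     void *reduce_orig;         // field 1: original item, used for initialization
+//     size_t reduce_size;        // field 2: size of the item in bytes
+//     void *reduce_init;         // field 3: per-thread initializer routine
+//     void *reduce_fini;         // field 4: finalizer routine (null in this test)
+//     void *reduce_comb;         // field 5: combiner routine
+//     kmp_taskred_flags_t flags; // field 6: e.g. lazy allocation for array sections
+//   } kmp_taskred_input_t;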
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* 
@__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_ast_print.cpp b/clang/test/OpenMP/target_parallel_for_simd_ast_print.cpp index f9a6430880e59..db335f0eddf84 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_ast_print.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_ast_print.cpp @@ -16,6 +16,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t
omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -115,13 +125,13 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: } #ifdef OMP5 -#pragma omp target parallel for simd if(target:argc > 0) if (simd: argc) nontemporal(argc, c, d) order(concurrent) +#pragma omp target parallel for simd if(target:argc > 0) if (simd: argc) nontemporal(argc, c, d) order(concurrent) allocate(omp_high_bw_mem_alloc:f) private(f) uses_allocators(omp_high_bw_mem_alloc) #else #pragma omp target parallel for simd if(target:argc > 0) #endif // OMP5 for (T i = 0; i < 2; ++i) {} // OMP45: #pragma omp target parallel for simd if(target: argc > 0) - // OMP50: #pragma omp target parallel for simd if(target: argc > 0) if(simd: argc) nontemporal(argc,c,d) order(concurrent) + // OMP50: #pragma omp target parallel for simd if(target: argc > 0) if(simd: argc) nontemporal(argc,c,d) order(concurrent) allocate(omp_high_bw_mem_alloc: f) private(f) uses_allocators(omp_high_bw_mem_alloc) // CHECK-NEXT: for (T i = 0; i < 2; ++i) { // CHECK-NEXT: } diff --git a/clang/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp index 4a25753ba5287..78ef6965e091b 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators + typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp index 05dff0e2c9f3b..7d179decc1b4c 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp @@ -101,7 +101,7 @@ int foomain(int argc, char **argv) { #pragma omp target parallel for simd lastprivate(argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target parallel for simd allocate(omp_thread_mem_alloc: argc) lastprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for simd' directive}} +#pragma omp target parallel for simd allocate(omp_thread_mem_alloc: argc) lastprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for simd' directive}} omp50-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target parallel for simd lastprivate(conditional: argc,s) lastprivate(conditional: // omp50-error {{expected expression}} omp45-error 2 {{use of undeclared identifier 'conditional'}} expected-error {{expected ')'}} expected-note {{to match this '('}} omp45-error 2 {{calling a private constructor of class 'S6'}} omp50-error {{expected list item of scalar type in 'lastprivate' clause with 'conditional' modifier}} diff --git a/clang/test/OpenMP/target_parallel_for_simd_linear_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_linear_messages.cpp index 56abe2ad85ce0..9ef8d501fc58a 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_linear_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_linear_messages.cpp @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators + typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_parallel_for_simd_private_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_private_messages.cpp index 6a0f40e285396..0ac51b8794093 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_private_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -68,7 +68,7 @@ class S6 { S6() : a(0) {} S6(T v) : a(v) { -#pragma omp target parallel for simd allocate(omp_thread_mem_alloc: a) private(a) private(this->a) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for simd' directive}} +#pragma omp target parallel for simd allocate(omp_thread_mem_alloc: a) private(a) private(this->a) uses_allocators(omp_thread_mem_alloc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel for simd' directive}} for (int k = 0; k < v; ++k) ++this->a; } diff --git a/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp index a755046d553f4..8fa32d96718c6 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp @@ -1,10 +1,18 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp 
-std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized + +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -335,6 +343,9 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp target parallel for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_parallel_messages.cpp b/clang/test/OpenMP/target_parallel_messages.cpp index 25e63bb72b591..1ccdf647db9c2 100644 --- a/clang/test/OpenMP/target_parallel_messages.cpp +++ b/clang/test/OpenMP/target_parallel_messages.cpp @@ -76,6 +76,20 @@ int main(int argc, char **argv) { #pragma omp target parallel copyin(pvt) // expected-error {{unexpected OpenMP clause 'copyin' in directive '#pragma omp target parallel'}} foo(); + #pragma omp target parallel + { +#pragma omp cancel // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}} +#pragma omp cancellation point // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}} +#pragma omp cancel for // expected-error {{region cannot be closely nested inside 'target parallel' region}} +#pragma omp cancellation point for // expected-error {{region cannot be 
closely nested inside 'target parallel' region}} +#pragma omp cancel sections // expected-error {{region cannot be closely nested inside 'target parallel' region}} +#pragma omp cancellation point sections // expected-error {{region cannot be closely nested inside 'target parallel' region}} +#pragma omp cancel taskgroup // expected-error {{region cannot be closely nested inside 'target parallel' region}} +#pragma omp cancellation point taskgroup // expected-error {{region cannot be closely nested inside 'target parallel' region}} +#pragma omp cancel parallel +#pragma omp cancellation point parallel + } + return 0; } diff --git a/clang/test/OpenMP/target_parallel_private_messages.cpp b/clang/test/OpenMP/target_parallel_private_messages.cpp index a23c2ea2f1ab6..a12599017cab1 100644 --- a/clang/test/OpenMP/target_parallel_private_messages.cpp +++ b/clang/test/OpenMP/target_parallel_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -96,7 +96,7 @@ int foomain(I argc, C **argv) { {} #pragma omp target parallel private(argv[1]) // expected-error {{expected variable name}} {} -#pragma omp target parallel allocate(omp_thread_mem_alloc: ba) private(ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel' directive}} +#pragma omp target parallel allocate(omp_thread_mem_alloc: ba) private(ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} {} #pragma omp target parallel private(ca) // expected-error {{const-qualified variable without mutable fields cannot be private}} {} diff --git a/clang/test/OpenMP/target_parallel_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_reduction_messages.cpp index ae72f89f4d000..eeae218692b61 100644 --- a/clang/test/OpenMP/target_parallel_reduction_messages.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 
-verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -173,7 +179,7 @@ T tmain(T argc) { #pragma omp for private(fl) for (int i = 0; i < 10; ++i) {} -#pragma omp target parallel reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel' directive}} +#pragma omp target parallel reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) uses_allocators(omp_thread_mem_alloc) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target parallel' directive}} omp45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp target parallel'}} foo(); #pragma omp target parallel #pragma omp for reduction(- : fl) @@ -284,6 +290,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target parallel reduction(+ : m) // OK m++; +#pragma omp target parallel reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..2fc49d44c1e90 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// 
expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc]) + { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. +// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x 
%struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* 
@__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_private_messages.cpp b/clang/test/OpenMP/target_private_messages.cpp index 2b9fd35c05c66..7b3f928217fe3 100644 --- a/clang/test/OpenMP/target_private_messages.cpp +++ b/clang/test/OpenMP/target_private_messages.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_reduction_messages.cpp b/clang/test/OpenMP/target_reduction_messages.cpp index 7417cebca707d..fff6d90e6f031 100644 --- a/clang/test/OpenMP/target_reduction_messages.cpp +++ b/clang/test/OpenMP/target_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s
-Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -171,7 +177,7 @@ T tmain(T argc) { #pragma omp parallel #pragma omp for private(fl) for (int i = 0; i < 10; ++i) -#pragma omp target reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target' directive}} +#pragma omp target reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target' directive}} omp50-error 2 {{allocator must be specified in the 'uses_allocators' clause}} foo(); #pragma omp parallel #pragma omp for reduction(- : fl) @@ -278,6 +284,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target reduction(+ : m) // OK m++; +#pragma omp target reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_simd_ast_print.cpp b/clang/test/OpenMP/target_simd_ast_print.cpp index 0d18131675f2e..976eeed7ada1e 100644 --- a/clang/test/OpenMP/target_simd_ast_print.cpp +++ b/clang/test/OpenMP/target_simd_ast_print.cpp @@ -16,6 +16,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -116,13 +126,13 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: } #ifdef OMP5 -#pragma omp target simd if(target:argc > 0) if (simd:argc) +#pragma omp target simd if(target:argc > 0) if (simd:argc) allocate(omp_default_mem_alloc:f) private(f) uses_allocators(omp_default_mem_alloc) #else #pragma omp target simd if(target:argc > 0) #endif // OMP5 for (T i = 0; i < 2; ++i) {} // OMP45: #pragma omp target simd if(target: argc > 0) - // OMP50: #pragma omp target simd if(target: argc > 0) if(simd: argc) + // OMP50: #pragma omp target simd if(target: argc > 0) if(simd: argc) allocate(omp_default_mem_alloc: f) private(f) uses_allocators(omp_default_mem_alloc) // CHECK-NEXT: for (T i = 0; i < 2; ++i) { // CHECK-NEXT: } diff --git a/clang/test/OpenMP/target_simd_firstprivate_messages.cpp b/clang/test/OpenMP/target_simd_firstprivate_messages.cpp index e5696c9163d17..79243d7231067 100644 --- a/clang/test/OpenMP/target_simd_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_simd_firstprivate_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// 
RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -125,7 +125,7 @@ int foomain(int argc, char **argv) { { int v = 0; int i; -#pragma omp target simd allocate(omp_thread_mem_alloc: i) firstprivate(i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} +#pragma omp target simd allocate(omp_thread_mem_alloc: i) firstprivate(i) uses_allocators(omp_thread_mem_alloc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} for (int k = 0; k < argc; ++k) { i = k; v += i; diff --git a/clang/test/OpenMP/target_simd_lastprivate_messages.cpp b/clang/test/OpenMP/target_simd_lastprivate_messages.cpp index f9f77c0658d26..ac343a7d7e6a1 100644 --- a/clang/test/OpenMP/target_simd_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_simd_lastprivate_messages.cpp @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-version=45 -fopenmp-simd %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_simd_linear_messages.cpp b/clang/test/OpenMP/target_simd_linear_messages.cpp index 2048029ae3717..ad94a0cca42e3 100644 --- a/clang/test/OpenMP/target_simd_linear_messages.cpp +++ b/clang/test/OpenMP/target_simd_linear_messages.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_simd_private_messages.cpp b/clang/test/OpenMP/target_simd_private_messages.cpp index ee0122f2da50f..cfed8426d344f 100644 --- a/clang/test/OpenMP/target_simd_private_messages.cpp +++ b/clang/test/OpenMP/target_simd_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -68,7 +68,7 @@ class S6 { S6() : a(0) {} S6(T v) : a(v) { -#pragma omp target simd private(a) private(this->a) allocate(omp_thread_mem_alloc: a) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} +#pragma omp target simd private(a) private(this->a) allocate(omp_thread_mem_alloc: a) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < v; ++k) ++this->a; } diff --git a/clang/test/OpenMP/target_simd_reduction_messages.cpp b/clang/test/OpenMP/target_simd_reduction_messages.cpp index 57663ef8389aa..2e6c5665853c1 100644 --- a/clang/test/OpenMP/target_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/target_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s 
-Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -31,7 +37,7 @@ bool foobool(int argc) { } void foobar(int &ref) { -#pragma omp target simd allocate(omp_thread_mem_alloc: ref) reduction(+:ref) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} +#pragma omp target simd allocate(omp_thread_mem_alloc: ref) reduction(+:ref) uses_allocators(omp_thread_mem_alloc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target simd' directive}} omp45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp target simd'}} for (int i = 0; i < 10; ++i) foo(); } @@ -335,6 +341,9 @@ int main(int argc, char **argv) { #pragma omp target simd reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp target simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_ast_print.cpp b/clang/test/OpenMP/target_teams_ast_print.cpp index 9bf037e0dbc7f..047addb387fab 100644 --- a/clang/test/OpenMP/target_teams_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_ast_print.cpp @@ -1,15 +1,27 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s | 
FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER +struct omp_alloctrait_t {}; + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} template @@ -37,11 +49,13 @@ T tmain(T argc, T *argv) { T b = argc, c, d, e, f, g; static T a; S s; + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; #pragma omp target teams a=2; #pragma omp target teams default(none), private(argc,b) firstprivate(argv) shared (d) reduction(+:c) reduction(max:e) num_teams(C) thread_limit(d*C) allocate(argv) foo(); -#pragma omp target teams allocate(f) reduction(^:e, f) reduction(&& : g) +#pragma omp target teams allocate(my_allocator:f) reduction(^:e, f) reduction(&& : g) uses_allocators(my_allocator(traits)) foo(); return 0; } @@ -50,31 +64,37 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: T b = argc, c, d, e, f, g; // CHECK-NEXT: static T a; // CHECK-NEXT: S s; +// CHECK-NEXT: omp_alloctrait_t traits[10]; +// CHECK-NEXT: omp_allocator_handle_t my_allocator; // CHECK-NEXT: #pragma omp target teams{{$}} // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(C) thread_limit(d * C) allocate(argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp target teams allocate(f) reduction(^: e,f) reduction(&&: g) +// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits)) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; // CHECK-NEXT: static int a; // CHECK-NEXT: S s; +// CHECK-NEXT: omp_alloctrait_t traits[10]; +// CHECK-NEXT: omp_allocator_handle_t my_allocator; // CHECK-NEXT: #pragma omp target teams // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(5) thread_limit(d * 5) allocate(argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp target teams allocate(f) reduction(^: e,f) reduction(&&: g) +// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits)) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long 
argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; // CHECK-NEXT: static long a; // CHECK-NEXT: S s; +// CHECK-NEXT: omp_alloctrait_t traits[10]; +// CHECK-NEXT: omp_allocator_handle_t my_allocator; // CHECK-NEXT: #pragma omp target teams // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp target teams default(none) private(argc,b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(1) thread_limit(d * 1) allocate(argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp target teams allocate(f) reduction(^: e,f) reduction(&&: g) +// CHECK-NEXT: #pragma omp target teams allocate(my_allocator: f) reduction(^: e,f) reduction(&&: g) uses_allocators(my_allocator(traits)) // CHECK-NEXT: foo() enum Enum { }; diff --git a/clang/test/OpenMP/target_teams_distribute_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_ast_print.cpp index b7bf1df8d9a8e..3a1ec36b8181e 100644 --- a/clang/test/OpenMP/target_teams_distribute_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_ast_print.cpp @@ -1,15 +1,25 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -40,14 +50,14 @@ class S7 : public T { void foo() { int b, argv, d, c, e, f; -#pragma omp target teams distribute default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) +#pragma omp target teams distribute default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) allocate(omp_low_lat_mem_alloc:b) uses_allocators(omp_low_lat_mem_alloc) for (int k = 0; k < a.a; ++k) ++a.a; } }; // CHECK: #pragma omp target teams distribute 
private(this->a) private(this->a) private(T::a) // CHECK: #pragma omp target teams distribute private(this->a) private(this->a) -// CHECK: #pragma omp target teams distribute default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) +// CHECK: #pragma omp target teams distribute default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) allocate(omp_low_lat_mem_alloc: b) uses_allocators(omp_low_lat_mem_alloc) // CHECK: #pragma omp target teams distribute private(this->a) private(this->a) private(this->S::a) class S8 : public S7 { diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp index 8b446fe4ccc82..9c4bf5c3d7dcb 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; @@ -30,7 +31,7 @@ struct S1; // expected-note {{declared here}} expected-note{{forward declaration extern S1 a; class S2 { mutable int a; - + public: S2() : a(0) {} S2(const S2 &s2) : a(s2.a) {} diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp index f76c13fd27d81..92a7f2f186349 100644 --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp @@ -100,7 +100,7 @@ int foomain(int argc, char **argv) { #pragma omp target teams distribute lastprivate(argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target teams distribute allocate(omp_thread_mem_alloc: argc) lastprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} +#pragma omp target teams distribute allocate(omp_thread_mem_alloc: argc) lastprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} omp50-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute lastprivate(S1) // expected-error {{'S1' does not refer to a value}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp index a3366a165ac88..008423ed0746e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp @@ -10,6 +10,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -27,7 +37,7 @@ class S7 : public T { public: S7(typename T::type v) : a(v) { -#pragma omp target teams distribute parallel for private(a) private(this->a) private(T::a) +#pragma omp target teams distribute parallel for private(a) private(this->a) private(T::a) allocate(omp_cgroup_mem_alloc:a) uses_allocators(omp_cgroup_mem_alloc) for (int k = 0; k < a.a; ++k) { ++this->a.a; #pragma omp cancel for @@ -48,7 +58,7 @@ class S7 : public T { ++a.a; } }; -// CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) private(T::a) +// CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) private(T::a) allocate(omp_cgroup_mem_alloc: this->a) uses_allocators(omp_cgroup_mem_alloc) // CHECK: #pragma omp cancel for // CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) @@ -58,7 +68,7 @@ class S8 : public S7 { public: S8(int v) : S7(v){ -#pragma omp target teams distribute parallel for private(a) private(this->a) private(S7::a) +#pragma omp target teams distribute parallel for private(a) private(this->a) private(S7::a) for (int k = 0; k < a.a; ++k) ++this->a.a; } @@ -165,10 +175,10 @@ int main (int argc, char **argv) { // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute parallel for +#pragma omp target teams distribute parallel for reduction(task,+:argc) for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target 
teams distribute parallel for +// CHECK: #pragma omp target teams distribute parallel for reduction(task, +: argc) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return (0); diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp index b0a7da9c932d1..f063c9985d470 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -113,7 +113,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for firstprivate (argv[1]) // expected-error {{expected variable name}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for firstprivate(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} +#pragma omp target teams distribute parallel for firstprivate(ba) uses_allocators(omp_thread_mem_alloc) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for firstprivate(ca) // expected-error {{no matching constructor for initialization of 'S3'}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp index 8f4fd88a0818f..173c253f0a594 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp @@ -6,6 +6,7 @@ // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-version=45 -fopenmp-simd %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp index eb5a4d63cadfc..2d6140f282229 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -116,7 +116,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for 
firstprivate(i), private(i) // expected-error {{firstprivate variable cannot be private}} expected-note {{defined as firstprivate}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target teams distribute parallel for allocate(omp_thread_mem_alloc: j) private(j) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} +#pragma omp target teams distribute parallel for allocate(omp_thread_mem_alloc: j) private(j) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute parallel for reduction(+:i) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp index cfd0b73666281..3d5452553123b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -163,7 +169,7 @@ T tmain(T argc) { #pragma omp parallel reduction(min : i) #pragma omp target teams distribute parallel for reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} for (int j=0; j<100; j++) foo(); -#pragma omp target teams distribute parallel for reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} +#pragma omp target teams distribute parallel for reduction(+ : fl) 
allocate(omp_thread_mem_alloc: fl) uses_allocators(omp_thread_mem_alloc) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for' directive}} omp45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp target teams distribute parallel for'}} for (int j=0; j<100; j++) foo(); return T(); @@ -257,6 +263,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target teams distribute parallel for reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target teams distribute parallel for reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..06c0f8744e8cc --- /dev/null +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. +// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds 
[[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8***
[[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp index 8e33d43dc2ce8..0b21fb6bfbc85 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp @@ -10,6 +10,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -33,7 +43,7 @@ class S7 : public T { } S7 &operator=(S7 &s) { int k; -#pragma omp target teams distribute parallel for simd allocate(a) private(a) private(this->a) linear(k) allocate(k) +#pragma omp target teams distribute parallel for simd allocate(omp_pteam_mem_alloc: a) private(a) private(this->a) linear(k) allocate(k) uses_allocators(omp_pteam_mem_alloc) for (k = 0; k < s.a.a; ++k) ++s.a.a; @@ -58,7 +68,7 @@ class S7 : public T { } }; // CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) private(T::a) -// CHECK: #pragma omp target teams distribute parallel for simd allocate(this->a) private(this->a) private(this->a) linear(k) allocate(k) +// CHECK: #pragma omp target teams distribute parallel for simd allocate(omp_pteam_mem_alloc: this->a) private(this->a) private(this->a) linear(k) allocate(k) uses_allocators(omp_pteam_mem_alloc) // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK: #pragma omp target teams distribute parallel for simd simdlen(slen1) safelen(slen2) aligned(arr: alen) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp index 17aa7f488534a..f47c5eea94858 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp index 0553874cb997e..af0fd40524f7a 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -verify=expected,le45 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 
-verify=expected,le45 -fopenmp-version=40 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-version=45 -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected -fopenmp-version=50 -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp-version=50 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -124,7 +124,7 @@ int foomain(int argc, char **argv) { for (int k = 0; k < argc; ++k) ++k; int v = 0; -#pragma omp target teams distribute parallel for simd lastprivate(i) allocate(omp_thread_mem_alloc: i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for simd' directive}} +#pragma omp target teams distribute parallel for simd lastprivate(i) allocate(omp_thread_mem_alloc: i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for simd' directive}} le50-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) { i = k; v += i; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp index 8598fb6663ccd..9d70f9f3c6351 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -92,7 +92,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd private (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; - #pragma omp target teams distribute parallel for simd private(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for simd' directive}} + #pragma omp target teams distribute parallel for simd private(ba) uses_allocators(omp_thread_mem_alloc) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute parallel for simd' directive}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute parallel for simd private(ca) // expected-error {{const-qualified variable without mutable fields cannot be private}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp index 6d76945f42692..fbdf5aa8efe3e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp @@ -1,11 +1,18 @@ 
-// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; @@ -257,6 +264,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target teams distribute parallel for simd reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target teams distribute parallel for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_distribute_private_messages.cpp b/clang/test/OpenMP/target_teams_distribute_private_messages.cpp index a2233ed076070..264935dfc1e58 100644 --- a/clang/test/OpenMP/target_teams_distribute_private_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -92,7 +92,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute private (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target teams distribute private(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning 
{{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} +#pragma omp target teams distribute private(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} expected-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute private(ca) // expected-error {{const-qualified variable without mutable fields cannot be private}} diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp index 26080557d01e4..7ec60da27da54 100644 --- a/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -168,7 +174,7 @@ T tmain(T argc) { #pragma omp parallel reduction(min : i) #pragma omp target teams distribute reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} for (int j=0; j<100; j++) foo(); -#pragma omp target teams distribute allocate(omp_thread_mem_alloc: fl) reduction(+ : fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} +#pragma omp target teams distribute uses_allocators(omp_thread_mem_alloc) allocate(omp_thread_mem_alloc: fl) reduction(+ : fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute' directive}} omp45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp target teams distribute'}} for (int j=0; j<100; j++) foo(); return T(); @@ -262,6 +268,8 @@ 
int main(int argc, char **argv) { static int m; #pragma omp target teams distribute reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target teams distribute reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_distribute_simd_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_simd_ast_print.cpp index 8644ac9c1ad1e..5a527464b5236 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_ast_print.cpp @@ -16,6 +16,16 @@ #ifndef HEADER #define HEADER +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + void foo() {} struct S { @@ -133,7 +143,7 @@ T tmain(T argc) { foo(); // CHECK: #pragma omp target teams distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: foo(); +// CHECK-NEXT: foo(); #pragma omp target teams distribute simd private(b), firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) for (int k = 0; k < 10; ++k) e += d + argc; @@ -198,14 +208,14 @@ int main (int argc, char **argv) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #ifdef OMP5 -#pragma omp target teams distribute simd safelen(clen-1) aligned(arr:N+6) if(simd:argc) nontemporal(argc, c, d) order(concurrent) +#pragma omp target teams distribute simd safelen(clen-1) aligned(arr:N+6) if(simd:argc) nontemporal(argc, c, d) order(concurrent) allocate(omp_low_lat_mem_alloc:e) firstprivate(e) uses_allocators(omp_low_lat_mem_alloc) #else #pragma omp target teams distribute simd safelen(clen-1) aligned(arr:N+6) #endif // OMP5 for (int k = 0; k < 10; ++k) e += d + argc + arr[k]; // OMP45: #pragma omp target teams distribute simd safelen(clen - 1) aligned(arr: N + 6) -// OMP50: #pragma omp target teams distribute simd safelen(clen - 1) aligned(arr: N + 6) if(simd: argc) nontemporal(argc,c,d) order(concurrent) +// OMP50: #pragma omp target teams distribute simd safelen(clen - 1) aligned(arr: N + 6) if(simd: argc) nontemporal(argc,c,d) order(concurrent) allocate(omp_low_lat_mem_alloc: e) firstprivate(e) uses_allocators(omp_low_lat_mem_alloc) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc + arr[k]; return (0); diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp index 5ea84bf0221f0..eb481c4496493 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp @@ -5,6 +5,7 @@ // RUN: 
%clang_cc1 -verify=expected,le45 -fopenmp-simd %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp index 09da5e6c75e4f..c68b0452328cf 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -verify=expected,le45 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-version=40 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-version=45 -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected -fopenmp-version=50 -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp-version=50 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify=expected -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,le50 -fopenmp-version=50 -fopenmp-simd %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -124,7 +124,7 @@ int foomain(int argc, char **argv) { for (int k = 0; k < argc; ++k) ++k; int v = 0; -#pragma omp target teams distribute simd allocate(omp_thread_mem_alloc: i) lastprivate(i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute simd' directive}} +#pragma omp target teams distribute simd allocate(omp_thread_mem_alloc: i) lastprivate(i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute simd' directive}} le50-error {{allocator must be specified in the 'uses_allocators' clause}} for (int k = 0; k < argc; ++k) { i = k; v += i; diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_messages.cpp index 1ed9e7661ed5d..d37f38de64837 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_private_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_messages.cpp @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -verify=expected -fopenmp-version=50 -fopenmp %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,le45 -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -95,7 +96,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute simd private (k, argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; -#pragma omp target teams distribute simd private(ba) allocate(omp_thread_mem_alloc: ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute simd' directive}} +#pragma omp target teams distribute simd private(ba) allocate(omp_thread_mem_alloc: ba) uses_allocators(omp_thread_mem_alloc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams distribute simd' directive}} le45-error {{unexpected OpenMP clause 'uses_allocators' in directive '#pragma omp 
target teams distribute simd'}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute simd private(ca) // expected-error {{const-qualified variable without mutable fields cannot be private}} diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp index 100f992fb6aff..f97ad593b7b35 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp @@ -1,11 +1,18 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; @@ -257,6 +264,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target teams distribute simd reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target teams distribute simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_teams_firstprivate_messages.cpp b/clang/test/OpenMP/target_teams_firstprivate_messages.cpp index 1f4f40d675337..6563da08738b0 100644 --- a/clang/test/OpenMP/target_teams_firstprivate_messages.cpp +++ b/clang/test/OpenMP/target_teams_firstprivate_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: 
%clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -103,7 +103,7 @@ int main(int argc, char **argv) { foo(); #pragma omp target teams firstprivate(argv[1]) // expected-error {{expected variable name}} foo(); -#pragma omp target teams allocate(omp_thread_mem_alloc: ba) firstprivate(ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams' directive}} +#pragma omp target teams allocate(omp_thread_mem_alloc: ba) uses_allocators(omp_thread_mem_alloc) firstprivate(ba) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams' directive}} foo(); #pragma omp target teams firstprivate(ca, z) foo(); diff --git a/clang/test/OpenMP/target_teams_private_messages.cpp b/clang/test/OpenMP/target_teams_private_messages.cpp index 85309e953b923..7bab3723473c3 100644 --- a/clang/test/OpenMP/target_teams_private_messages.cpp +++ b/clang/test/OpenMP/target_teams_private_messages.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -108,7 +108,7 @@ int main(int argc, char **argv) { foo(); #pragma omp target teams private(j) foo(); -#pragma omp target teams firstprivate(i) allocate(omp_thread_mem_alloc: i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams' directive}} +#pragma omp target teams firstprivate(i) uses_allocators(omp_thread_mem_alloc) allocate(omp_thread_mem_alloc: i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'target teams' directive}} for (int k = 0; k < 10; ++k) { #pragma omp parallel private(i) foo(); diff --git a/clang/test/OpenMP/target_teams_reduction_messages.cpp b/clang/test/OpenMP/target_teams_reduction_messages.cpp index 3da0cc8479a0d..e775270f4d6a4 100644 --- a/clang/test/OpenMP/target_teams_reduction_messages.cpp +++ b/clang/test/OpenMP/target_teams_reduction_messages.cpp @@ -1,11 +1,18 @@ -// RUN: %clang_cc1 -verify -fopenmp -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd 
-fopenmp-version=45 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -o - %s -Wuninitialized +#pragma omp requires dynamic_allocators typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; extern const omp_allocator_handle_t omp_large_cap_mem_alloc; @@ -282,6 +289,8 @@ int main(int argc, char **argv) { static int m; #pragma omp target teams reduction(+ : m) // OK foo(); +#pragma omp target teams reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/target_uses_allocators_messages.cpp b/clang/test/OpenMP/target_uses_allocators_messages.cpp new file mode 100644 index 0000000000000..1c1912025a032 --- /dev/null +++ b/clang/test/OpenMP/target_uses_allocators_messages.cpp @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 %s -Wuninitialized + +struct omp_alloctrait_t {}; + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +int main(int argc, char **argv) { + omp_alloctrait_t traits[10]; + omp_alloctrait_t *ptraits; + omp_allocator_handle_t my_alloc = nullptr; + const omp_allocator_handle_t c_my_alloc = my_alloc; +#pragma omp target uses_allocators // expected-error {{expected '(' after 'uses_allocator'}} +{} +#pragma omp target uses_allocators( // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected unqualified-id}} +{} +#pragma omp target uses_allocators() // expected-error {{expected unqualified-id}} +{} +#pragma omp target uses_allocators(omp_default_mem_alloc // expected-error {{expected ',' or ')' in 'uses_allocators' clause}} expected-error {{expected ')'}} expected-note {{to match this '('}} +{} +#pragma omp target uses_allocators(argc, // expected-error {{expected ')'}} expected-error {{expected variable of the 'omp_allocator_handle_t' type, not 'int'}} expected-note {{to match this '('}} +{} +#pragma omp target uses_allocators(argc > 0 ? 
omp_default_mem_alloc : omp_thread_mem_alloc) // expected-error {{expected ',' or ')' in 'uses_allocators' clause}} expected-error {{expected unqualified-id}} expected-error {{expected variable of the 'omp_allocator_handle_t' type, not 'int'}} +{} +#pragma omp target uses_allocators(omp_default_mem_alloc, omp_large_cap_mem_alloc, omp_const_mem_alloc, omp_high_bw_mem_alloc, omp_low_lat_mem_alloc, omp_cgroup_mem_alloc, omp_pteam_mem_alloc, omp_thread_mem_alloc) +{} +#pragma omp target uses_allocators(omp_default_mem_alloc(traits), omp_large_cap_mem_alloc(traits), omp_const_mem_alloc(traits), omp_high_bw_mem_alloc(traits), omp_low_lat_mem_alloc(traits), omp_cgroup_mem_alloc(traits), omp_pteam_mem_alloc(traits), omp_thread_mem_alloc(traits)) // expected-error 8 {{predefined allocator cannot have traits specified}} expected-note-re 8 {{predefined trait '{{omp_default_mem_alloc|omp_large_cap_mem_alloc|omp_const_mem_alloc|omp_high_bw_mem_alloc|omp_low_lat_mem_alloc|omp_cgroup_mem_alloc|omp_pteam_mem_alloc|omp_thread_mem_alloc}}' used here}} +{} +#pragma omp target uses_allocators(my_alloc, c_my_alloc) // expected-error {{non-predefined allocator must have traits specified}} expected-error {{expected variable of the 'omp_allocator_handle_t' type, not 'const omp_allocator_handle_t' (aka 'void **const')}} +{} +#pragma omp target uses_allocators(my_alloc() // expected-error {{expected unqualified-id}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{non-predefined allocator must have traits specified}} +{} +#pragma omp target uses_allocators(my_alloc()) // expected-error {{expected unqualified-id}} expected-error {{non-predefined allocator must have traits specified}} +{} +#pragma omp target uses_allocators(my_alloc(argc > 0 ? 
argv[0] : argv{1})) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected constant sized array of 'omp_alloctrait_t' elements, not 'int'}} +{} +#pragma omp target uses_allocators(my_alloc(ptraits)) // expected-error {{expected constant sized array of 'omp_alloctrait_t' elements, not 'omp_alloctrait_t *'}} +{} +#pragma omp target uses_allocators(my_alloc(traits)) private(my_alloc) // expected-error {{allocators used in 'uses_allocators' clause cannot appear in other data-sharing or data-mapping attribute clauses}} expected-note {{defined as private}} +{} +#pragma omp target map(my_alloc, traits) uses_allocators(my_alloc(traits)) // expected-error {{allocators used in 'uses_allocators' clause cannot appear in other data-sharing or data-mapping attribute clauses}} expected-note {{used here}} +{} + return 0; +} + diff --git a/clang/test/OpenMP/taskloop_reduction_messages.cpp b/clang/test/OpenMP/taskloop_reduction_messages.cpp index da044ab2c9c9f..e0dee11112a5e 100644 --- a/clang/test/OpenMP/taskloop_reduction_messages.cpp +++ b/clang/test/OpenMP/taskloop_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -344,6 +350,9 @@ int main(int argc, char **argv) { #pragma omp taskloop reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp taskloop reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} 
omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; #pragma omp taskloop nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; diff --git a/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp index 82047f04cd489..2b592869f7d3e 100644 --- a/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized typedef void **omp_allocator_handle_t; extern const omp_allocator_handle_t omp_default_mem_alloc; @@ -344,6 +350,9 @@ int main(int argc, char **argv) { #pragma omp taskloop simd reduction(+ : m) // OK for (int i = 0; i < 10; ++i) m++; +#pragma omp taskloop simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int i = 0; i < 10; ++i) + m++; #pragma omp taskloop simd reduction(+ : m) nogroup // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} for (int i = 0; i < 10; ++i) m++; diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp index 6e3c0ab0b0c14..4b6afa16832d0 100644 --- 
a/clang/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp @@ -197,11 +197,11 @@ int main (int argc, char **argv) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute parallel for +#pragma omp teams distribute parallel for reduction(task,&&:argc) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for +// CHECK-NEXT: #pragma omp teams distribute parallel for reduction(task, &&: argc) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return (0); diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp index 8374c14331a88..7efdf179d3653 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -311,6 +317,9 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams distribute parallel for reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target +#pragma omp teams distribute parallel for reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation 
of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp new file mode 100644 index 0000000000000..194999f8cbb05 --- /dev/null +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: @main +int main(int argc, char **argv) { +#pragma omp target +#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc]) + for (long long i = 0; i < 10; ++i) { +#pragma omp task in_reduction(+: argc, argv[0:10][0:argc]) + ; + } +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: alloca i32, +// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, +// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], +// CHECK: [[TG:%.+]] = alloca i8*, + +// Init firstprivate copy of argc +// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]], +// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]], +// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]], + +// Init firstprivate copy of argv[0:10][0:argc] +// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]] +// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]] +// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]] +// CHECK: [[INIT]]: +// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ] +// CHECK: store i8 0, i8* [[EL]], +// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1 +// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]] +// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]] +// CHECK: [[DONE]]: + +// Register task reduction. 
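+//
+// The stores checked below fill in two kmp_taskred_input_t records before
+// they are handed to __kmpc_taskred_modifier_init. As a sketch of the layout
+// being exercised (field names are illustrative, following the runtime's
+// kmp.h rather than anything this test verifies), the GEP indices
+// i32 0..6 correspond to:
+//
+//   struct kmp_taskred_input_t {
+//     void *reduce_shar;   // i32 0: shared copy used inside the construct
+//     void *reduce_orig;   // i32 1: address of the original list item
+//     size_t reduce_size;  // i32 2: size of the reduction item in bytes
+//     void *reduce_init;   // i32 3: initializer callback
+//     void *reduce_fini;   // i32 4: finalizer callback (null here)
+//     void *reduce_comb;   // i32 5: combiner callback
+//     unsigned flags;      // i32 6: flags; bit 0 (lazy_priv) is set only
+//                          // for the variable-sized argv[0:10][0:argc] item
+//   };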
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0 +// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0 +// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]], +// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1 +// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8* +// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]], +// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2 +// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]], +// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]], +// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]], +// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]], +// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6 +// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false) +// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1 +// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0 +// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]], +// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1 +// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]], +// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2 +// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]], +// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]], +// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4 +// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]], +// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5 +// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]], +// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6 +// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]], +// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8* +// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]]) +// CHECK: store i8* [[TG_VAL]], i8** [[TG]], + +// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]* +// CHECK: 
[[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1 +// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0 +// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]], +// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]], + +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]]) + +// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1) +// CHECK: call i32 @__kmpc_reduce_nowait( + +// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: store i32 0, i32* %{{.+}}, + +// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: store i32 [[ADD]], i32* %{{.+}}, + +// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}}) +// CHECK: phi i8* +// CHECK: store i8 0, i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}}) +// CHECK: phi i8* +// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}} +// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8 +// CHECK: store i8 [[CONV]], i8* [[EL:%.+]], +// CHECK: getelementptr i8, i8* [[EL]], i32 1 + +// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 +// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) +// CHECK-DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 + +#endif diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp index 09797474924ad..b6a3a30baaed7 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping 
-Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -311,6 +317,9 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams distribute parallel for simd reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target +#pragma omp teams distribute parallel for simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/teams_distribute_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_reduction_messages.cpp index c297788bc172e..fb793e0008ad6 100644 --- a/clang/test/OpenMP/teams_distribute_reduction_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd 
-fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -317,6 +323,9 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams distribute reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target +#pragma omp teams distribute reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp index adb1835603e0a..a167e9c10f2f1 100644 --- a/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 
-fopenmp-simd -fopenmp-version=50 -std=c++98 %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -311,6 +317,9 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams distribute simd reduction(+ : m) // OK for (int j=0; j<100; j++) foo(); +#pragma omp target +#pragma omp teams distribute simd reduction(task, + : m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + for (int j=0; j<100; j++) foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/OpenMP/teams_reduction_messages.cpp b/clang/test/OpenMP/teams_reduction_messages.cpp index 4cb1e75281f6a..674d1ca85c8ce 100644 --- a/clang/test/OpenMP/teams_reduction_messages.cpp +++ b/clang/test/OpenMP/teams_reduction_messages.cpp @@ -1,10 +1,16 @@ -// RUN: %clang_cc1 -verify -fopenmp -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized -// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++98 -o - %s -Wno-openmp-mapping -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -o - %s -Wno-openmp-mapping -Wuninitialized extern int omp_default_mem_alloc; void xxx(int argc) { @@ -343,6 +349,9 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams reduction(+ : m) // OK foo(); +#pragma omp target +#pragma omp teams reduction(task, + 
: m) // omp45-error 2 {{expected expression}} omp45-warning {{missing ':' after reduction identifier - ignoring}} omp50-error {{'reduction' clause with 'task' modifier allowed only on non-simd parallel or worksharing constructs}} + foo(); return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} } diff --git a/clang/test/PCH/cli-macro.c b/clang/test/PCH/cli-macro.c new file mode 100644 index 0000000000000..bae8cc3ff0e75 --- /dev/null +++ b/clang/test/PCH/cli-macro.c @@ -0,0 +1,12 @@ +// Test this without pch. +// RUN: %clang_cc1 -Wunused-macros -Dunused=1 -fsyntax-only -verify %s + +// Test with pch. +// RUN: %clang_cc1 -Wunused-macros -emit-pch -o %t %s +// RUN: %clang_cc1 -Wunused-macros -Dunused=1 -include-pch %t -fsyntax-only -verify %s + +// expected-no-diagnostics + +// -Dunused=1 is intentionally not set for the pch. +// There still should be no unused warning for a macro from the command line. + diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 9cb12f8afb329..8ce6b8a8a45d5 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -163,6 +163,7 @@ // RUN: %clang -target aarch64 -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-KRYO %s // RUN: %clang -target aarch64 -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-THUNDERX2T99 %s // RUN: %clang -target aarch64 -mcpu=a64fx -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A64FX %s +// RUN: %clang -target aarch64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CARMEL %s // CHECK-MCPU-APPLE-A7: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+sha2" "-target-feature" "+aes" // CHECK-MCPU-APPLE-A10: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+rdm" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+sha2" "-target-feature" "+aes" // CHECK-MCPU-APPLE-A11: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8.2a" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+ras" "-target-feature" "+lse" "-target-feature" "+rdm" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+sha2" "-target-feature" "+aes" @@ -179,6 +180,7 @@ // CHECK-MCPU-KRYO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" // CHECK-MCPU-THUNDERX2T99: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" // CHECK-MCPU-A64FX: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8.2a" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+fullfp16" "-target-feature" "+ras" "-target-feature" "+lse" "-target-feature" "+rdm" "-target-feature" "+sve" "-target-feature" "+sha2" +// CHECK-MCPU-CARMEL: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8.2a" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" 
"-target-feature" "+crypto" "-target-feature" "+fullfp16" "-target-feature" "+ras" "-target-feature" "+lse" "-target-feature" "+rdm" "-target-feature" "+sha2" "-target-feature" "+aes" // RUN: %clang -target x86_64-apple-macosx -arch arm64 -### -c %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH-ARM64 %s // CHECK-ARCH-ARM64: "-target-cpu" "apple-a7" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz" diff --git a/clang/test/Sema/arm-cmse-no-diag.c b/clang/test/Sema/arm-cmse-no-diag.c new file mode 100644 index 0000000000000..0ad8cc3970acc --- /dev/null +++ b/clang/test/Sema/arm-cmse-no-diag.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -verify -Wno-cmse-union-leak %s +// expected-no-diagnostics + +union U { unsigned n; char b[4]; } u; + +void (*fn2)(int, union U) __attribute__((cmse_nonsecure_call)); + +union U xyzzy() __attribute__((cmse_nonsecure_entry)) { + fn2(0, u); + return u; +} diff --git a/clang/test/Sema/arm-cmse.c b/clang/test/Sema/arm-cmse.c index 2148cc1aeb962..2a32256aaf091 100644 --- a/clang/test/Sema/arm-cmse.c +++ b/clang/test/Sema/arm-cmse.c @@ -28,3 +28,30 @@ void fn0() __attribute__((cmse_nonsecure_entry)); void fn1() __attribute__((cmse_nonsecure_entry(1))); // expected-error {{'cmse_nonsecure_entry' attribute takes no arguments}} typedef void (*fn2_t)() __attribute__((cmse_nonsecure_call("abc"))); // expected-error {{'cmse_nonsecure_call' attribute takes no argument}} + +union U { unsigned n; char b[4]; } u; + +union U xyzzy() __attribute__((cmse_nonsecure_entry)) { + return u; // expected-warning {{passing union across security boundary via return value may leak information}} +} + +void (*fn2)(int, union U) __attribute__((cmse_nonsecure_call)); +void (*fn3)() __attribute__ ((cmse_nonsecure_call)); + +struct S { + int t; + union { + char b[4]; + unsigned w; + }; +} s; + +void qux() { + fn2(1, + u); // expected-warning {{passing union across security boundary via parameter 1 may leak information}} + + fn3( + u, // expected-warning {{passing union across security boundary via parameter 0 may leak information}} + 1, + s); // expected-warning {{passing union across security boundary via parameter 2 may leak information}} +} diff --git a/clang/test/Sema/ext-int-not-supported.c b/clang/test/Sema/ext-int-not-supported.c new file mode 100644 index 0000000000000..23610b9941e4c --- /dev/null +++ b/clang/test/Sema/ext-int-not-supported.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -triple armv7 -fsyntax-only -verify %s + +void foo() { + _ExtInt(33) a; // expected-error{{_ExtInt is not supported on this target}} +} diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu index b0e2852a12a75..612d954b79af7 100644 --- a/clang/test/SemaCUDA/function-overload.cu +++ b/clang/test/SemaCUDA/function-overload.cu @@ -1,8 +1,8 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s #include "Inputs/cuda.h" @@ -449,3 +449,17 @@ __host__ __device__ int constexpr_overload(const T &x, const T &y) { int test_constexpr_overload(C2 &x, C2 &y) { return constexpr_overload(x, y); } + +// 
Verify no ambiguity for new operator. +void *a = new int; +__device__ void *b = new int; +// expected-error@-1{{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} + +// Verify no ambiguity for new operator. +template <typename _Tp> _Tp&& f(); +template <typename _Tp, typename = decltype(new _Tp(f<_Tp>()))> +void __test(); + +void foo() { + __test<int>(); +} diff --git a/clang/test/SemaCUDA/union-init.cu b/clang/test/SemaCUDA/union-init.cu new file mode 100644 index 0000000000000..a633975e3776e --- /dev/null +++ b/clang/test/SemaCUDA/union-init.cu @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -fsyntax-only -o - -verify + +#include "Inputs/cuda.h" + +struct A { + int a; + __device__ A() { a = 1; } + __device__ ~A() { a = 2; } +}; + +// This can be a global var since ctor/dtors of data members are not called. +union B { + A a; + __device__ B() {} + __device__ ~B() {} +}; + +// This cannot be a global var since it has a dynamic ctor. +union C { + A a; + __device__ C() { a.a = 3; } + __device__ ~C() {} +}; + +// This cannot be a global var since it has a dynamic dtor. +union D { + A a; + __device__ D() { } + __device__ ~D() { a.a = 4; } +}; + +__device__ B b; +__device__ C c; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__device__ D d; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} + +__device__ void foo() { + __shared__ B b; + __shared__ C c; + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} + __shared__ D d; + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} +} diff --git a/clang/test/SemaCXX/compare-modules-cxx2a.cpp b/clang/test/SemaCXX/compare-modules-cxx2a.cpp index fa9180024351f..afbcce1b0c005 100644 --- a/clang/test/SemaCXX/compare-modules-cxx2a.cpp +++ b/clang/test/SemaCXX/compare-modules-cxx2a.cpp @@ -22,12 +22,12 @@ auto va = A() <=> A(); // expected-note {{required here}} #pragma clang module import compare.other -// expected-note@std-compare.h:* 2+{{previous definition}} +// expected-note@std-compare.h:* 2+{{not reachable}} -void b() { void(0 <=> 0); } // expected-error 1+{{definition of 'strong_ordering' must be imported}} +void b() { void(0 <=> 0); } // expected-error 1+{{missing '#include "std-compare.h"'; 'strong_ordering' must be defined}} struct B { - CC operator<=>(const B&) const = default; // expected-error 1+{{definition of 'strong_ordering' must be imported}} + CC operator<=>(const B&) const = default; // expected-error 1+{{missing '#include "std-compare.h"'; 'strong_ordering' must be defined}} }; auto vb = B() <=> B(); // expected-note {{required here}} diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp index 6a06280dceec4..cf94fd17162a5 100644 --- a/clang/test/SemaCXX/ext-int.cpp +++ b/clang/test/SemaCXX/ext-int.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion +// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion -triple x86_64-gnu-linux template struct HasExtInt { diff --git a/clang/test/SemaCXX/modules-ts.cppm b/clang/test/SemaCXX/modules-ts.cppm index 1081995c586f0..ae15c0825d1ab 100644 --- a/clang/test/SemaCXX/modules-ts.cppm +++ b/clang/test/SemaCXX/modules-ts.cppm @@ -43,7 +43,7 @@ namespace N { export struct T {} t; #elif TEST == 3 int use_a = a; // expected-error {{declaration of 'a' must be imported from module 'foo' before it is required}} -//
expected-note@-13 {{previous}} +// expected-note@-13 {{declaration here is not visible}} #undef foo import foo; diff --git a/clang/test/SemaCXX/vector-conditional.cpp b/clang/test/SemaCXX/vector-conditional.cpp index 5676d7a3880d9..1b360e4fa832e 100644 --- a/clang/test/SemaCXX/vector-conditional.cpp +++ b/clang/test/SemaCXX/vector-conditional.cpp @@ -97,7 +97,7 @@ void Operands() { // When there is a vector and a scalar, conversions must be legal. (void)(four_ints ? four_floats : 3); // should work, ints can convert to floats. - (void)(four_ints ? four_uints : e); // should work, non-scoped enum can convert to uint. + (void)(four_ints ? four_uints : e); // expected-error {{cannot convert between scalar type 'E' and vector type 'FourUInts'}} (void)(four_ints ? four_uints : se); // expected-error {{cannot convert between vector and non-scalar values ('FourUInts' (vector of 4 'unsigned int' values) and 'SE'}} // GCC permits this, but our conversion rules reject this for truncation. (void)(two_ints ? two_ints : us); // expected-error {{cannot convert between scalar type 'unsigned int' and vector type 'TwoInts'}} diff --git a/clang/test/SemaCXX/vector.cpp b/clang/test/SemaCXX/vector.cpp index caa840596d7db..724ccece0c42e 100644 --- a/clang/test/SemaCXX/vector.cpp +++ b/clang/test/SemaCXX/vector.cpp @@ -475,3 +475,24 @@ void use() { #endif // __cplusplus >= 201103L } } + +namespace rdar60092165 { +template <class T> void f() { + typedef T first_type __attribute__((vector_size(sizeof(T) * 4))); + typedef T second_type __attribute__((vector_size(sizeof(T) * 4))); + + second_type st; +} +} + +namespace PR45780 { +enum E { Value = 15 }; +void use(char16 c) { + E e; + c &Value; // expected-error{{cannot convert between scalar type 'PR45780::E' and vector type 'char16'}} + c == Value; // expected-error{{cannot convert between scalar type 'PR45780::E' and vector type 'char16'}} + e | c; // expected-error{{cannot convert between scalar type 'PR45780::E' and vector type 'char16'}} + e != c; // expected-error{{cannot convert between scalar type 'PR45780::E' and vector type 'char16'}} +} + +} // namespace PR45780 diff --git a/clang/test/SemaCXX/warn-thread-safety-parsing.cpp b/clang/test/SemaCXX/warn-thread-safety-parsing.cpp index 198d02e1f0768..6ad0f877a11d3 100644 --- a/clang/test/SemaCXX/warn-thread-safety-parsing.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-parsing.cpp @@ -496,7 +496,7 @@ Mutex aa_var_arg_bad_3 ACQUIRED_AFTER(muDoublePointer); // \ Mutex aa_var_arg_bad_4 ACQUIRED_AFTER(umu); // \ // expected-warning {{'acquired_after' attribute requires arguments whose type is annotated with 'capability' attribute}} UnlockableMu aa_var_arg_bad_5 ACQUIRED_AFTER(mu_aa); // \ - // expected-warning {{'acquired_after' attribute can only be applied in a context annotated with 'capability("mutex")' attribute}} + // expected-warning {{'acquired_after' attribute can only be applied in a context annotated with 'capability' attribute}} //-----------------------------------------// // Acquired Before (ab) @@ -559,7 +559,7 @@ Mutex ab_var_arg_bad_3 ACQUIRED_BEFORE(muDoublePointer); // \ Mutex ab_var_arg_bad_4 ACQUIRED_BEFORE(umu); // \ // expected-warning {{'acquired_before' attribute requires arguments whose type is annotated with 'capability' attribute}} UnlockableMu ab_var_arg_bad_5 ACQUIRED_BEFORE(mu_ab); // \ - // expected-warning {{'acquired_before' attribute can only be applied in a context annotated with 'capability("mutex")' attribute}} + // expected-warning {{'acquired_before' attribute can only be applied in a
context annotated with 'capability' attribute}} //-----------------------------------------// diff --git a/clang/test/SemaObjC/dictionary-literal-duplicates.m b/clang/test/SemaObjC/dictionary-literal-duplicates.m new file mode 100644 index 0000000000000..5bfe66d280fe2 --- /dev/null +++ b/clang/test/SemaObjC/dictionary-literal-duplicates.m @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -Wno-objc-root-class %s -verify +// RUN: %clang_cc1 -xobjective-c++ -Wno-objc-root-class %s -verify + +#define YES __objc_yes +#define NO __objc_no + +@interface NSNumber ++(instancetype)numberWithChar:(char)value; ++(instancetype)numberWithInt:(int)value; ++(instancetype)numberWithDouble:(double)value; ++(instancetype)numberWithBool:(unsigned char)value; ++(instancetype)numberWithUnsignedLong:(unsigned long)value; ++(instancetype)numberWithLongLong:(long long) value; ++(instancetype)numberWithUnsignedInt:(unsigned)value; +@end + +@interface NSString +@end + +@interface NSDictionary ++ (instancetype)dictionaryWithObjects:(const id[])objects + forKeys:(const id[])keys + count:(unsigned long)cnt; +@end + +void test() { + NSDictionary *t1 = @{ + @"foo" : @0, // expected-note 2 {{previous equal key is here}} + @"foo" : @0, // expected-warning{{duplicate key in dictionary literal}} + @("foo") : @0, // expected-warning{{duplicate key in dictionary literal}} + @"foo\0" : @0, + + @1 : @0, // expected-note + {{previous equal key is here}} + @YES : @0, // expected-warning{{duplicate key in dictionary literal}} + @'\1' : @0, // expected-warning{{duplicate key in dictionary literal}} + @1 : @0, // expected-warning{{duplicate key in dictionary literal}} + @1ul : @0, // expected-warning{{duplicate key in dictionary literal}} + @1ll : @0, // expected-warning{{duplicate key in dictionary literal}} +#ifdef __cplusplus + @true : @0, // expected-warning{{duplicate key in dictionary literal}} +#endif + @1.0 : @0, // FIXME: should warn + + @-1 : @0, // expected-note + {{previous equal key is here}} + @4294967295u : @0, // no warning + @-1ll : @0, // expected-warning{{duplicate key in dictionary literal}} + @(NO-YES) : @0, // expected-warning{{duplicate key in dictionary literal}} + }; +} + +#ifdef __cplusplus +template <class... Ts> void variadic(Ts... ts) { + NSDictionary *nd = @{ + ts : @0 ..., + @0 : ts ...
// expected-warning 2 {{duplicate key in dictionary literal}} expected-note 2 {{previous equal key is here}} + }; +} + +void call_variadic() { + variadic(@0, @1, @2); // expected-note {{in instantiation}} +} +#endif diff --git a/clang/test/SemaOpenCL/printf-format-strings.cl b/clang/test/SemaOpenCL/printf-format-strings.cl index 0cfeeb1357960..6cdfc7e60b379 100644 --- a/clang/test/SemaOpenCL/printf-format-strings.cl +++ b/clang/test/SemaOpenCL/printf-format-strings.cl @@ -65,8 +65,8 @@ kernel void format_v4f64(half4 arg_h, float4 arg_f, double4 arg_d) kernel void format_v4f16(half4 arg_h, float4 arg_f, double4 arg_d) { - printf("%v4hf\n", arg_d); // expected-warning{{format specifies type '__fp16 __attribute__((ext_vector_type(4)))' but the argument has type 'double4' (vector of 4 'double' values)}} - printf("%v4hf\n", arg_f); // expected-warning{{format specifies type '__fp16 __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}} + printf("%v4hf\n", arg_d); // expected-warning{{format specifies type 'half __attribute__((ext_vector_type(4)))' but the argument has type 'double4' (vector of 4 'double' values)}} + printf("%v4hf\n", arg_f); // expected-warning{{format specifies type 'half __attribute__((ext_vector_type(4)))' but the argument has type 'float4' (vector of 4 'float' values)}} printf("%v4hf\n", arg_h); } diff --git a/clang/test/SemaOpenCLCXX/address-space-castoperators.cl b/clang/test/SemaOpenCLCXX/address-space-castoperators.cl index d61a9a72573cd..7fd7f728fda39 100644 --- a/clang/test/SemaOpenCLCXX/address-space-castoperators.cl +++ b/clang/test/SemaOpenCLCXX/address-space-castoperators.cl @@ -9,4 +9,9 @@ void nester_ptr() { gengen = static_cast(locgen); //expected-error{{static_cast from '__local int *__generic *' to '__generic int *__generic *' is not allowed}} // CHECK-NOT: AddressSpaceConversion gengen = reinterpret_cast(locgen); //expected-warning{{reinterpret_cast from '__local int *__generic *' to '__generic int *__generic *' changes address space of nested pointers}} + + gengen = const_cast(congen); //expected-error{{const_cast from '__constant int *__generic *' to '__generic int *__generic *' is not allowed}} + gengen = static_cast(congen); //expected-error{{static_cast from '__constant int *__generic *' to '__generic int *__generic *' is not allowed}} +// CHECK-NOT: AddressSpaceConversion + gengen = reinterpret_cast(congen); //expected-warning{{reinterpret_cast from '__constant int *__generic *' to '__generic int *__generic *' changes address space of nested pointers}} } diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index daec694bd1289..4527ccadbb4ab 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -27,7 +27,7 @@ config.enable_backtrace = @ENABLE_BACKTRACES@ config.enable_experimental_new_pass_manager = @ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER@ config.enable_threads = @LLVM_ENABLE_THREADS@ config.host_arch = "@HOST_ARCH@" -config.python_executable = "@PYTHON_EXECUTABLE@" +config.python_executable = "@Python3_EXECUTABLE@" config.use_z3_solver = lit_config.params.get('USE_Z3_SOLVER', "@USE_Z3_SOLVER@") config.has_plugins = @LLVM_ENABLE_PLUGINS@ diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index e441b19f9a17a..2afc5a4eb842c 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1295,6 +1295,14 @@ bool CursorVisitor::VisitFriendDecl(FriendDecl *D) { return false; } +bool 
CursorVisitor::VisitDecompositionDecl(DecompositionDecl *D) { + for (auto *B : D->bindings()) { + if (Visit(MakeCXCursor(B, TU, RegionOfInterest))) + return true; + } + return VisitVarDecl(D); +} + bool CursorVisitor::VisitDeclarationNameInfo(DeclarationNameInfo Name) { switch (Name.getName().getNameKind()) { case clang::DeclarationName::Identifier: @@ -2490,6 +2498,14 @@ void OMPClauseEnqueue::VisitOMPNontemporalClause( Visitor->AddStmt(E); } void OMPClauseEnqueue::VisitOMPOrderClause(const OMPOrderClause *C) {} +void OMPClauseEnqueue::VisitOMPUsesAllocatorsClause( + const OMPUsesAllocatorsClause *C) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + const OMPUsesAllocatorsClause::Data &D = C->getAllocatorData(I); + Visitor->AddStmt(D.Allocator); + Visitor->AddStmt(D.AllocatorTraits); + } +} } // namespace void EnqueueVisitor::EnqueueChildren(const OMPClause *S) { @@ -2664,6 +2680,7 @@ void EnqueueVisitor::VisitIfStmt(const IfStmt *If) { AddStmt(If->getElse()); AddStmt(If->getThen()); AddStmt(If->getCond()); + AddStmt(If->getInit()); AddDecl(If->getConditionVariable()); } void EnqueueVisitor::VisitInitListExpr(const InitListExpr *IE) { diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp index 1311f66ce0bc8..6685c892749ea 100644 --- a/clang/tools/libclang/CIndexCodeCompletion.cpp +++ b/clang/tools/libclang/CIndexCodeCompletion.cpp @@ -254,7 +254,7 @@ struct AllocatedCXCodeCompleteResults : public CXCodeCompleteResults { SmallVector<StoredDiagnostic, 8> Diagnostics; /// Allocated API-exposed wrappers for Diagnostics. - SmallVector<CXStoredDiagnostic *, 8> DiagnosticsWrappers; + SmallVector<std::unique_ptr<CXStoredDiagnostic>, 8> DiagnosticsWrappers; IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts; @@ -371,7 +371,6 @@ AllocatedCXCodeCompleteResults::AllocatedCXCodeCompleteResults( } AllocatedCXCodeCompleteResults::~AllocatedCXCodeCompleteResults() { - llvm::DeleteContainerPointers(DiagnosticsWrappers); delete [] Results; for (unsigned I = 0, N = TemporaryBuffers.size(); I != N; ++I) @@ -914,10 +913,12 @@ clang_codeCompleteGetDiagnostic(CXCodeCompleteResults *ResultsIn, if (!Results || Index >= Results->Diagnostics.size()) return nullptr; - CXStoredDiagnostic *Diag = Results->DiagnosticsWrappers[Index]; + CXStoredDiagnostic *Diag = Results->DiagnosticsWrappers[Index].get(); if (!Diag) - Results->DiagnosticsWrappers[Index] = Diag = - new CXStoredDiagnostic(Results->Diagnostics[Index], Results->LangOpts); + Diag = (Results->DiagnosticsWrappers[Index] = + std::make_unique<CXStoredDiagnostic>( + Results->Diagnostics[Index], Results->LangOpts)) + .get(); return Diag; } diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index 9368501592a92..f56771b8a594b 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -77,7 +77,7 @@ if(MSVC) set(LLVM_EXPORTED_SYMBOL_FILE) endif() -if(LLVM_ENABLE_PIC) +if(LLVM_ENABLE_PIC OR (WIN32 AND NOT LIBCLANG_BUILD_STATIC)) set(ENABLE_SHARED SHARED) endif() diff --git a/clang/tools/libclang/CursorVisitor.h b/clang/tools/libclang/CursorVisitor.h index 3337fecd0db39..364d9fdebdbc7 100644 --- a/clang/tools/libclang/CursorVisitor.h +++ b/clang/tools/libclang/CursorVisitor.h @@ -241,6 +241,7 @@ class CursorVisitor : public DeclVisitor<CursorVisitor, bool>, bool VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); bool VisitStaticAssertDecl(StaticAssertDecl *D); bool VisitFriendDecl(FriendDecl *D); + bool VisitDecompositionDecl(DecompositionDecl *D); // Name visitor bool VisitDeclarationNameInfo(DeclarationNameInfo Name); diff --git
a/clang/tools/libclang/Indexing.cpp b/clang/tools/libclang/Indexing.cpp index 05852f3e2bfcf..f0303fdcd825c 100644 --- a/clang/tools/libclang/Indexing.cpp +++ b/clang/tools/libclang/Indexing.cpp @@ -227,7 +227,8 @@ class ParsedSrcLocationsTracker { } bool isParsedOnceInclude(const FileEntry *FE) { - return PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE); + return PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) || + PP.getHeaderSearchInfo().hasFileBeenImported(FE); } }; diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt index 868635b6eea56..5d9ff5b97dbd4 100644 --- a/clang/unittests/AST/CMakeLists.txt +++ b/clang/unittests/AST/CMakeLists.txt @@ -43,5 +43,9 @@ clang_target_link_libraries(ASTTests clangFrontend clangSerialization clangTooling - LLVMTestingSupport ) + +target_link_libraries(ASTTests + PRIVATE + LLVMTestingSupport +) diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp index 1c3e00ca4ae8a..4df2a9bf736ad 100644 --- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp +++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp @@ -75,7 +75,7 @@ mutatedBy(const SmallVectorImpl &Results, ASTUnit *AST) { std::string removeSpace(std::string s) { s.erase(std::remove_if(s.begin(), s.end(), - [](char c) { return std::isspace(c); }), + [](char c) { return llvm::isSpace(c); }), s.end()); return s; } diff --git a/clang/unittests/Format/CleanupTest.cpp b/clang/unittests/Format/CleanupTest.cpp index 70741d239c828..9649b981d558d 100644 --- a/clang/unittests/Format/CleanupTest.cpp +++ b/clang/unittests/Format/CleanupTest.cpp @@ -479,7 +479,6 @@ TEST_F(CleanUpReplacementsTest, NoNewLineAtTheEndOfCodeMultipleInsertions) { EXPECT_EQ(Expected, apply(Code, Replaces)); } - TEST_F(CleanUpReplacementsTest, FormatCorrectLineWhenHeadersAreInserted) { std::string Code = "\n" "int x;\n" diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 69a2001cd9952..9fdf2e7284727 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -1298,6 +1298,20 @@ TEST_F(FormatTest, CaseRanges) { "}"); } +TEST_F(FormatTest, ShortEnums) { + FormatStyle Style = getLLVMStyle(); + Style.AllowShortEnumsOnASingleLine = true; + verifyFormat("enum { A, B, C } ShortEnum1, ShortEnum2;", Style); + Style.AllowShortEnumsOnASingleLine = false; + verifyFormat("enum\n" + "{\n" + " A,\n" + " B,\n" + " C\n" + "} ShortEnum1, ShortEnum2;", + Style); +} + TEST_F(FormatTest, ShortCaseLabels) { FormatStyle Style = getLLVMStyle(); Style.AllowShortCaseLabelsOnASingleLine = true; @@ -6279,7 +6293,17 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { "void\n" "A::operator[]() {}\n" "void\n" - "A::operator!() {}\n", + "A::operator!() {}\n" + "void\n" + "A::operator**() {}\n" + "void\n" + "A::operator *() {}\n" + "void\n" + "A::operator **() {}\n" + "void\n" + "A::operator &() {}\n" + "void\n" + "A::operator void **() {}\n", Style); verifyFormat("constexpr auto\n" "operator()() const -> reference {}\n" @@ -6296,6 +6320,10 @@ TEST_F(FormatTest, ReturnTypeBreakingStyle) { "constexpr auto\n" "operator void *() const -> reference {}\n" "constexpr auto\n" + "operator void **() const -> reference {}\n" + "constexpr auto\n" + "operator void *() const -> reference {}\n" + "constexpr auto\n" "operator void &() const -> reference {}\n" "constexpr auto\n" "operator void &&() const -> reference {}\n" @@ -8381,6 +8409,17 @@ TEST_F(FormatTest, 
LayoutCxx11BraceInitializers) { format("vector<int> SomeVector = { // aaa\n" " 1, 2, };")); + // C++11 brace initializer list l-braces should not be treated any differently + // when breaking before lambda bodies is enabled + FormatStyle BreakBeforeLambdaBody = getLLVMStyle(); + BreakBeforeLambdaBody.BreakBeforeBraces = FormatStyle::BS_Custom; + BreakBeforeLambdaBody.BraceWrapping.BeforeLambdaBody = true; + BreakBeforeLambdaBody.AlwaysBreakBeforeMultilineStrings = true; + verifyFormat( + "std::runtime_error{\n" + " \"Long string which will force a break onto the next line...\"};", + BreakBeforeLambdaBody); + FormatStyle ExtraSpaces = getLLVMStyle(); ExtraSpaces.Cpp11BracedListStyle = false; ExtraSpaces.ColumnLimit = 75; @@ -8487,6 +8526,20 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { verifyFormat("vector< int > x{ };", SpaceBetweenBraces); } +TEST_F(FormatTest, FormatSpacesInAngles) { + FormatStyle SpaceInAngles = getLLVMStyle(); + SpaceInAngles.SpacesInAngles = true; + verifyFormat("vector< ::std::string > x1;", SpaceInAngles); + verifyFormat("Foo< int, Bar > x2;", SpaceInAngles); + verifyFormat("Foo< ::int, ::Bar > x3;", SpaceInAngles); + + SpaceInAngles.SpacesInAngles = false; + verifyFormat("vector<::std::string> x4;", SpaceInAngles); + verifyFormat("vector<int> x5;", SpaceInAngles); + verifyFormat("Foo<int, Bar> x6;", SpaceInAngles); + verifyFormat("Foo<::int, ::Bar> x7;", SpaceInAngles); +} + TEST_F(FormatTest, FormatsBracedListsInColumnLayout) { verifyFormat("vector<int> x = {1, 22, 333, 4444, 55555, 666666, 7777777,\n" " 1, 22, 333, 4444, 55555, 666666, 7777777,\n" @@ -12934,6 +12987,7 @@ TEST_F(FormatTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(AllowAllConstructorInitializersOnNextLine); CHECK_PARSE_BOOL(AllowAllParametersOfDeclarationOnNextLine); CHECK_PARSE_BOOL(AllowShortCaseLabelsOnASingleLine); + CHECK_PARSE_BOOL(AllowShortEnumsOnASingleLine); CHECK_PARSE_BOOL(AllowShortLoopsOnASingleLine); CHECK_PARSE_BOOL(BinPackArguments); CHECK_PARSE_BOOL(BinPackParameters); @@ -14111,6 +14165,8 @@ TEST_F(FormatTest, FormatsLambdas) { " -> int {\n" " return 1; //\n" "};"); + verifyFormat("[]() -> Void<T...> {};"); + verifyFormat("[a, b]() -> Tuple<T...> { return {}; };"); // Lambdas with explicit template argument lists.
verifyFormat( @@ -15628,9 +15684,20 @@ TEST_F(FormatTest, OperatorSpacing) { Style.PointerAlignment = FormatStyle::PAS_Right; verifyFormat("Foo::operator*();", Style); verifyFormat("Foo::operator void *();", Style); + verifyFormat("Foo::operator void **();", Style); verifyFormat("Foo::operator()(void *);", Style); verifyFormat("Foo::operator*(void *);", Style); verifyFormat("Foo::operator*();", Style); + verifyFormat("Foo::operator**();", Style); + verifyFormat("Foo::operator&();", Style); + verifyFormat("Foo::operator *();", Style); + verifyFormat("Foo::operator *();", Style); + verifyFormat("Foo::operator **();", Style); + verifyFormat("Foo::operator **();", Style); + verifyFormat("Foo::operator &();", Style); + verifyFormat("Foo::operator &();", Style); + verifyFormat("Foo::operator &&();", Style); + verifyFormat("Foo::operator &&();", Style); verifyFormat("operator*(int (*)(), class Foo);", Style); verifyFormat("Foo::operator&();", Style); @@ -15641,21 +15708,39 @@ TEST_F(FormatTest, OperatorSpacing) { verifyFormat("operator&(int (&)(), class Foo);", Style); verifyFormat("Foo::operator&&();", Style); + verifyFormat("Foo::operator**();", Style); verifyFormat("Foo::operator void &&();", Style); verifyFormat("Foo::operator()(void &&);", Style); verifyFormat("Foo::operator&&(void &&);", Style); verifyFormat("Foo::operator&&();", Style); verifyFormat("operator&&(int(&&)(), class Foo);", Style); + verifyFormat("operator const nsTArrayRight &()", Style); + verifyFormat("[[nodiscard]] operator const nsTArrayRight &()", + Style); + verifyFormat("operator void **()", Style); + verifyFormat("operator const FooRight &()", Style); + verifyFormat("operator const FooRight *()", Style); + verifyFormat("operator const FooRight **()", Style); Style.PointerAlignment = FormatStyle::PAS_Left; verifyFormat("Foo::operator*();", Style); + verifyFormat("Foo::operator**();", Style); verifyFormat("Foo::operator void*();", Style); + verifyFormat("Foo::operator void**();", Style); verifyFormat("Foo::operator/*comment*/ void*();", Style); verifyFormat("Foo::operator/*a*/ const /*b*/ void*();", Style); verifyFormat("Foo::operator/*a*/ volatile /*b*/ void*();", Style); verifyFormat("Foo::operator()(void*);", Style); verifyFormat("Foo::operator*(void*);", Style); verifyFormat("Foo::operator*();", Style); + verifyFormat("Foo::operator*();", Style); + verifyFormat("Foo::operator*();", Style); + verifyFormat("Foo::operator**();", Style); + verifyFormat("Foo::operator**();", Style); + verifyFormat("Foo::operator&();", Style); + verifyFormat("Foo::operator&();", Style); + verifyFormat("Foo::operator&&();", Style); + verifyFormat("Foo::operator&&();", Style); verifyFormat("operator*(int (*)(), class Foo);", Style); verifyFormat("Foo::operator&();", Style); @@ -15677,9 +15762,17 @@ TEST_F(FormatTest, OperatorSpacing) { verifyFormat("Foo::operator&&(void&&);", Style); verifyFormat("Foo::operator&&();", Style); verifyFormat("operator&&(int(&&)(), class Foo);", Style); + verifyFormat("operator const nsTArrayLeft&()", Style); + verifyFormat("[[nodiscard]] operator const nsTArrayLeft&()", + Style); + verifyFormat("operator void**()", Style); + verifyFormat("operator const FooLeft&()", Style); + verifyFormat("operator const FooLeft*()", Style); + verifyFormat("operator const FooLeft**()", Style); // PR45107 verifyFormat("operator Vector&();", Style); + verifyFormat("operator const Vector&();", Style); verifyFormat("operator foo::Bar*();", Style); verifyFormat("operator const Foo::Bar*();", Style); verifyFormat("operator/*a*/ const 
/*b*/ Foo /*c*/ /*d*/ ::Bar*();", diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp index d37a533c3c5d4..6f0b1966767d4 100644 --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -343,7 +343,13 @@ TEST_F(FormatTestCSharp, CSharpRegions) { } TEST_F(FormatTestCSharp, CSharpKeyWordEscaping) { - verifyFormat("public enum var { none, @string, bool, @enum }"); + verifyFormat("public enum var\n" + "{\n" + " none,\n" + " @string,\n" + " bool,\n" + " @enum\n" + "}"); } TEST_F(FormatTestCSharp, CSharpNullCoalescing) { @@ -611,6 +617,64 @@ TEST_F(FormatTestCSharp, CSharpPropertyAccessors) { public string Name { get => _name; set => _name = value; +})", + Style); + + // Examples taken from + // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties + verifyFormat(R"( +// Expression body definitions +public class SaleItem { + public decimal Price { + get => _cost; + set => _cost = value; + } +})", + Style); + + verifyFormat(R"( +// Properties with backing fields +class TimePeriod { + public double Hours { + get { return _seconds / 3600; } + set { + if (value < 0 || value > 24) + throw new ArgumentOutOfRangeException( + $"{nameof(value)} must be between 0 and 24."); + _seconds = value * 3600; + } + } +})", + Style); + + verifyFormat(R"( +// Auto-implemented properties +public class SaleItem { + public decimal Price { get; set; } +})", + Style); + + // Add column limit to wrap long lines. + Style.ColumnLimit = 100; + + // Examples with assignment to default value. + verifyFormat(R"( +// Long assignment to default value +class MyClass { + public override VeryLongNamedTypeIndeed VeryLongNamedValue { get; set } = + VeryLongNamedTypeIndeed.Create(DefaultFirstArgument, DefaultSecondArgument, + DefaultThirdArgument); +})", + Style); + + verifyFormat(R"( +// Long assignment to default value with expression body +class MyClass { + public override VeryLongNamedTypeIndeed VeryLongNamedValue { + get => veryLongNamedField; + set => veryLongNamedField = value; + } = VeryLongNamedTypeIndeed.Create(DefaultFirstArgument, DefaultSecondArgument, + DefaultThirdArgument); })", Style); } @@ -640,6 +704,10 @@ TEST_F(FormatTestCSharp, CSharpSpaces) { verifyFormat(R"(Result this[Index x] => Foo(x);)", Style); verifyFormat(R"(char[,,] rawCharArray = MakeCharacterGrid();)", Style); + verifyFormat(R"(var (key, value))", Style); + + // `&&` is not seen as a reference. + verifyFormat(R"(A == typeof(X) && someBool)", Style); // Not seen as a C-style cast. 
verifyFormat(R"(// @@ -696,7 +764,8 @@ TEST_F(FormatTestCSharp, CSharpGenericTypeConstraints) { verifyFormat(R"(// class ItemFactory - where T : new() {})", Style); + where T : new() {})", + Style); verifyFormat(R"(// class Dictionary diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index 6dbc364fd255b..d5b9f8e0885ac 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -28,11 +28,7 @@ FormatStyle getGoogleStyle() { return getGoogleStyle(FormatStyle::LK_Cpp); } class FormatTestComments : public ::testing::Test { protected: - enum StatusCheck { - SC_ExpectComplete, - SC_ExpectIncomplete, - SC_DoNotCheck - }; + enum StatusCheck { SC_ExpectComplete, SC_ExpectIncomplete, SC_DoNotCheck }; std::string format(llvm::StringRef Code, const FormatStyle &Style = getLLVMStyle(), @@ -649,7 +645,8 @@ TEST_F(FormatTestComments, SplitsLongCxxComments) { "//!line 4\n" "//!line 5\n" "// line 6\n" - "//line 7", getLLVMStyleWithColumns(20))); + "//line 7", + getLLVMStyleWithColumns(20))); EXPECT_EQ("// aa bb cc dd", format("// aa bb cc dd ", @@ -1346,7 +1343,8 @@ TEST_F(FormatTestComments, KeepsTrailingPPCommentsAndSectionCommentsSeparate) { " // section comment 3\n" " i = 4;\n" "#endif\n" - "}", getLLVMStyleWithColumns(80)); + "}", + getLLVMStyleWithColumns(80)); } TEST_F(FormatTestComments, AlignsPPElseEndifComments) { @@ -1824,14 +1822,13 @@ TEST_F(FormatTestComments, ReflowsComments) { // Reflow the last two lines of a section that starts with a line having // different indentation. - EXPECT_EQ( - "// long\n" - "// long long long\n" - "// long long", - format("// long\n" - "// long long long long\n" - "// long", - getLLVMStyleWithColumns(20))); + EXPECT_EQ("// long\n" + "// long long long\n" + "// long long", + format("// long\n" + "// long long long long\n" + "// long", + getLLVMStyleWithColumns(20))); // Keep the block comment endling '*/' while reflowing. EXPECT_EQ("/* Long long long\n" @@ -1911,10 +1908,9 @@ TEST_F(FormatTestComments, ReflowsComments) { EXPECT_EQ("// long long long\n" "// long\n" "// ... --- ...", - format( - "// long long long long\n" - "// ... --- ...", - getLLVMStyleWithColumns(20))); + format("// long long long long\n" + "// ... --- ...", + getLLVMStyleWithColumns(20))); // Don't reflow lines starting with '@'. EXPECT_EQ("// long long long\n" @@ -1968,10 +1964,9 @@ TEST_F(FormatTestComments, ReflowsComments) { // characters. EXPECT_EQ("// long long long\n" "// long 'long'", - format( - "// long long long long\n" - "// 'long'", - getLLVMStyleWithColumns(20))); + format("// long long long long\n" + "// 'long'", + getLLVMStyleWithColumns(20))); // Don't reflow between separate blocks of comments. EXPECT_EQ("/* First comment\n" @@ -2420,7 +2415,7 @@ TEST_F(FormatTestComments, BlockComments) { format("int aaaaaaaaaaaaaaaaaaaaaaaaaaaa =\n" " /* line 1\n" " bbbbbbbbbbbb */ bbbbbbbbbbbbbbbbbbbbbbbbbbbb;", - getLLVMStyleWithColumns(50))); + getLLVMStyleWithColumns(50))); FormatStyle NoBinPacking = getLLVMStyle(); NoBinPacking.BinPackParameters = false; @@ -2791,15 +2786,18 @@ TEST_F(FormatTestComments, AlignsBlockCommentDecorations) { EXPECT_EQ("/*\n" " */", format("/*\n" - "*/", getLLVMStyle())); + "*/", + getLLVMStyle())); EXPECT_EQ("/*\n" " */", format("/*\n" - " */", getLLVMStyle())); + " */", + getLLVMStyle())); EXPECT_EQ("/*\n" " */", format("/*\n" - " */", getLLVMStyle())); + " */", + getLLVMStyle())); // Align a single line. 
EXPECT_EQ("/*\n" @@ -2854,19 +2852,22 @@ TEST_F(FormatTestComments, AlignsBlockCommentDecorations) { " */", format("/*\n" "* line\n" - "*/", getLLVMStyle())); + "*/", + getLLVMStyle())); EXPECT_EQ("/*\n" " * line\n" " */", format("/*\n" " * line\n" - " */", getLLVMStyle())); + " */", + getLLVMStyle())); EXPECT_EQ("/*\n" " * line\n" " */", format("/*\n" " * line\n" - " */", getLLVMStyle())); + " */", + getLLVMStyle())); // Align two lines. EXPECT_EQ("/* line 1\n" @@ -2947,7 +2948,8 @@ TEST_F(FormatTestComments, AlignsBlockCommentDecorations) { " * line 2\n" " * line 3\n" "* line 4\n" - "*/", getLLVMStyle())); + "*/", + getLLVMStyle())); // Align empty or blank lines. EXPECT_EQ("/**\n" @@ -2959,7 +2961,8 @@ TEST_F(FormatTestComments, AlignsBlockCommentDecorations) { "* \n" " * \n" " *\n" - "*/", getLLVMStyle())); + "*/", + getLLVMStyle())); // Align while breaking and reflowing. EXPECT_EQ("/*\n" @@ -3096,7 +3099,7 @@ TEST_F(FormatTestComments, BreaksBeforeTrailingUnbreakableSequence) { } TEST_F(FormatTestComments, ReflowBackslashCrash) { -// clang-format off + // clang-format off EXPECT_EQ( "// How to run:\n" "// bbbbb run \\\n" @@ -3107,7 +3110,7 @@ TEST_F(FormatTestComments, ReflowBackslashCrash) { "// bbbbb run \\\n" "// rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr \\\n" "// -- --output_directory=\"\"")); -// clang-format on + // clang-format on } TEST_F(FormatTestComments, IndentsLongJavadocAnnotatedLines) { @@ -3143,18 +3146,18 @@ TEST_F(FormatTestComments, IndentsLongJavadocAnnotatedLines) { "long long long long long long long long long long long\n" " */\n", Style)); - EXPECT_EQ( - "/**\n" - " * Sentence that\n" - " * should be broken.\n" - " * @param short\n" - " * keep indentation\n" - " */\n", format( - "/**\n" - " * Sentence that should be broken.\n" - " * @param short\n" - " * keep indentation\n" - " */\n", Style20)); + EXPECT_EQ("/**\n" + " * Sentence that\n" + " * should be broken.\n" + " * @param short\n" + " * keep indentation\n" + " */\n", + format("/**\n" + " * Sentence that should be broken.\n" + " * @param short\n" + " * keep indentation\n" + " */\n", + Style20)); EXPECT_EQ("/**\n" " * @param l1 long1\n" diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index eadea35f051a7..07e25e8e1a799 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -48,15 +48,13 @@ class FormatTestJS : public ::testing::Test { static void verifyFormat( llvm::StringRef Code, const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) { - EXPECT_EQ(Code.str(), format(Code, Style)) - << "Expected code is not stable"; + EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable"; std::string Result = format(test::messUp(Code), Style); EXPECT_EQ(Code.str(), Result) << "Formatted:\n" << Result; } static void verifyFormat( - llvm::StringRef Expected, - llvm::StringRef Code, + llvm::StringRef Expected, llvm::StringRef Code, const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) { EXPECT_EQ(Expected.str(), format(Expected, Style)) << "Expected code is not stable"; @@ -115,7 +113,8 @@ TEST_F(FormatTestJS, JSDocComments) { " * jsdoc line 1\n" " */", format("/** jsdoc line 1\n" - " */", getGoogleJSStyleWithColumns(20))); + " */", + getGoogleJSStyleWithColumns(20))); // Don't break the first line of a short single line jsdoc comment. 
EXPECT_EQ("/** jsdoc line 1 */", format("/** jsdoc line 1 */", getGoogleJSStyleWithColumns(20))); @@ -185,8 +184,7 @@ TEST_F(FormatTestJS, JSDocComments) { // Don't break the first line of a single line short jsdoc comment pragma. EXPECT_EQ("/** @returns j */", - format("/** @returns j */", - getGoogleJSStyleWithColumns(20))); + format("/** @returns j */", getGoogleJSStyleWithColumns(20))); // Break a single line long jsdoc comment pragma. EXPECT_EQ("/**\n" @@ -346,18 +344,17 @@ TEST_F(FormatTestJS, ReservedWords) { } TEST_F(FormatTestJS, ReservedWordsMethods) { - verifyFormat( - "class X {\n" - " delete() {\n" - " x();\n" - " }\n" - " interface() {\n" - " x();\n" - " }\n" - " let() {\n" - " x();\n" - " }\n" - "}\n"); + verifyFormat("class X {\n" + " delete() {\n" + " x();\n" + " }\n" + " interface() {\n" + " x();\n" + " }\n" + " let() {\n" + " x();\n" + " }\n" + "}\n"); verifyFormat("class KeywordNamedMethods {\n" " do() {\n" " }\n" @@ -638,25 +635,27 @@ TEST_F(FormatTestJS, FormatsNamespaces) { TEST_F(FormatTestJS, NamespacesMayNotWrap) { verifyFormat("declare namespace foobarbaz {\n" - "}\n", getGoogleJSStyleWithColumns(18)); + "}\n", + getGoogleJSStyleWithColumns(18)); verifyFormat("declare module foobarbaz {\n" - "}\n", getGoogleJSStyleWithColumns(15)); + "}\n", + getGoogleJSStyleWithColumns(15)); verifyFormat("namespace foobarbaz {\n" - "}\n", getGoogleJSStyleWithColumns(10)); + "}\n", + getGoogleJSStyleWithColumns(10)); verifyFormat("module foobarbaz {\n" - "}\n", getGoogleJSStyleWithColumns(7)); + "}\n", + getGoogleJSStyleWithColumns(7)); } TEST_F(FormatTestJS, AmbientDeclarations) { FormatStyle NineCols = getGoogleJSStyleWithColumns(9); - verifyFormat( - "declare class\n" - " X {}", - NineCols); - verifyFormat( - "declare function\n" - "x();", // TODO(martinprobst): should ideally be indented. - NineCols); + verifyFormat("declare class\n" + " X {}", + NineCols); + verifyFormat("declare function\n" + "x();", // TODO(martinprobst): should ideally be indented. + NineCols); verifyFormat("declare function foo();\n" "let x = 1;\n"); verifyFormat("declare function foo(): string;\n" @@ -667,14 +666,12 @@ TEST_F(FormatTestJS, AmbientDeclarations) { "let x = 1;\n"); verifyFormat("declare interface Y {}\n" "let x = 1;\n"); - verifyFormat( - "declare enum X {\n" - "}", - NineCols); - verifyFormat( - "declare let\n" - " x: number;", - NineCols); + verifyFormat("declare enum X {\n" + "}", + NineCols); + verifyFormat("declare let\n" + " x: number;", + NineCols); } TEST_F(FormatTestJS, FormatsFreestandingFunctions) { @@ -1012,7 +1009,6 @@ TEST_F(FormatTestJS, FunctionLiterals) { " a: function() { return 1; }\n" "});", Style); - } TEST_F(FormatTestJS, DontWrapEmptyLiterals) { @@ -1206,15 +1202,13 @@ TEST_F(FormatTestJS, ArrowFunctionStyle) { " x;\n" "};", Style); - verifyFormat("const arrInlineEmpty = () => {};", - Style); + verifyFormat("const arrInlineEmpty = () => {};", Style); Style.AllowShortLambdasOnASingleLine = FormatStyle::SLS_Inline; verifyFormat("const arr = () => {\n" " x;\n" "};", Style); - verifyFormat("foo(() => {});", - Style); + verifyFormat("foo(() => {});", Style); verifyFormat("const arrInlineInline = () => {};", Style); } @@ -1359,43 +1353,36 @@ TEST_F(FormatTestJS, AutomaticSemicolonInsertionHeuristic) { "a = null\n" " return 1"); // Below "class Y {}" should ideally be on its own line. 
- verifyFormat( - "x = {\n" - " a: 1\n" - "} class Y {}", - " x = {a : 1}\n" - " class Y { }"); - verifyFormat( - "if (x) {\n" - "}\n" - "return 1", - "if (x) {}\n" - " return 1"); - verifyFormat( - "if (x) {\n" - "}\n" - "class X {}", - "if (x) {}\n" - " class X {}"); + verifyFormat("x = {\n" + " a: 1\n" + "} class Y {}", + " x = {a : 1}\n" + " class Y { }"); + verifyFormat("if (x) {\n" + "}\n" + "return 1", + "if (x) {}\n" + " return 1"); + verifyFormat("if (x) {\n" + "}\n" + "class X {}", + "if (x) {}\n" + " class X {}"); } TEST_F(FormatTestJS, ImportExportASI) { - verifyFormat( - "import {x} from 'y'\n" - "export function z() {}", - "import {x} from 'y'\n" - " export function z() {}"); + verifyFormat("import {x} from 'y'\n" + "export function z() {}", + "import {x} from 'y'\n" + " export function z() {}"); // Below "class Y {}" should ideally be on its own line. - verifyFormat( - "export {x} class Y {}", - " export {x}\n" - " class Y {\n}"); - verifyFormat( - "if (x) {\n" - "}\n" - "export class Y {}", - "if ( x ) { }\n" - " export class Y {}"); + verifyFormat("export {x} class Y {}", " export {x}\n" + " class Y {\n}"); + verifyFormat("if (x) {\n" + "}\n" + "export class Y {}", + "if ( x ) { }\n" + " export class Y {}"); } TEST_F(FormatTestJS, ClosureStyleCasts) { @@ -1576,8 +1563,9 @@ TEST_F(FormatTestJS, TypeAnnotations) { verifyFormat( "var someValue = (v as aaaaaaaaaaaaaaaaaaaa[])\n" " .someFunction(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);"); - verifyFormat("const xIsALongIdent:\n"" YJustBarelyFitsLinex[];", - getGoogleJSStyleWithColumns(20)); + verifyFormat("const xIsALongIdent:\n" + " YJustBarelyFitsLinex[];", + getGoogleJSStyleWithColumns(20)); verifyFormat("const x = {\n" " y: 1\n" "} as const;"); @@ -1630,10 +1618,9 @@ TEST_F(FormatTestJS, ClassDeclarations) { verifyFormat("class C {\n static x(): string {\n return 'asd';\n }\n}"); verifyFormat("class C extends P implements I {}"); verifyFormat("class C extends p.P implements i.I {}"); - verifyFormat( - "x(class {\n" - " a(): A {}\n" - "});"); + verifyFormat("x(class {\n" + " a(): A {}\n" + "});"); verifyFormat("class Test {\n" " aaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaa: aaaaaaaaaaaaaaaaaaaa):\n" " aaaaaaaaaaaaaaaaaaaaaa {}\n" @@ -1765,14 +1752,12 @@ TEST_F(FormatTestJS, TypeInterfaceLineWrapping) { const FormatStyle &Style = getGoogleJSStyleWithColumns(20); verifyFormat("type LongTypeIsReallyUnreasonablyLong =\n" " string;\n", - "type LongTypeIsReallyUnreasonablyLong = string;\n", + "type LongTypeIsReallyUnreasonablyLong = string;\n", Style); + verifyFormat("interface AbstractStrategyFactoryProvider {\n" + " a: number\n" + "}\n", + "interface AbstractStrategyFactoryProvider { a: number }\n", Style); - verifyFormat( - "interface AbstractStrategyFactoryProvider {\n" - " a: number\n" - "}\n", - "interface AbstractStrategyFactoryProvider { a: number }\n", - Style); } TEST_F(FormatTestJS, RemoveEmptyLinesInArrowFunctions) { @@ -1912,11 +1897,11 @@ TEST_F(FormatTestJS, ImportWrapping) { TEST_F(FormatTestJS, TemplateStrings) { // Keeps any whitespace/indentation within the template string. verifyFormat("var x = `hello\n" - " ${name}\n" - " !`;", - "var x = `hello\n" - " ${ name }\n" - " !`;"); + " ${name}\n" + " !`;", + "var x = `hello\n" + " ${ name }\n" + " !`;"); verifyFormat("var x =\n" " `hello ${world}` >= some();", @@ -1926,18 +1911,18 @@ TEST_F(FormatTestJS, TemplateStrings) { verifyFormat("var x = `hellö ${wörld}` >= söme();", getGoogleJSStyleWithColumns(35)); // Fits due to UTF-8. 
verifyFormat("var x = `hello\n" - " ${world}` >=\n" - " some();", - "var x =\n" - " `hello\n" - " ${world}` >= some();", - getGoogleJSStyleWithColumns(21)); // Barely doesn't fit. + " ${world}` >=\n" + " some();", + "var x =\n" + " `hello\n" + " ${world}` >= some();", + getGoogleJSStyleWithColumns(21)); // Barely doesn't fit. verifyFormat("var x = `hello\n" - " ${world}` >= some();", - "var x =\n" - " `hello\n" - " ${world}` >= some();", - getGoogleJSStyleWithColumns(22)); // Barely fits. + " ${world}` >= some();", + "var x =\n" + " `hello\n" + " ${world}` >= some();", + getGoogleJSStyleWithColumns(22)); // Barely fits. verifyFormat("var x =\n" " `h`;", @@ -1956,18 +1941,17 @@ TEST_F(FormatTestJS, TemplateStrings) { // Make sure template strings get a proper ColumnWidth assigned, even if they // are first token in line. - verifyFormat( - "var a = aaaaaaaaaaaaaaaaaaaaaaaaaaaa ||\n" - " `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`;"); + verifyFormat("var a = aaaaaaaaaaaaaaaaaaaaaaaaaaaa ||\n" + " `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`;"); // Two template strings. verifyFormat("var x = `hello` == `hello`;"); // Comments in template strings. verifyFormat("var x = `//a`;\n" - "var y;", - "var x =\n `//a`;\n" - "var y ;"); + "var y;", + "var x =\n `//a`;\n" + "var y ;"); verifyFormat("var x = `/*a`;\n" "var y;", "var x =\n `/*a`;\n" @@ -2207,16 +2191,15 @@ TEST_F(FormatTestJS, JSDocAnnotations) { " * @see http://very/very/long/url/is/long\n" " */", getGoogleJSStyleWithColumns(20)); - verifyFormat( - "/**\n" - " * @param This is a\n" - " * long comment\n" - " * but no type\n" - " */", - "/**\n" - " * @param This is a long comment but no type\n" - " */", - getGoogleJSStyleWithColumns(20)); + verifyFormat("/**\n" + " * @param This is a\n" + " * long comment\n" + " * but no type\n" + " */", + "/**\n" + " * @param This is a long comment but no type\n" + " */", + getGoogleJSStyleWithColumns(20)); // Break and reindent @param line and reflow unrelated lines. EXPECT_EQ("{\n" " /**\n" @@ -2277,12 +2260,11 @@ TEST_F(FormatTestJS, RequoteStringsSingle) { verifyFormat("var x = 'foo';", "var x = \"foo\";"); verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo'o'\";"); verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo\\'o'\";"); - verifyFormat( - "var x =\n" - " 'foo\\'';", - // Code below is 15 chars wide, doesn't fit into the line with the - // \ escape added. - "var x = \"foo'\";", getGoogleJSStyleWithColumns(15)); + verifyFormat("var x =\n" + " 'foo\\'';", + // Code below is 15 chars wide, doesn't fit into the line with + // the \ escape added. + "var x = \"foo'\";", getGoogleJSStyleWithColumns(15)); // Removes no-longer needed \ escape from ". verifyFormat("var x = 'fo\"o';", "var x = \"fo\\\"o\";"); // Code below fits into 15 chars *after* removing the \ escape. @@ -2343,18 +2325,16 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) { verifyFormat("let x = foo[0]!;\n"); verifyFormat("let x = (foo)!;\n"); verifyFormat("let x = x(foo!);\n"); - verifyFormat( - "a.aaaaaa(a.a!).then(\n" - " x => x(x));\n", - getGoogleJSStyleWithColumns(20)); + verifyFormat("a.aaaaaa(a.a!).then(\n" + " x => x(x));\n", + getGoogleJSStyleWithColumns(20)); verifyFormat("let x = foo! 
- 1;\n"); verifyFormat("let x = {foo: 1}!;\n"); - verifyFormat( - "let x = hello.foo()!\n" - " .foo()!\n" - " .foo()!\n" - " .foo()!;\n", - getGoogleJSStyleWithColumns(20)); + verifyFormat("let x = hello.foo()!\n" + " .foo()!\n" + " .foo()!\n" + " .foo()!;\n", + getGoogleJSStyleWithColumns(20)); verifyFormat("let x = namespace!;\n"); verifyFormat("return !!x;\n"); } @@ -2447,7 +2427,8 @@ TEST_F(FormatTestJS, ImportComments) { verifyFormat("import {x} from 'x'; // from some location", getGoogleJSStyleWithColumns(25)); verifyFormat("// taze: x from 'location'", getGoogleJSStyleWithColumns(10)); - verifyFormat("/// ", getGoogleJSStyleWithColumns(10)); + verifyFormat("/// ", + getGoogleJSStyleWithColumns(10)); } TEST_F(FormatTestJS, Exponentiation) { @@ -2462,36 +2443,42 @@ TEST_F(FormatTestJS, NestedLiterals) { " [\n" " 1,\n" " ],\n" - "];", FourSpaces); + "];", + FourSpaces); verifyFormat("var l = [\n" " {\n" " 1: 1,\n" " },\n" - "];", FourSpaces); + "];", + FourSpaces); verifyFormat("someFunction(\n" " p1,\n" " [\n" " 1,\n" " ],\n" - ");", FourSpaces); + ");", + FourSpaces); verifyFormat("someFunction(\n" " p1,\n" " {\n" " 1: 1,\n" " },\n" - ");", FourSpaces); + ");", + FourSpaces); verifyFormat("var o = {\n" " 1: 1,\n" " 2: {\n" " 3: 3,\n" " },\n" - "};", FourSpaces); + "};", + FourSpaces); verifyFormat("var o = {\n" " 1: 1,\n" " 2: [\n" " 3,\n" " ],\n" - "};", FourSpaces); + "};", + FourSpaces); } TEST_F(FormatTestJS, BackslashesInComments) { diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 5e73e4b4ea4e1..9a18c2853abc7 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -337,12 +337,12 @@ TEST_F(FormatTestJava, Annotations) { "List list;"); verifyFormat( - "@Test\n" - "@Feature({\"Android-TabSwitcher\"})\n" - "@CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE})\n" - "@Features.EnableFeatures({FEATURE})\n" - "public void test(@Foo.bar(\"baz\") @Quux.Qoob int theFirstParaaaaam,\n" - " @Foo.bar(\"baz\") @Quux.Qoob int theSecondParaaaaaaaaaaaaaaaam) {}"); + "@Test\n" + "@Feature({\"Android-TabSwitcher\"})\n" + "@CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE})\n" + "@Features.EnableFeatures({FEATURE})\n" + "public void test(@Foo.bar(\"baz\") @Quux.Qoob int theFirstParaaaaam,\n" + " @Foo.bar(\"baz\") @Quux.Qoob int theSecondParaaaaaaaaaaaaaaaam) {}"); } TEST_F(FormatTestJava, Generics) { @@ -460,19 +460,18 @@ TEST_F(FormatTestJava, MethodDeclarations) { } TEST_F(FormatTestJava, MethodReference) { - EXPECT_EQ( - "private void foo() {\n" - " f(this::methodReference);\n" - " f(C.super::methodReference);\n" - " Consumer c = System.out::println;\n" - " Iface mRef = Ty::meth;\n" - "}", - format("private void foo() {\n" - " f(this ::methodReference);\n" - " f(C.super ::methodReference);\n" - " Consumer c = System.out ::println;\n" - " Iface mRef = Ty :: meth;\n" - "}")); + EXPECT_EQ("private void foo() {\n" + " f(this::methodReference);\n" + " f(C.super::methodReference);\n" + " Consumer c = System.out::println;\n" + " Iface mRef = Ty::meth;\n" + "}", + format("private void foo() {\n" + " f(this ::methodReference);\n" + " f(C.super ::methodReference);\n" + " Consumer c = System.out ::println;\n" + " Iface mRef = Ty :: meth;\n" + "}")); } TEST_F(FormatTestJava, CppKeywords) { @@ -575,16 +574,15 @@ TEST_F(FormatTestJava, KeepsDelimitersOnOwnLineInJavaDocComments) { } TEST_F(FormatTestJava, RetainsLogicalShifts) { - verifyFormat("void f() {\n" - " int a = 
1;\n" - " a >>>= 1;\n" - "}"); - verifyFormat("void f() {\n" - " int a = 1;\n" - " a = a >>> 1;\n" - "}"); + verifyFormat("void f() {\n" + " int a = 1;\n" + " a >>>= 1;\n" + "}"); + verifyFormat("void f() {\n" + " int a = 1;\n" + " a = a >>> 1;\n" + "}"); } - -} // end namespace tooling +} // namespace format } // end namespace clang diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp index 42e2a80783be6..d73d090a8ba37 100644 --- a/clang/unittests/Format/FormatTestObjC.cpp +++ b/clang/unittests/Format/FormatTestObjC.cpp @@ -31,11 +31,7 @@ class FormatTestObjC : public ::testing::Test { Style.Language = FormatStyle::LK_ObjC; } - enum StatusCheck { - SC_ExpectComplete, - SC_ExpectIncomplete, - SC_DoNotCheck - }; + enum StatusCheck { SC_ExpectComplete, SC_ExpectIncomplete, SC_DoNotCheck }; std::string format(llvm::StringRef Code, StatusCheck CheckComplete = SC_ExpectComplete) { @@ -69,24 +65,28 @@ class FormatTestObjC : public ::testing::Test { }; TEST(FormatTestObjCStyle, DetectsObjCInHeaders) { - auto Style = getStyle("LLVM", "a.h", "none", "@interface\n" - "- (id)init;"); + auto Style = getStyle("LLVM", "a.h", "none", + "@interface\n" + "- (id)init;"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); - Style = getStyle("LLVM", "a.h", "none", "@interface\n" - "+ (id)init;"); + Style = getStyle("LLVM", "a.h", "none", + "@interface\n" + "+ (id)init;"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); - Style = getStyle("LLVM", "a.h", "none", "@interface\n" - "@end\n" - "//comment"); + Style = getStyle("LLVM", "a.h", "none", + "@interface\n" + "@end\n" + "//comment"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); - Style = getStyle("LLVM", "a.h", "none", "@interface\n" - "@end //comment"); + Style = getStyle("LLVM", "a.h", "none", + "@interface\n" + "@end //comment"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); @@ -113,13 +113,12 @@ TEST(FormatTestObjCStyle, DetectsObjCInHeaders) { ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); - Style = - getStyle("{}", "a.h", "none", "typedef NS_ENUM(int, Foo) {};\n"); + Style = getStyle("{}", "a.h", "none", "typedef NS_ENUM(int, Foo) {};\n"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); - Style = getStyle("{}", "a.h", "none", - "typedef NS_CLOSED_ENUM(int, Foo) {};\n"); + Style = + getStyle("{}", "a.h", "none", "typedef NS_CLOSED_ENUM(int, Foo) {};\n"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); @@ -603,7 +602,7 @@ TEST_F(FormatTestObjC, FormatObjCMethodDeclarations) { " bbb:(d)cccc;"); verifyFormat("- (void)drawRectOn:(id)surface ofSize:(aaa)height:(bbb)width;"); - // BraceWrapping AfterFunction is respected for ObjC methods + // BraceWrapping AfterFunction is respected for ObjC methods Style = getGoogleStyle(FormatStyle::LK_ObjC); Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterFunction = true; @@ -946,14 +945,16 @@ TEST_F(FormatTestObjC, FormatObjCMethodExpr) { verifyFormat("[self performSelector:@selector(loadAccessories)\n" " withObjectOnMainThread:nil\n" " waitUntilDone:false];"); - verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaa\n" - " performSelectorOnMainThread:@selector(loadAccessories)\n" - " withObject:nil\n" - " waitUntilDone:false];"); - verifyFormat("[self // force wrapping\n" - " performSelectorOnMainThread:@selector(loadAccessories)\n" - " withObject:nil\n" - " 
waitUntilDone:false];"); + verifyFormat( + "[aaaaaaaaaaaaaaaaaaaaaaaaa\n" + " performSelectorOnMainThread:@selector(loadAccessories)\n" + " withObject:nil\n" + " waitUntilDone:false];"); + verifyFormat( + "[self // force wrapping\n" + " performSelectorOnMainThread:@selector(loadAccessories)\n" + " withObject:nil\n" + " waitUntilDone:false];"); } TEST_F(FormatTestObjC, ObjCAt) { @@ -1374,7 +1375,7 @@ TEST_F(FormatTestObjC, DisambiguatesCallsFromCppLambdas) { // verifyFormat("x = ([a foo:bar] >> b->c == 'd');"); } -TEST_F(FormatTestObjC, DisambiguatesCallsFromStructuredBindings) { +TEST_F(FormatTestObjC, DisambiguatesCallsFromStructuredBindings) { verifyFormat("int f() {\n" " if (a && [f arg])\n" " return 0;\n" diff --git a/clang/unittests/Format/FormatTestProto.cpp b/clang/unittests/Format/FormatTestProto.cpp index 4258c169e884e..1ff7c22a6df78 100644 --- a/clang/unittests/Format/FormatTestProto.cpp +++ b/clang/unittests/Format/FormatTestProto.cpp @@ -667,6 +667,5 @@ TEST_F(FormatTestProto, PreventBreaksBetweenKeyAndSubmessages) { "}"); } - -} // end namespace tooling +} // namespace format } // end namespace clang diff --git a/clang/unittests/Format/FormatTestRawStrings.cpp b/clang/unittests/Format/FormatTestRawStrings.cpp index dc2f6b51807bd..31490355468e4 100644 --- a/clang/unittests/Format/FormatTestRawStrings.cpp +++ b/clang/unittests/Format/FormatTestRawStrings.cpp @@ -140,7 +140,9 @@ TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) { FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp); EXPECT_EQ(0, parseConfiguration("---\n" "Language: Cpp\n" - "BasedOnStyle: Google", &Style).value()); + "BasedOnStyle: Google", + &Style) + .value()); Style.RawStringFormats = {{ FormatStyle::LK_Cpp, {"cpp"}, @@ -178,26 +180,18 @@ t = R"pb(item: 1)pb";)test", } TEST_F(FormatTestRawStrings, ReformatsShortRawStringsOnSingleLine) { - expect_eq( - R"test(P p = TP(R"pb()pb");)test", - format( - R"test(P p = TP(R"pb( )pb");)test", - getRawStringPbStyleWithColumns(40))); - expect_eq( - R"test(P p = TP(R"pb(item_1: 1)pb");)test", - format( - R"test(P p = TP(R"pb(item_1:1)pb");)test", - getRawStringPbStyleWithColumns(40))); - expect_eq( - R"test(P p = TP(R"pb(item_1: 1)pb");)test", - format( - R"test(P p = TP(R"pb( item_1 : 1 )pb");)test", - getRawStringPbStyleWithColumns(40))); - expect_eq( - R"test(P p = TP(R"pb(item_1: 1 item_2: 2)pb");)test", - format( - R"test(P p = TP(R"pb(item_1:1 item_2:2)pb");)test", - getRawStringPbStyleWithColumns(40))); + expect_eq(R"test(P p = TP(R"pb()pb");)test", + format(R"test(P p = TP(R"pb( )pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq(R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format(R"test(P p = TP(R"pb(item_1:1)pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq(R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format(R"test(P p = TP(R"pb( item_1 : 1 )pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq(R"test(P p = TP(R"pb(item_1: 1 item_2: 2)pb");)test", + format(R"test(P p = TP(R"pb(item_1:1 item_2:2)pb");)test", + getRawStringPbStyleWithColumns(40))); // Merge two short lines into one. 
expect_eq(R"test( std::string s = R"pb( @@ -219,10 +213,10 @@ TEST_F(FormatTestRawStrings, BreaksShortRawStringsWhenNeeded) { expect_eq(R"test( P p = TP(R"pb(item_1 < 1 > item_2: { 2 })pb");)test", - format( - R"test( + format( + R"test( P p = TP(R"pb(item_1<1> item_2:{2})pb");)test", - getRawStringPbStyleWithColumns(40))); + getRawStringPbStyleWithColumns(40))); } TEST_F(FormatTestRawStrings, BreaksRawStringsExceedingColumnLimit) { @@ -247,9 +241,9 @@ P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2, item_3: 3)pb");)test", P p = TP(R"pb(item_1 < 1 > item_2: < 2 > item_3 {})pb");)test", - format(R"test( + format(R"test( P p = TP(R"pb(item_1<1> item_2:<2> item_3{ })pb");)test", - getRawStringPbStyleWithColumns(40))); + getRawStringPbStyleWithColumns(40))); expect_eq( R"test( @@ -552,7 +546,6 @@ ASSERT_TRUE( format(R"test( ASSERT_TRUE(ParseFromString(R"pb(item_1: 1 item_2: 2)pb"), ptr);)test", getRawStringPbStyleWithColumns(40))); - } TEST_F(FormatTestRawStrings, RawStringsInOperands) { @@ -683,7 +676,6 @@ auto S = auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2,item_3:3)pb"; )test", getRawStringPbStyleWithColumns(40))); - } TEST_F(FormatTestRawStrings, PrefixAndSuffixAlignment) { @@ -770,8 +762,7 @@ TEST_F(FormatTestRawStrings, DontFormatNonRawStrings) { TEST_F(FormatTestRawStrings, FormatsRawStringsWithEnclosingFunctionName) { FormatStyle Style = getRawStringPbStyleWithColumns(40); - Style.RawStringFormats[0].EnclosingFunctions.push_back( - "PARSE_TEXT_PROTO"); + Style.RawStringFormats[0].EnclosingFunctions.push_back("PARSE_TEXT_PROTO"); Style.RawStringFormats[0].EnclosingFunctions.push_back("ParseTextProto"); expect_eq(R"test(a = PARSE_TEXT_PROTO(R"(key: value)");)test", format(R"test(a = PARSE_TEXT_PROTO(R"(key:value)");)test", Style)); @@ -816,7 +807,8 @@ xxxxxxxaaaaax wwwwwww = _Verxrrrrrrrr(PARSE_TEXT_PROTO(R"pb( xxxxxxxaaaaax wwwwwww = _Verxrrrrrrrr(PARSE_TEXT_PROTO(R"pb( Category: aaaaaaaaaaaaaaaaaaaaaaaaaa )pb")); -)test", Style)); +)test", + Style)); // The '(' in R"pb is at column 61, break. expect_eq(R"test( xxxxxxxaaaaax wwwwwww = @@ -828,14 +820,15 @@ xxxxxxxaaaaax wwwwwww = xxxxxxxaaaaax wwwwwww = _Verxrrrrrrrrr(PARSE_TEXT_PROTO(R"pb( Category: aaaaaaaaaaaaaaaaaaaaaaaaaa )pb")); -)test", Style)); +)test", + Style)); } TEST_F(FormatTestRawStrings, KeepsRBraceFolloedByMoreLBracesOnSameLine) { FormatStyle Style = getRawStringPbStyleWithColumns(80); expect_eq( - R"test( + R"test( int f() { if (1) { TTTTTTTTTTTTTTTTTTTTT s = PARSE_TEXT_PROTO(R"pb( @@ -849,8 +842,8 @@ int f() { } } )test", - format( - R"test( + format( + R"test( int f() { if (1) { TTTTTTTTTTTTTTTTTTTTT s = PARSE_TEXT_PROTO(R"pb( @@ -862,14 +855,13 @@ int f() { } } )test", - Style)); + Style)); } TEST_F(FormatTestRawStrings, DoNotFormatUnrecognizedDelimitersInRecognizedFunctions) { FormatStyle Style = getRawStringPbStyleWithColumns(60); - Style.RawStringFormats[0].EnclosingFunctions.push_back( - "EqualsProto"); + Style.RawStringFormats[0].EnclosingFunctions.push_back("EqualsProto"); // EqualsProto is a recognized function, but the Raw delimiter is // unrecognized. Do not touch the string in this case, since it might be // special. 
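The `EnclosingFunctions` behavior exercised here is configured through `FormatStyle::RawStringFormats`. As a sketch of the configuration shape (field order follows the aggregate initialization in `UsesConfigurationOverBaseStyle` above; this is not code from the patch):

```
#include "clang/Format/Format.h"

clang::format::FormatStyle makePbStyle() {
  using clang::format::FormatStyle;
  FormatStyle Style = clang::format::getLLVMStyle();
  Style.RawStringFormats = {
      {FormatStyle::LK_TextProto, // format raw string contents as text proto
       {"pb"},                    // delimiters that mark such raw strings
       {"ParseTextProto"},        // calls whose raw string args also qualify
       /*CanonicalDelimiter=*/"pb",
       /*BasedOnStyle=*/"google"}};
  return Style;
}
```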
@@ -933,7 +925,7 @@ int f() { } )test", Style)); - + expect_eq(R"test( int f() { int a = g(x, @@ -978,7 +970,8 @@ int f() { int f() { int a = g(R"pb(key: 1)pb", 2); -})test", Style)); +})test", + Style)); } TEST_F(FormatTestRawStrings, IndentsLastParamAfterNewline) { diff --git a/clang/unittests/Format/FormatTestSelective.cpp b/clang/unittests/Format/FormatTestSelective.cpp index f031a3dee54a2..c88d1b8bd8ba2 100644 --- a/clang/unittests/Format/FormatTestSelective.cpp +++ b/clang/unittests/Format/FormatTestSelective.cpp @@ -122,7 +122,8 @@ TEST_F(FormatTestSelective, FormatsCommentsLocally) { "int b;", format("int a; // comment\n" "// comment 2\n" - "int b;", 28, 0)); + "int b;", + 28, 0)); EXPECT_EQ("int aaaaaa; // comment\n" "int b;\n" "int c; // unrelated comment", @@ -585,14 +586,13 @@ TEST_F(FormatTestSelective, StopFormattingWhenLeavingScope) { TEST_F(FormatTestSelective, SelectivelyRequoteJavaScript) { Style = getGoogleStyle(FormatStyle::LK_JavaScript); - EXPECT_EQ( - "var x = \"a\";\n" - "var x = 'a';\n" - "var x = \"a\";", - format("var x = \"a\";\n" - "var x = \"a\";\n" - "var x = \"a\";", - 20, 0)); + EXPECT_EQ("var x = \"a\";\n" + "var x = 'a';\n" + "var x = \"a\";", + format("var x = \"a\";\n" + "var x = \"a\";\n" + "var x = \"a\";", + 20, 0)); } TEST_F(FormatTestSelective, KeepsIndentAfterCommentSectionImport) { diff --git a/clang/unittests/Format/FormatTestTextProto.cpp b/clang/unittests/Format/FormatTestTextProto.cpp index dba81fcd3aabf..3ae13d1728657 100644 --- a/clang/unittests/Format/FormatTestTextProto.cpp +++ b/clang/unittests/Format/FormatTestTextProto.cpp @@ -199,7 +199,7 @@ TEST_F(FormatTestTextProto, SupportsAngleBracketMessageFields) { "field_c: {}"); verifyFormat("field_a < field_b: 1 >,\n" - "msg_fid: < fiel_b: 123 >,\n" + "msg_fid: < fiel_b: 123 >,\n" "field_c <>"); verifyFormat("field_a < field_b: 1 >\n" @@ -450,14 +450,16 @@ TEST_F(FormatTestTextProto, FormatsExtensions) { " bbbbbbbbbbbbbb] { key: value }"); // These go over the column limit intentionally, since the alternative // [aa..a\n] is worse. - verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa] {\n" - " key: value\n" - "}"); - verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa] {\n" - " [type.type] {\n" - " keyyyyyyyyyyyyyy: valuuuuuuuuuuuuuuuuuuuuuuuuue\n" - " }\n" - "}"); + verifyFormat( + "[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa] {\n" + " key: value\n" + "}"); + verifyFormat( + "[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa] {\n" + " [type.type] {\n" + " keyyyyyyyyyyyyyy: valuuuuuuuuuuuuuuuuuuuuuuuuue\n" + " }\n" + "}"); verifyFormat("[aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/\n" " bbbbbbb] {\n" " [type.type] {\n" @@ -517,8 +519,7 @@ TEST_F(FormatTestTextProto, FormatsRepeatedListInitializers) { " 'long'\n" " ]\n" "}\n" - "key: value" - ); + "key: value"); FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto); Style.ColumnLimit = 60; // To make writing tests easier. 
Style.Cpp11BracedListStyle = true; @@ -555,7 +556,8 @@ TEST_F(FormatTestTextProto, PutsMultipleEntriesInExtensionsOnNewlines) { " key: value\n" " key: value\n" " }\n" - "}", Style); + "}", + Style); } TEST_F(FormatTestTextProto, BreaksAfterBraceFollowedByClosingBraceOnNextLine) { @@ -734,5 +736,5 @@ TEST_F(FormatTestTextProto, KeepsAmpersandsNextToKeys) { "}"); } -} // end namespace tooling +} // namespace format } // end namespace clang diff --git a/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp b/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp index ad77c747a592a..fee8597b4330e 100644 --- a/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp +++ b/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp @@ -423,8 +423,7 @@ TEST_F(NamespaceEndCommentsFixerTest, AddsNewlineIfNeeded) { TEST_F(NamespaceEndCommentsFixerTest, DoesNotAddEndCommentForShortNamespace) { EXPECT_EQ("namespace {}", fixNamespaceEndComments("namespace {}")); EXPECT_EQ("namespace A {}", fixNamespaceEndComments("namespace A {}")); - EXPECT_EQ("namespace A { a }", - fixNamespaceEndComments("namespace A { a }")); + EXPECT_EQ("namespace A { a }", fixNamespaceEndComments("namespace A { a }")); EXPECT_EQ("namespace A { a };", fixNamespaceEndComments("namespace A { a };")); } diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp index 7a073a3148902..db3ed65d443b8 100644 --- a/clang/unittests/Format/SortIncludesTest.cpp +++ b/clang/unittests/Format/SortIncludesTest.cpp @@ -38,8 +38,7 @@ class SortIncludesTest : public ::testing::Test { return *Result; } - std::string sort(StringRef Code, - StringRef FileName = "input.cpp", + std::string sort(StringRef Code, StringRef FileName = "input.cpp", unsigned ExpectedNumRanges = 1) { return sort(Code, GetCodeRange(Code), FileName, ExpectedNumRanges); } @@ -227,7 +226,8 @@ TEST_F(SortIncludesTest, SupportClangFormatOffCStyle) { "#include \n" "#include \n" "#include \n" - "/* clang-format onwards */\n", "input.h", 2)); + "/* clang-format onwards */\n", + "input.h", 2)); } TEST_F(SortIncludesTest, IncludeSortingCanBeDisabled) { @@ -291,7 +291,8 @@ TEST_F(SortIncludesTest, SortsLocallyInEachBlock) { sort("#include \"a.h\"\n" "#include \"c.h\"\n" "\n" - "#include \"b.h\"\n", "input.h", 0)); + "#include \"b.h\"\n", + "input.h", 0)); } TEST_F(SortIncludesTest, SortsAllBlocksWhenMerging) { @@ -762,7 +763,8 @@ TEST_F(SortIncludesTest, DoNotSortLikelyXml) { sort("", "input.h", 0)); + "-->", + "input.h", 0)); } TEST_F(SortIncludesTest, DoNotOutputReplacementsForSortedBlocksWithRegrouping) { diff --git a/clang/unittests/Format/UsingDeclarationsSorterTest.cpp b/clang/unittests/Format/UsingDeclarationsSorterTest.cpp index 0f517d0a6116c..cd5d456b1d737 100644 --- a/clang/unittests/Format/UsingDeclarationsSorterTest.cpp +++ b/clang/unittests/Format/UsingDeclarationsSorterTest.cpp @@ -343,7 +343,8 @@ TEST_F(UsingDeclarationsSorterTest, SortsPartialRangeOfUsingDeclarations) { {tooling::Range(19, 1)})); } -TEST_F(UsingDeclarationsSorterTest, SortsUsingDeclarationsWithLeadingkComments) { +TEST_F(UsingDeclarationsSorterTest, + SortsUsingDeclarationsWithLeadingkComments) { EXPECT_EQ("/* comment */ using a;\n" "/* comment */ using b;", sortUsingDeclarations("/* comment */ using b;\n" @@ -366,7 +367,6 @@ TEST_F(UsingDeclarationsSorterTest, DeduplicatesUsingDeclarations) { "using e;\n" "using a;\n" "using e;")); - } } // end namespace diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp index 
782b3f6a2b448..5bd2864b5ba1e 100644 --- a/clang/unittests/Tooling/ToolingTest.cpp +++ b/clang/unittests/Tooling/ToolingTest.cpp @@ -530,9 +530,11 @@ TEST(ClangToolTest, StripDependencyFileAdjuster) { EXPECT_TRUE(HasFlag("-w")); } -// Check getClangStripDependencyFileAdjuster strips /showIncludes +// Check getClangStripDependencyFileAdjuster strips /showIncludes and variants TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludes) { - FixedCompilationDatabase Compilations("/", {"/showIncludes", "-c"}); + FixedCompilationDatabase Compilations( + "/", {"/showIncludes", "/showIncludes:user", "-showIncludes", + "-showIncludes:user", "-c"}); ClangTool Tool(Compilations, std::vector(1, "/a.cc")); Tool.mapVirtualFile("/a.cc", "void a() {}"); @@ -555,34 +557,9 @@ TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludes) { return llvm::find(FinalArgs, Flag) != FinalArgs.end(); }; EXPECT_FALSE(HasFlag("/showIncludes")); - EXPECT_TRUE(HasFlag("-c")); -} - -// Check getClangStripDependencyFileAdjuster strips /showIncludes:user -TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludesUser) { - FixedCompilationDatabase Compilations("/", {"/showIncludes:user", "-c"}); - - ClangTool Tool(Compilations, std::vector(1, "/a.cc")); - Tool.mapVirtualFile("/a.cc", "void a() {}"); - - std::unique_ptr Action( - newFrontendActionFactory()); - - CommandLineArguments FinalArgs; - ArgumentsAdjuster CheckFlagsAdjuster = - [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) { - FinalArgs = Args; - return Args; - }; - Tool.clearArgumentsAdjusters(); - Tool.appendArgumentsAdjuster(getClangStripDependencyFileAdjuster()); - Tool.appendArgumentsAdjuster(CheckFlagsAdjuster); - Tool.run(Action.get()); - - auto HasFlag = [&FinalArgs](const std::string &Flag) { - return llvm::find(FinalArgs, Flag) != FinalArgs.end(); - }; EXPECT_FALSE(HasFlag("/showIncludes:user")); + EXPECT_FALSE(HasFlag("-showIncludes")); + EXPECT_FALSE(HasFlag("-showIncludes:user")); EXPECT_TRUE(HasFlag("-c")); } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 9376cd63939fe..5b734fe44e47a 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -94,7 +94,9 @@ class SVEType { bool isDefault() const { return DefaultType; } bool isFloat() const { return Float; } bool isInteger() const { return !Float && !Predicate; } - bool isScalarPredicate() const { return !Float && ElementBitwidth == 1; } + bool isScalarPredicate() const { + return !Float && Predicate && NumVectors == 0; + } bool isPredicateVector() const { return Predicate; } bool isPredicatePattern() const { return PredicatePattern; } bool isPrefetchOp() const { return PrefetchOp; } @@ -235,6 +237,23 @@ class Intrinsic { class SVEEmitter { private: + // The reinterpret builtins are generated separately because they + // need the cross product of all types (121 functions in total), + // which is inconvenient to specify in the arm_sve.td file or + // generate in CGBuiltin.cpp. 
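To make the comment concrete: with the 11 suffixes in the table below (`s8` through `f64`), the nested loops later in this patch emit 11 × 11 = 121 functions. For the (`s8`, `u16`) pair, the generated header and builtin-table text should come out roughly as follows (reconstructed from the emitter code in this patch, not copied from a generated file):

```
// Header, long form: a forwarding macro.
#define svreinterpret_s8_u16(...) __builtin_sve_reinterpret_s8_u16(__VA_ARGS__)

// Header, short form: an __aio (always-inline, overloadable) wrapper, so
// svreinterpret_s8 ends up with one overload per source type.
__aio svint8_t svreinterpret_s8(svuint16_t op) {
  return __builtin_sve_reinterpret_s8_u16(op);
}

// Builtins table: "q16Sc" encodes svint8_t and "q8Us" svuint16_t, per the
// ReinterpretTypeInfo entries below.
BUILTIN(__builtin_sve_reinterpret_s8_u16, "q16Scq8Us", "n")
```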
+ struct ReinterpretTypeInfo { + const char *Suffix; + const char *Type; + const char *BuiltinType; + }; + SmallVector Reinterprets = { + {"s8", "svint8_t", "q16Sc"}, {"s16", "svint16_t", "q8Ss"}, + {"s32", "svint32_t", "q4Si"}, {"s64", "svint64_t", "q2SWi"}, + {"u8", "svuint8_t", "q16Uc"}, {"u16", "svuint16_t", "q8Us"}, + {"u32", "svuint32_t", "q4Ui"}, {"u64", "svuint64_t", "q2UWi"}, + {"f16", "svfloat16_t", "q8h"}, {"f32", "svfloat32_t", "q4f"}, + {"f64", "svfloat64_t", "q2d"}}; + RecordKeeper &Records; llvm::StringMap EltTypes; llvm::StringMap MemEltTypes; @@ -407,12 +426,12 @@ std::string SVEType::str() const { if (Float) S += "float"; - else if (isScalarPredicate()) + else if (isScalarPredicate() || isPredicateVector()) S += "bool"; else S += "int"; - if (!isScalarPredicate()) + if (!isScalarPredicate() && !isPredicateVector()) S += utostr(ElementBitwidth); if (!isScalableVector() && isVector()) S += "x" + utostr(getNumElements()); @@ -433,7 +452,6 @@ void SVEType::applyTypespec() { switch (I) { case 'P': Predicate = true; - ElementBitwidth = 1; break; case 'U': Signed = false; @@ -510,6 +528,18 @@ void SVEType::applyModifier(char Mod) { Bitwidth = ElementBitwidth; NumVectors = 0; break; + case 'K': + Signed = true; + Float = false; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'L': + Signed = false; + Float = false; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; case 'u': Predicate = false; Signed = false; @@ -581,6 +611,16 @@ void SVEType::applyModifier(char Mod) { ElementBitwidth = Bitwidth = 64; NumVectors = 0; break; + case 'f': + Signed = false; + ElementBitwidth = Bitwidth = 64; + NumVectors = 0; + break; + case 'g': + Signed = false; + Float = false; + ElementBitwidth = 64; + break; case 't': Signed = true; Float = false; @@ -606,6 +646,12 @@ void SVEType::applyModifier(char Mod) { Float = true; ElementBitwidth = 64; break; + case 'Q': + Constant = true; + Pointer = true; + Void = true; + NumVectors = 0; + break; case 'S': Constant = true; Pointer = true; @@ -979,6 +1025,10 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "#error \"SVE support not enabled\"\n"; OS << "#else\n\n"; + OS << "#if !defined(__LITTLE_ENDIAN__)\n"; + OS << "#error \"Big endian is currently not supported for arm_sve.h\"\n"; + OS << "#endif\n"; + OS << "#include \n\n"; OS << "#ifdef __cplusplus\n"; OS << "extern \"C\" {\n"; @@ -1045,6 +1095,22 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "#define __aio static inline __attribute__((__always_inline__, " "__nodebug__, __overloadable__))\n\n"; + // Add reinterpret functions. + for (auto ShortForm : { false, true } ) + for (const ReinterpretTypeInfo &From : Reinterprets) + for (const ReinterpretTypeInfo &To : Reinterprets) { + if (ShortForm) { + OS << "__aio " << From.Type << " svreinterpret_" << From.Suffix; + OS << "(" << To.Type << " op) {\n"; + OS << " return __builtin_sve_reinterpret_" << From.Suffix << "_" + << To.Suffix << "(op);\n"; + OS << "}\n\n"; + } else + OS << "#define svreinterpret_" << From.Suffix << "_" << To.Suffix + << "(...) 
__builtin_sve_reinterpret_" << From.Suffix << "_" + << To.Suffix << "(__VA_ARGS__)\n"; + } + SmallVector, 128> Defs; std::vector RV = Records.getAllDerivedDefinitions("Inst"); for (auto *R : RV) @@ -1119,8 +1185,16 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) { OS << "BUILTIN(__builtin_sve_" << Def->getMangledName() << ", \"" << Def->getBuiltinTypeStr() << "\", \"n\")\n"; } + + // Add reinterpret builtins + for (const ReinterpretTypeInfo &From : Reinterprets) + for (const ReinterpretTypeInfo &To : Reinterprets) + OS << "BUILTIN(__builtin_sve_reinterpret_" << From.Suffix << "_" + << To.Suffix << +", \"" << From.BuiltinType << To.BuiltinType + << "\", \"n\")\n"; + OS << "#endif\n\n"; -} + } void SVEEmitter::createCodeGenMap(raw_ostream &OS) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index 1f8612206b7af..172076fd0ce59 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -23,7 +23,7 @@ if(LLVM_BUILD_INSTRUMENTED) ) add_custom_target(clear-profraw - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} profraw COMMENT "Clearing old profraw data") if(NOT LLVM_PROFDATA) @@ -34,7 +34,7 @@ if(LLVM_BUILD_INSTRUMENTED) message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") else() add_custom_target(generate-profdata - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Merging profdata" DEPENDS generate-profraw) endif() @@ -55,7 +55,7 @@ if(APPLE AND DTRACE) ) add_custom_target(clear-dtrace-logs - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} dtrace + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} dtrace COMMENT "Clearing old dtrace data") if(NOT CLANG_ORDER_FILE) @@ -63,7 +63,7 @@ if(APPLE AND DTRACE) endif() add_custom_target(generate-order-file - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py gen-order-file --binary $ --output ${CLANG_ORDER_FILE} ${CMAKE_CURRENT_BINARY_DIR} + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py gen-order-file --binary $ --output ${CLANG_ORDER_FILE} ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Generating order file" DEPENDS generate-dtrace-logs) endif() diff --git a/clang/utils/perf-training/lit.site.cfg.in b/clang/utils/perf-training/lit.site.cfg.in index 340a0e909b109..a1c6d6bc663da 100644 --- a/clang/utils/perf-training/lit.site.cfg.in +++ b/clang/utils/perf-training/lit.site.cfg.in @@ -7,7 +7,7 @@ config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@" config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" config.test_source_root = "@CLANG_PGO_TRAINING_DATA@" config.target_triple = "@TARGET_TRIPLE@" -config.python_exe = "@PYTHON_EXECUTABLE@" +config.python_exe = "@Python3_EXECUTABLE@" # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. 
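Stepping back to the arm_sve.h generation above: from the user's side, both generated spellings resolve to the same builtin. A sketch of the intended usage, assuming an SVE-enabled compiler and target:

```
#include <arm_sve.h>

svint8_t reinterpretBits(svuint16_t u) {
  // Overloaded short form and explicit long form; both lower to
  // __builtin_sve_reinterpret_s8_u16 through the generated header.
  svint8_t a = svreinterpret_s8(u);
  svint8_t b = svreinterpret_s8_u16(u);
  return svadd_s8_x(svptrue_b8(), a, b);
}
```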
diff --git a/clang/utils/perf-training/order-files.lit.site.cfg.in b/clang/utils/perf-training/order-files.lit.site.cfg.in index 87406dbaf9a6e..91f2b79d208d5 100644 --- a/clang/utils/perf-training/order-files.lit.site.cfg.in +++ b/clang/utils/perf-training/order-files.lit.site.cfg.in @@ -7,7 +7,7 @@ config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@" config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" config.test_source_root = "@CLANG_PGO_TRAINING_DATA@" config.target_triple = "@TARGET_TRIPLE@" -config.python_exe = "@PYTHON_EXECUTABLE@" +config.python_exe = "@Python3_EXECUTABLE@" # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index fffaabe97ef0b..8619b6e6280ca 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -81,12 +81,34 @@ if (COMPILER_RT_STANDALONE_BUILD) set_target_properties(intrinsics_gen PROPERTIES FOLDER "Compiler-RT Misc") endif() - # Find Python interpreter. - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR " - Unable to find Python interpreter required testing. Please install Python - or specify the PYTHON_EXECUTABLE CMake variable.") + if(CMAKE_VERSION VERSION_LESS 3.12) + # Find Python interpreter. + include(FindPythonInterp) + if(NOT PYTHONINTERP_FOUND) + message(FATAL_ERROR " + Unable to find Python interpreter required for testing. Please install Python + or specify the PYTHON_EXECUTABLE CMake variable.") + endif() + + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + else() + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") + endif() + + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) + endif() endif() # Ensure that fat libraries are built correctly on Darwin diff --git a/compiler-rt/cmake/Modules/SanitizerUtils.cmake b/compiler-rt/cmake/Modules/SanitizerUtils.cmake index 699b03ae6a119..6c8651df3b3cc 100644 --- a/compiler-rt/cmake/Modules/SanitizerUtils.cmake +++ b/compiler-rt/cmake/Modules/SanitizerUtils.cmake @@ -34,7 +34,7 @@ macro(add_sanitizer_rt_symbols name) list(APPEND extra_args "--extra" ${arg}) endforeach() add_custom_command(OUTPUT ${stamp} - COMMAND ${PYTHON_EXECUTABLE} + COMMAND ${Python3_EXECUTABLE} ${SANITIZER_GEN_DYNAMIC_LIST} ${extra_args} $ --nm-executable "${SANITIZER_NM}" -o $.syms COMMAND ${CMAKE_COMMAND} -E touch ${stamp} @@ -84,7 +84,7 @@ macro(add_sanitizer_rt_version_list name) list(APPEND args "$") endforeach() add_custom_command(OUTPUT ${vers} - COMMAND ${PYTHON_EXECUTABLE} + COMMAND ${Python3_EXECUTABLE} ${SANITIZER_GEN_DYNAMIC_LIST} --version-list ${args} --nm-executable "${SANITIZER_NM}" -o ${vers} DEPENDS ${SANITIZER_GEN_DYNAMIC_LIST} ${ARG_EXTRA} ${ARG_LIBS} @@ -100,7 +100,7 @@ endmacro() if(CMAKE_HOST_UNIX AND NOT OS_NAME MATCHES "OpenBSD") add_custom_target(SanitizerLintCheck COMMAND env LLVM_CHECKOUT=${LLVM_MAIN_SRC_DIR} SILENT=1 TMPDIR= - 
PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} + PYTHON_EXECUTABLE=${Python3_EXECUTABLE} COMPILER_RT=${COMPILER_RT_SOURCE_DIR} ${SANITIZER_LINT_SCRIPT} DEPENDS ${SANITIZER_LINT_SCRIPT} diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 618f902a3098c..a3a827cdb6ab6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -765,16 +765,23 @@ bool SignalContext::IsTrueFaultingAddress() const { return si->si_signo == SIGSEGV && si->si_code != 0; } +#if defined(__aarch64__) && defined(arm_thread_state64_get_sp) + #define AARCH64_GET_REG(r) \ + arm_thread_state64_get_##r(ucontext->uc_mcontext->__ss) +#else + #define AARCH64_GET_REG(r) ucontext->uc_mcontext->__ss.__##r +#endif + static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { ucontext_t *ucontext = (ucontext_t*)context; # if defined(__aarch64__) - *pc = ucontext->uc_mcontext->__ss.__pc; + *pc = AARCH64_GET_REG(pc); # if defined(__IPHONE_8_0) && __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_8_0 - *bp = ucontext->uc_mcontext->__ss.__fp; + *bp = AARCH64_GET_REG(fp); # else - *bp = ucontext->uc_mcontext->__ss.__lr; + *bp = AARCH64_GET_REG(lr); # endif - *sp = ucontext->uc_mcontext->__ss.__sp; + *sp = AARCH64_GET_REG(sp); # elif defined(__x86_64__) *pc = ucontext->uc_mcontext->__ss.__rip; *bp = ucontext->uc_mcontext->__ss.__rbp; @@ -1128,6 +1135,8 @@ void SignalContext::DumpAllRegisters(void *context) { ucontext_t *ucontext = (ucontext_t*)context; # define DUMPREG64(r) \ Printf("%s = 0x%016llx ", #r, ucontext->uc_mcontext->__ss.__ ## r); +# define DUMPREGA64(r) \ + Printf("%s = 0x%016llx ", #r, AARCH64_GET_REG(r)); # define DUMPREG32(r) \ Printf("%s = 0x%08x ", #r, ucontext->uc_mcontext->__ss.__ ## r); # define DUMPREG_(r) Printf(" "); DUMPREG(r); @@ -1153,7 +1162,7 @@ void SignalContext::DumpAllRegisters(void *context) { DUMPREG(x[16]); DUMPREG(x[17]); DUMPREG(x[18]); DUMPREG(x[19]); Printf("\n"); DUMPREG(x[20]); DUMPREG(x[21]); DUMPREG(x[22]); DUMPREG(x[23]); Printf("\n"); DUMPREG(x[24]); DUMPREG(x[25]); DUMPREG(x[26]); DUMPREG(x[27]); Printf("\n"); - DUMPREG(x[28]); DUMPREG___(fp); DUMPREG___(lr); DUMPREG___(sp); Printf("\n"); + DUMPREG(x[28]); DUMPREGA64(fp); DUMPREGA64(lr); DUMPREGA64(sp); Printf("\n"); # elif defined(__arm__) # define DUMPREG(r) DUMPREG32(r) DUMPREG_(r[0]); DUMPREG_(r[1]); DUMPREG_(r[2]); DUMPREG_(r[3]); Printf("\n"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp index c7e12e59a32b0..6c577426ad566 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_mac.cpp @@ -160,7 +160,11 @@ PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( } internal_memcpy(buffer, ®s, sizeof(regs)); +#if defined(__aarch64__) && defined(arm_thread_state64_get_sp) + *sp = arm_thread_state64_get_sp(regs); +#else *sp = regs.SP_REG; +#endif // On x86_64 and aarch64, we must account for the stack redzone, which is 128 // bytes. diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index b44753a4373af..583b47a14258b 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -147,7 +147,10 @@ class Allocator { // Store some flags locally. 
Options.MayReturnNull = getFlags()->may_return_null; - Options.ZeroContents = getFlags()->zero_contents; + Options.FillContents = + getFlags()->zero_contents + ? ZeroFill + : (getFlags()->pattern_fill_contents ? PatternOrZeroFill : NoFill); Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch; Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch; Options.TrackAllocationStacks = false; @@ -256,7 +259,8 @@ class Allocator { } #endif // GWP_ASAN_HOOKS - ZeroContents |= static_cast(Options.ZeroContents); + FillContentsMode FillContents = + ZeroContents ? ZeroFill : Options.FillContents; if (UNLIKELY(Alignment > MaxAlignment)) { if (Options.MayReturnNull) @@ -309,7 +313,7 @@ class Allocator { } if (UNLIKELY(ClassId == 0)) Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd, - ZeroContents); + FillContents); if (UNLIKELY(!Block)) { if (Options.MayReturnNull) @@ -391,10 +395,11 @@ class Allocator { TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd); } storeAllocationStackMaybe(Ptr); - } else if (UNLIKELY(ZeroContents)) { + } else if (UNLIKELY(FillContents != NoFill)) { // This condition is not necessarily unlikely, but since memset is // costly, we might as well mark it as such. - memset(Block, 0, PrimaryT::getSizeByClassId(ClassId)); + memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte, + PrimaryT::getSizeByClassId(ClassId)); } } @@ -721,9 +726,15 @@ class Allocator { void disableMemoryTagging() { Primary.disableMemoryTagging(); } void setTrackAllocationStacks(bool Track) { + initThreadMaybe(); Options.TrackAllocationStacks = Track; } + void setFillContents(FillContentsMode FillContents) { + initThreadMaybe(); + Options.FillContents = FillContents; + } + const char *getStackDepotAddress() const { return reinterpret_cast(&Depot); } @@ -898,7 +909,7 @@ class Allocator { struct { u8 MayReturnNull : 1; // may_return_null - u8 ZeroContents : 1; // zero_contents + FillContentsMode FillContents : 2; // zero_contents, pattern_fill_contents u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch u8 DeleteSizeMismatch : 1; // delete_size_mismatch u8 TrackAllocationStacks : 1; @@ -972,7 +983,9 @@ class Allocator { if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) { u8 PrevTag = extractTag(loadTag(reinterpret_cast(Ptr))); uptr TaggedBegin, TaggedEnd; - setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); + // Exclude the previous tag so that immediate use after free is detected + // 100% of the time. + setRandomTag(Ptr, Size, 1UL << PrevTag, &TaggedBegin, &TaggedEnd); storeDeallocationStackMaybe(Ptr, PrevTag); } // If the quarantine is disabled, the actual size of a chunk is 0 or larger diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index 350d8d9fcd91e..9037f92b4976c 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -182,6 +182,15 @@ struct BlockInfo { uptr RegionEnd; }; +constexpr unsigned char PatternFillByte = 0xAB; + +enum FillContentsMode { + NoFill = 0, + ZeroFill = 1, + PatternOrZeroFill = 2 // Pattern fill unless the memory is known to be + // zero-initialized already. 
+}; + } // namespace scudo #endif // SCUDO_COMMON_H_ diff --git a/compiler-rt/lib/scudo/standalone/flags.inc b/compiler-rt/lib/scudo/standalone/flags.inc index 342af1c79ad64..b5cab47341662 100644 --- a/compiler-rt/lib/scudo/standalone/flags.inc +++ b/compiler-rt/lib/scudo/standalone/flags.inc @@ -34,6 +34,9 @@ SCUDO_FLAG(bool, delete_size_mismatch, true, SCUDO_FLAG(bool, zero_contents, false, "Zero chunk contents on allocation.") +SCUDO_FLAG(bool, pattern_fill_contents, false, + "Pattern fill chunk contents on allocation.") + SCUDO_FLAG(int, rss_limit_mb, -1, "Enforce an upper limit (in megabytes) to the process RSS. The " "allocator will terminate or return NULL when allocations are " diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index 18dae2b8ca41d..6f347f4694e82 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -93,8 +93,8 @@ class ScopedDisableMemoryTagChecks { } }; -inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, - uptr *TaggedEnd) { +inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, + uptr *TaggedBegin, uptr *TaggedEnd) { void *End; __asm__ __volatile__( R"( @@ -102,7 +102,7 @@ inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, // Set a random tag for Ptr in TaggedPtr. This needs to happen even if // Size = 0 so that TaggedPtr ends up pointing at a valid address. - irg %[TaggedPtr], %[Ptr] + irg %[TaggedPtr], %[Ptr], %[ExcludeMask] mov %[Cur], %[TaggedPtr] // Skip the loop if Size = 0. We don't want to do any tagging in this case. @@ -120,9 +120,9 @@ inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, 2: )" - : [ TaggedPtr ] "=&r"(*TaggedBegin), [ Cur ] "=&r"(*TaggedEnd), - [ End ] "=&r"(End) - : [ Ptr ] "r"(Ptr), [ Size ] "r"(Size) + : + [TaggedPtr] "=&r"(*TaggedBegin), [Cur] "=&r"(*TaggedEnd), [End] "=&r"(End) + : [Ptr] "r"(Ptr), [Size] "r"(Size), [ExcludeMask] "r"(ExcludeMask) : "memory"); } @@ -138,7 +138,7 @@ inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) { : "memory"); uptr TaggedBegin, TaggedEnd; - setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd); + setRandomTag(Ptr, Size, 0, &TaggedBegin, &TaggedEnd); // Finally, set the tag of the granule past the end of the allocation to 0, // to catch linear overflows even if a previous larger allocation used the @@ -225,10 +225,11 @@ struct ScopedDisableMemoryTagChecks { ScopedDisableMemoryTagChecks() {} }; -inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin, - uptr *TaggedEnd) { +inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, + uptr *TaggedBegin, uptr *TaggedEnd) { (void)Ptr; (void)Size; + (void)ExcludeMask; (void)TaggedBegin; (void)TaggedEnd; UNREACHABLE("memory tagging not supported"); diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 9d5f130f2d45a..84eaa5091b434 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -236,7 +236,7 @@ template class MapAllocator { } void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, - bool ZeroContents = false); + FillContentsMode FillContents = NoFill); void deallocate(void *Ptr); @@ -299,7 +299,8 @@ template class MapAllocator { // (pending rounding and headers). 
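Before the `MapAllocator::allocate` plumbing below: the whole fill policy reduces to choosing a `memset` byte at allocation time. A distilled sketch of the logic this patch threads through `combined.h` and `secondary.h`:

```
#include <cstddef>
#include <cstring>

// Mirrors the definitions added to scudo's common.h in this patch.
enum FillContentsMode { NoFill = 0, ZeroFill = 1, PatternOrZeroFill = 2 };
constexpr unsigned char PatternFillByte = 0xAB;

// NoFill leaves the block untouched; otherwise the block is filled with
// zeroes or the pattern byte. PatternOrZeroFill may still yield zeroed
// memory when the block is known to be zero-initialized already (for
// example, fresh pages from the OS), which is why the unit test further
// below accepts either byte for secondary allocations.
void fillBlock(void *Ptr, std::size_t Size, FillContentsMode Mode) {
  if (Mode == NoFill)
    return;
  std::memset(Ptr, Mode == ZeroFill ? 0 : PatternFillByte, Size);
}
```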
template void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, - uptr *BlockEnd, bool ZeroContents) { + uptr *BlockEnd, + FillContentsMode FillContents) { DCHECK_GE(Size, AlignmentHint); const uptr PageSize = getPageSizeCached(); const uptr RoundedSize = @@ -312,8 +313,9 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, *BlockEnd = H->BlockEnd; void *Ptr = reinterpret_cast(reinterpret_cast(H) + LargeBlock::getHeaderSize()); - if (ZeroContents) - memset(Ptr, 0, H->BlockEnd - reinterpret_cast(Ptr)); + if (FillContents) + memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte, + H->BlockEnd - reinterpret_cast(Ptr)); const uptr BlockSize = H->BlockEnd - reinterpret_cast(H); { ScopedLock L(Mutex); diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index a6f29a2610ed1..b9e1be1f32e8c 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -115,7 +115,44 @@ template static void testAllocator() { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - EXPECT_EQ((reinterpret_cast(P))[I], 0); + ASSERT_EQ((reinterpret_cast(P))[I], 0); + memset(P, 0xaa, Size); + Allocator->deallocate(P, Origin, Size); + } + } + Allocator->releaseToOS(); + + // Ensure that specifying ZeroContents returns a zero'd out block. + Allocator->setFillContents(scudo::ZeroFill); + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + for (scudo::uptr Delta = 0U; Delta <= 4U; Delta++) { + const scudo::uptr Size = (1U << SizeLog) + Delta * 128U; + void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); + EXPECT_NE(P, nullptr); + for (scudo::uptr I = 0; I < Size; I++) + ASSERT_EQ((reinterpret_cast(P))[I], 0); + memset(P, 0xaa, Size); + Allocator->deallocate(P, Origin, Size); + } + } + Allocator->releaseToOS(); + + // Ensure that specifying PatternOrZeroFill returns a pattern-filled block in + // the primary allocator, and either pattern or zero filled block in the + // secondary. + Allocator->setFillContents(scudo::PatternOrZeroFill); + for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) { + for (scudo::uptr Delta = 0U; Delta <= 4U; Delta++) { + const scudo::uptr Size = (1U << SizeLog) + Delta * 128U; + void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); + EXPECT_NE(P, nullptr); + for (scudo::uptr I = 0; I < Size; I++) { + unsigned char V = (reinterpret_cast(P))[I]; + if (AllocatorT::PrimaryT::canAllocate(Size)) + ASSERT_EQ(V, scudo::PatternFillByte); + else + ASSERT_TRUE(V == scudo::PatternFillByte || V == 0); + } memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 765d7daa349d1..4396dfc50d1df 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -228,4 +228,19 @@ SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) { SCUDO_ALLOCATOR.setTrackAllocationStacks(track); } +// Sets whether scudo zero-initializes all allocated memory. The program must +// be single threaded at the point when the function is called. +INTERFACE WEAK void SCUDO_PREFIX(malloc_set_zero_contents)(int zero_contents) { + SCUDO_ALLOCATOR.setFillContents(zero_contents ? 
scudo::ZeroFill + : scudo::NoFill); +} + +// Sets whether scudo pattern-initializes all allocated memory. The program must +// be single threaded at the point when the function is called. +INTERFACE WEAK void +SCUDO_PREFIX(malloc_set_pattern_fill_contents)(int pattern_fill_contents) { + SCUDO_ALLOCATOR.setFillContents( + pattern_fill_contents ? scudo::PatternOrZeroFill : scudo::NoFill); +} + } // extern "C" diff --git a/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/compiler-rt/lib/xray/xray_trampoline_AArch64.S index 9dc862bb55f69..3bf52cef60fed 100644 --- a/compiler-rt/lib/xray/xray_trampoline_AArch64.S +++ b/compiler-rt/lib/xray/xray_trampoline_AArch64.S @@ -27,6 +27,10 @@ __xray_FunctionEntry: STP Q2, Q3, [SP, #-32]! STP Q4, Q5, [SP, #-32]! STP Q6, Q7, [SP, #-32]! + /* X8 is the indirect result register and needs to be preserved for the body + of the function to use */ + STP X8, X0, [SP, #-16]! + /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ ADRP X1, _ZN6__xray19XRayPatchedFunctionE /* Load the handler function pointer into X2 */ @@ -41,6 +45,7 @@ __xray_FunctionEntry: BLR X2 FunctionEntry_restore: /* Pop the saved registers */ + LDP X8, X0, [SP], #16 LDP Q6, Q7, [SP], #32 LDP Q4, Q5, [SP], #32 LDP Q2, Q3, [SP], #32 @@ -71,7 +76,14 @@ __xray_FunctionExit: STP X3, X4, [SP, #-16]! STP X5, X6, [SP, #-16]! STP X7, X30, [SP, #-16]! - STR Q0, [SP, #-16]! + STP Q0, Q1, [SP, #-32]! + STP Q2, Q3, [SP, #-32]! + STP Q4, Q5, [SP, #-32]! + STP Q6, Q7, [SP, #-32]! + /* X8 is the indirect result register and needs to be preserved for the body + of the function to use */ + STP X8, X0, [SP, #-16]! + /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ ADRP X1, _ZN6__xray19XRayPatchedFunctionE /* Load the handler function pointer into X2 */ @@ -85,7 +97,11 @@ __xray_FunctionExit: /* Call the handler with 2 parameters in W0 and X1 */ BLR X2 FunctionExit_restore: - LDR Q0, [SP], #16 + LDP X8, X0, [SP], #16 + LDP Q6, Q7, [SP], #32 + LDP Q4, Q5, [SP], #32 + LDP Q2, Q3, [SP], #32 + LDP Q0, Q1, [SP], #32 LDP X7, X30, [SP], #16 LDP X5, X6, [SP], #16 LDP X3, X4, [SP], #16 diff --git a/compiler-rt/test/asan/TestCases/Darwin/duplicate_os_log_reports.cpp b/compiler-rt/test/asan/TestCases/Darwin/duplicate_os_log_reports.cpp index 5f923d22a9a41..b40d8fef26cb2 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/duplicate_os_log_reports.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/duplicate_os_log_reports.cpp @@ -10,7 +10,7 @@ // Check syslog output. We filter recent system logs based on PID to avoid // getting the logs of previous test runs. 
-// RUN: log show --debug --last 2m --predicate "processID == ${TEST_PID}" --style syslog > %t.process_syslog_output.txt +// RUN: log show --debug --last 5m --predicate "processID == ${TEST_PID}" --style syslog > %t.process_syslog_output.txt // RUN: FileCheck %s -input-file=%t.process_syslog_output.txt #include #include diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index f0618dfc067c2..efd64b3ca9f96 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -181,7 +181,7 @@ config.compile_wrapper = compile_wrapper try: - prepare_output = subprocess.check_output([prepare_script, config.apple_platform, config.clang]).strip() + prepare_output = subprocess.check_output([prepare_script, config.apple_platform, config.clang]).decode().strip() except subprocess.CalledProcessError as e: print("Command failed:") print(e.output) @@ -307,7 +307,8 @@ def get_apple_platform_version_aligned_with(macos_version, apple_platform): osx_version = (10, 0, 0) try: - osx_version = subprocess.check_output(["sw_vers", "-productVersion"]) + osx_version = subprocess.check_output(["sw_vers", "-productVersion"], + universal_newlines=True) osx_version = tuple(int(x) for x in osx_version.split('.')) if len(osx_version) == 2: osx_version = (osx_version[0], osx_version[1], 0) if osx_version >= (10, 11): @@ -319,7 +320,7 @@ def get_apple_platform_version_aligned_with(macos_version, apple_platform): # this "feature", we can pass the test on newer OS X versions and other # platforms. config.available_features.add('osx-no-ld64-live_support') - except: + except subprocess.CalledProcessError: pass config.darwin_osx_version = osx_version diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 60464bcdaa877..a0203c0855651 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -22,7 +22,7 @@ set_default("llvm_shlib_dir", "@LLVM_LIBRARY_OUTPUT_INTDIR@") set_default("gold_executable", "@GOLD_EXECUTABLE@") set_default("clang", "@COMPILER_RT_RESOLVED_TEST_COMPILER@") set_default("compiler_id", "@COMPILER_RT_TEST_COMPILER_ID@") -set_default("python_executable", "@PYTHON_EXECUTABLE@") +set_default("python_executable", "@Python3_EXECUTABLE@") set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@) set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL@) set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@") diff --git a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test index 0c7198e3c4e9e..90857ebb3c32a 100644 --- a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test +++ b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test @@ -1,3 +1,6 @@ +# Work around PR45673 until the test code is fixed +# ALLOW_RETRIES: 2 + RUN: mkdir -p %t.d RUN: cd %t.d diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/aligned_alloc-alignment.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/aligned_alloc-alignment.cpp index 7b119e6a706b4..ef7f1cb44e60c 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/aligned_alloc-alignment.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/aligned_alloc-alignment.cpp @@ -31,7 +31,7 @@ int main(int argc, char **argv) { // CHECK: {{ERROR: .*Sanitizer: invalid alignment requested in aligned_alloc}} // Handle a case when aligned_alloc is aliased by memalign. 
// CHECK: {{#0 .*}}{{aligned_alloc|memalign}} - // CHECK: {{#1 .*main .*aligned_alloc-alignment.cpp:}}[[@LINE-4]] + // CHECK: {{#[12] .*main .*aligned_alloc-alignment.cpp:}}[[@LINE-4]] // CHECK: {{SUMMARY: .*Sanitizer: invalid-aligned-alloc-alignment}} // The NULL pointer is printed differently on different systems, while (long)0 diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/pvalloc-overflow.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/pvalloc-overflow.cpp index c64624389e67c..f0e556e9b24c3 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/pvalloc-overflow.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/pvalloc-overflow.cpp @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) { } // CHECK: {{ERROR: .*Sanitizer: pvalloc parameters overflow: size .* rounded up to system page size .* cannot be represented in type size_t}} // CHECK: {{#0 .*pvalloc}} - // CHECK: {{#1 .*main .*pvalloc-overflow.cpp:}} + // CHECK: {{#[12] .*main .*pvalloc-overflow.cpp:}} // CHECK: {{SUMMARY: .*Sanitizer: pvalloc-overflow}} // The NULL pointer is printed differently on different systems, while (long)0 diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_memalign-alignment.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_memalign-alignment.cpp index 0b8e366d24731..70fba25f59b84 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_memalign-alignment.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_memalign-alignment.cpp @@ -35,7 +35,7 @@ int main(int argc, char **argv) { int res = posix_memalign(&p, alignment, 100); // CHECK: {{ERROR: .*Sanitizer: invalid alignment requested in posix_memalign}} // CHECK: {{#0 .*posix_memalign}} - // CHECK: {{#1 .*main .*posix_memalign-alignment.cpp:}}[[@LINE-3]] + // CHECK: {{#[12] .*main .*posix_memalign-alignment.cpp:}}[[@LINE-3]] // CHECK: {{SUMMARY: .*Sanitizer: invalid-posix-memalign-alignment}} // The NULL pointer is printed differently on different systems, while (long)0 diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt index 9f488e92faf4d..745e2384f363e 100644 --- a/debuginfo-tests/CMakeLists.txt +++ b/debuginfo-tests/CMakeLists.txt @@ -24,21 +24,20 @@ if (WIN32) set(DEBUGINFO_TEST_DEPS ${DEBUGINFO_TEST_DEPS} lld) endif() -if (NOT DEFINED PYTHON_EXECUTABLE) - message(FATAL_ERROR "Cannot run debuginfo-tests without python") -elseif(PYTHON_VERSION_MAJOR LESS 3) +if (NOT DEFINED Python3_EXECUTABLE) message(FATAL_ERROR "Cannot run debuginfo-tests without python 3") -else() - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py - ) - - add_lit_testsuite(check-debuginfo "Running debug info integration tests" - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${DEBUGINFO_TEST_DEPS} - ) - set_target_properties(check-debuginfo PROPERTIES FOLDER "Debug info tests") endif() + +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + ) + +add_lit_testsuite(check-debuginfo "Running debug info integration tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${DEBUGINFO_TEST_DEPS} + ) + +set_target_properties(check-debuginfo PROPERTIES FOLDER "Debug info tests") diff --git a/debuginfo-tests/lit.site.cfg.py.in b/debuginfo-tests/lit.site.cfg.py.in index a07842a3ce8da..d5893f577aed0 100644 --- a/debuginfo-tests/lit.site.cfg.py.in +++ 
b/debuginfo-tests/lit.site.cfg.py.in @@ -22,7 +22,7 @@ config.is_msvc = lit.util.pythonize_bool("@MSVC@") config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.python3_executable = "@PYTHON_EXECUTABLE@" +config.python3_executable = "@Python3_EXECUTABLE@" # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/flang/documentation/BijectiveInternalNameUniquing.md b/flang/documentation/BijectiveInternalNameUniquing.md new file mode 100644 index 0000000000000..e23264aeb0b5f --- /dev/null +++ b/flang/documentation/BijectiveInternalNameUniquing.md @@ -0,0 +1,118 @@ +## Bijective Internal Name Uniquing + +FIR has a flat namespace. No two objects may have the same name at +the module level. (These would be functions, globals, etc.) +This necessitates some sort of encoding scheme to unique +symbols from the front-end into FIR. + +Another requirement is +to be able to reverse these unique names and recover the associated +symbol in the symbol table. + +Fortran is case insensitive, which allows the compiler to convert the +user's identifiers to all lower case. Such a universal conversion implies +that all upper case letters are available for use in uniquing. + +### Prefix `_Q` + +All uniqued names have the prefix sequence `_Q` to indicate the name has +been uniqued. (Q is chosen because it is a +[low frequency letter](http://pi.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html) +in English.) + +### Scope Building + +Symbols can be scoped by the module, submodule, or procedure that contains +that symbol. After the `_Q` sigil, names are constructed from outermost to +innermost scope as + + * Module name prefixed with `M` + * Submodule name prefixed with `S` + * Procedure name prefixed with `F` + +Given: +``` + submodule (mod:s1mod) s2mod + ... + subroutine sub + ... + contains + function fun +``` + +The uniqued name of `fun` becomes: +``` + _QMmodSs1modSs2modFsubPfun +``` + +### Common blocks + + * A common block name will be prefixed with `B` + +### Module scope global data + + * A global data entity is prefixed with `E` + * A global entity that is constant (parameter) will be prefixed with `EC` + +### Procedures/Subprograms + + * A procedure/subprogram is prefixed with `P` + +Given: +``` + subroutine sub +``` +The uniqued name of `sub` becomes: +``` + _QPsub +``` + +### Derived types and related + + * A derived type is prefixed with `T` + * If a derived type has KIND parameters, they are listed in a consistent + canonical order where each takes the form `Ki` and where _i_ is the + compile-time constant value. (All type parameters are integer.) If _i_ + is a negative value, the prefix `KN` will be used and _i_ will reflect + the magnitude of the value. + +Given: +``` + module mymodule + type mytype + integer :: member + end type + ... +``` +The uniqued name of `mytype` becomes: +``` + _QMmymoduleTmytype +``` + +Given: +``` + type yourtype(k1,k2) + integer, kind :: k1, k2 + real :: mem1 + complex :: mem2 + end type +``` + +The uniqued name of `yourtype` where `k1=4` and `k2=-6` (at compile-time): +``` + _QTyourtypeK4KN6 +``` + + * A derived type dispatch table is prefixed with `D`. The dispatch table + for `type t` would be `_QDTt` + * A type descriptor instance is prefixed with `C`. Intrinsic types can + be encoded with their names and kinds. The type descriptor for the + type `yourtype` above would be `_QCTyourtypeK4KN6`. The type + descriptor for `REAL(4)` would be `_QCrealK4`. 
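Pulling the scope-building and procedure rules together, here is a small sketch of how such names could be assembled programmatically (a hypothetical C++ helper for illustration, not the actual FIR mangler interface; kind parameters and the other prefixes above are omitted):

```
#include <cstddef>
#include <string>
#include <vector>

// Builds "_Q" + M<module> + S<submodule>... + F<procedure>... + P<name>.
std::string uniqueProcName(const std::vector<std::string> &modules,
                           const std::vector<std::string> &procs,
                           const std::string &name) {
  std::string result = "_Q";
  for (std::size_t i = 0; i < modules.size(); ++i)
    result += (i == 0 ? "M" : "S") + modules[i]; // module, then submodules
  for (const std::string &p : procs)
    result += "F" + p; // containing procedures
  return result + "P" + name;
}

// uniqueProcName({"mod", "s1mod", "s2mod"}, {"sub"}, "fun")
// yields "_QMmodSs1modSs2modFsubPfun", matching the example above.
```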
+
+### Compiler generated names
+
+Compiler-generated names do not have to be mapped back to Fortran. These
+names are prefixed with `_QQ` and followed by a unique compiler-generated
+identifier. No reverse mapping exists in this case, since no symbol
+derived from the input source exists to map back to.
diff --git a/flang/include/flang/Common/uint128.h b/flang/include/flang/Common/uint128.h
index 996b13778322d..eecf4a8ba1149 100644
--- a/flang/include/flang/Common/uint128.h
+++ b/flang/include/flang/Common/uint128.h
@@ -12,8 +12,11 @@
 #ifndef FORTRAN_COMMON_UINT128_H_
 #define FORTRAN_COMMON_UINT128_H_
 
+// Define AVOID_NATIVE_UINT128_T to force the use of UnsignedInt128 below
+// instead of the C++ compiler's native 128-bit unsigned integer type, if
+// it has one.
 #ifndef AVOID_NATIVE_UINT128_T
-#define AVOID_NATIVE_UINT128_T 1 // always use this code for now for testing
+#define AVOID_NATIVE_UINT128_T 0
 #endif
 
 #include "leading-zero-bit-count.h"
diff --git a/flang/include/flang/Decimal/binary-floating-point.h b/flang/include/flang/Decimal/binary-floating-point.h
index 7cc8384861fb1..24c23b0ce5ce6 100644
--- a/flang/include/flang/Decimal/binary-floating-point.h
+++ b/flang/include/flang/Decimal/binary-floating-point.h
@@ -22,9 +22,8 @@ namespace Fortran::decimal {
 
 template <int BINARY_PRECISION>
-struct BinaryFloatingPointNumber
-    : public common::RealDetails<BINARY_PRECISION> {
-
+class BinaryFloatingPointNumber : public common::RealDetails<BINARY_PRECISION> {
+public:
   using Details = common::RealDetails<BINARY_PRECISION>;
   using Details::bits;
   using Details::decimalPrecision;
@@ -50,21 +49,23 @@ struct BinaryFloatingPointNumber
   constexpr BinaryFloatingPointNumber &operator=(
       BinaryFloatingPointNumber &&that) = default;
 
+  RawType raw() const { return raw_; }
+
   template <typename A> explicit constexpr BinaryFloatingPointNumber(A x) {
-    static_assert(sizeof raw <= sizeof x);
-    std::memcpy(reinterpret_cast<void *>(&raw),
-        reinterpret_cast<const void *>(&x), sizeof raw);
+    static_assert(sizeof raw_ <= sizeof x);
+    std::memcpy(reinterpret_cast<void *>(&raw_),
+        reinterpret_cast<const void *>(&x), sizeof raw_);
   }
 
   constexpr int BiasedExponent() const {
     return static_cast<int>(
-        (raw >> significandBits) & ((1 << exponentBits) - 1));
+        (raw_ >> significandBits) & ((1 << exponentBits) - 1));
   }
   constexpr int UnbiasedExponent() const {
     int biased{BiasedExponent()};
     return biased - exponentBias + (biased == 0);
   }
-  constexpr RawType Significand() const { return raw & significandMask; }
+  constexpr RawType Significand() const { return raw_ & significandMask; }
   constexpr RawType Fraction() const {
     RawType sig{Significand()};
     if (isImplicitMSB && BiasedExponent() > 0) {
@@ -74,7 +75,7 @@ struct BinaryFloatingPointNumber
   }
 
   constexpr bool IsZero() const {
-    return (raw & ((RawType{1} << (bits - 1)) - 1)) == 0;
+    return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0;
   }
   constexpr bool IsNaN() const {
     return BiasedExponent() == maxExponent && Significand() != 0;
@@ -86,11 +87,39 @@ struct BinaryFloatingPointNumber
     return BiasedExponent() == maxExponent - 1 &&
         Significand() == significandMask;
   }
-  constexpr bool IsNegative() const { return ((raw >> (bits - 1)) & 1) != 0; }
+  constexpr bool IsNegative() const { return ((raw_ >> (bits - 1)) & 1) != 0; }
+
+  constexpr void Negate() { raw_ ^= RawType{1} << (bits - 1); }
+
+  // For calculating the nearest neighbors of a floating-point value
+  constexpr void Previous() {
+    RemoveExplicitMSB();
+    --raw_;
+    InsertExplicitMSB();
+  }
+  constexpr void Next() {
+    RemoveExplicitMSB();
+    ++raw_;
+    InsertExplicitMSB();
+  }
 
-  constexpr void Negate() { raw ^= RawType{1} << (bits - 1); }
+private:
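+  // Helpers for Previous()/Next(): on formats whose most significant
+  // significand bit is explicit (isImplicitMSB is false, as in the x87
+  // 80-bit extended-precision format), the explicit MSB is removed so
+  // that incrementing or decrementing raw_ carries into the exponent
+  // field correctly, then reinserted afterwards.
+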
constexpr void RemoveExplicitMSB() { + if constexpr (!isImplicitMSB) { + raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1); + } + } + constexpr void InsertExplicitMSB() { + if constexpr (!isImplicitMSB) { + constexpr RawType mask{significandMask >> 1}; + raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1); + if (BiasedExponent() > 0) { + raw_ |= RawType{1} << (significandBits - 1); + } + } + } - RawType raw{0}; + RawType raw_{0}; }; } // namespace Fortran::decimal #endif diff --git a/flang/include/flang/Evaluate/check-expression.h b/flang/include/flang/Evaluate/check-expression.h index a26f83b01bbbf..b14a47838e3aa 100644 --- a/flang/include/flang/Evaluate/check-expression.h +++ b/flang/include/flang/Evaluate/check-expression.h @@ -12,6 +12,7 @@ #define FORTRAN_EVALUATE_CHECK_EXPRESSION_H_ #include "expression.h" +#include "intrinsics.h" #include "type.h" #include @@ -41,24 +42,38 @@ bool IsInitialDataTarget( // Check whether an expression is a specification expression // (10.1.11(2), C1010). Constant expressions are always valid // specification expressions. + +// There are two contexts where specification expressions appear -- array +// bounds and type param expressions. We need to differentiate them because +// additional checks are required for array bounds expressions in declarations +// of derived type components (see C750). +ENUM_CLASS(SpecificationExprContext, TYPE_PARAM, BOUND) + template -void CheckSpecificationExpr( - const A &, parser::ContextualMessages &, const semantics::Scope &); +void CheckSpecificationExpr(const A &, parser::ContextualMessages &, + const semantics::Scope &, const IntrinsicProcTable &, + SpecificationExprContext); extern template void CheckSpecificationExpr(const Expr &x, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); extern template void CheckSpecificationExpr(const Expr &x, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); extern template void CheckSpecificationExpr(const Expr &x, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); extern template void CheckSpecificationExpr( const std::optional> &x, parser::ContextualMessages &, - const semantics::Scope &); + const semantics::Scope &, const IntrinsicProcTable &, + SpecificationExprContext); extern template void CheckSpecificationExpr( const std::optional> &x, parser::ContextualMessages &, - const semantics::Scope &); + const semantics::Scope &, const IntrinsicProcTable &, + SpecificationExprContext); extern template void CheckSpecificationExpr( const std::optional> &x, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); // Simple contiguity (9.5.4) template diff --git a/flang/include/flang/Evaluate/intrinsics.h b/flang/include/flang/Evaluate/intrinsics.h index fc79638189193..88d6a7af13eb7 100644 --- a/flang/include/flang/Evaluate/intrinsics.h +++ b/flang/include/flang/Evaluate/intrinsics.h @@ -55,6 +55,11 @@ struct SpecificIntrinsicFunctionInterface : public characteristics::Procedure { // All argument and result types are intrinsic types with default kinds. 
 };
+// Generic intrinsic classes from table 16.1
+ENUM_CLASS(IntrinsicClass, atomicSubroutine, collectiveSubroutine,
+    elementalFunction, elementalSubroutine, inquiryFunction, pureSubroutine,
+    impureSubroutine, transformationalFunction, noClass)
+
 class IntrinsicProcTable {
 private:
  class Implementation;
@@ -68,6 +73,9 @@ class IntrinsicProcTable {
   // statement.
   bool IsIntrinsic(const std::string &) const;
 
+  // Inquiry intrinsics are defined in section 16.7, table 16.1
+  IntrinsicClass GetIntrinsicClass(const std::string &) const;
+
   // Probe the intrinsics for a match against a specific call.
   // On success, the actual arguments are transferred to the result
   // in dummy argument order; on failure, the actual arguments remain
diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
new file mode 100644
index 0000000000000..9f6936e34e17b
--- /dev/null
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -0,0 +1,34 @@
+//===-- Optimizer/CodeGen/CodeGen.h -- code generation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPTIMIZER_CODEGEN_CODEGEN_H
+#define OPTIMIZER_CODEGEN_CODEGEN_H
+
+#include <memory>
+
+namespace llvm {
+class raw_ostream;
+}
+namespace mlir {
+class Pass;
+}
+
+namespace fir {
+
+struct NameUniquer;
+
+/// Convert FIR to the LLVM IR dialect
+std::unique_ptr<mlir::Pass> createFIRToLLVMPass(NameUniquer &uniquer);
+
+/// Convert the LLVM IR dialect to LLVM-IR proper
+std::unique_ptr<mlir::Pass>
+createLLVMDialectToLLVMPass(llvm::raw_ostream &output);
+
+} // namespace fir
+
+#endif // OPTIMIZER_CODEGEN_CODEGEN_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.h b/flang/include/flang/Optimizer/Dialect/FIRDialect.h
index 7a8fc18937fc2..92fd23b5044f8 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRDialect.h
+++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.h
@@ -13,20 +13,6 @@
 #include "mlir/InitAllDialects.h"
 #include "mlir/InitAllPasses.h"
 
-namespace llvm {
-class raw_ostream;
-class StringRef;
-} // namespace llvm
-
-namespace mlir {
-class Attribute;
-class DialectAsmParser;
-class DialectAsmPrinter;
-class Location;
-class MLIRContext;
-class Type;
-} // namespace mlir
-
 namespace fir {
 
 /// FIR dialect
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h
index f5763693f7bb5..df43f4dc6ee1d 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.h
@@ -10,6 +10,8 @@
 #define OPTIMIZER_DIALECT_FIROPS_H
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffects.h"
 
 using namespace mlir;
 
@@ -19,10 +21,10 @@ class FirEndOp;
 class LoopOp;
 class RealAttr;
 
-void buildCmpFOp(mlir::Builder *builder, mlir::OperationState &result,
+void buildCmpFOp(mlir::OpBuilder &builder, mlir::OperationState &result,
                  mlir::CmpFPredicate predicate, mlir::Value lhs, mlir::Value rhs);
-void buildCmpCOp(mlir::Builder *builder, mlir::OperationState &result,
+void buildCmpCOp(mlir::OpBuilder &builder, mlir::OperationState &result,
                  mlir::CmpFPredicate predicate, mlir::Value lhs, mlir::Value rhs);
 
 unsigned getCaseArgumentOffset(llvm::ArrayRef<mlir::Attribute> cases,
diff --git
a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 2011ca9aaa6fb..383256c3916f4 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -14,7 +14,9 @@ #ifndef FIR_DIALECT_FIR_OPS #define FIR_DIALECT_FIR_OPS +include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/SideEffects.td" def fir_Dialect : Dialect { @@ -138,7 +140,7 @@ class fir_SimpleOp traits> // Base builder for allocate operations def fir_AllocateOpBuilder : OpBuilder< - "Builder *builder, OperationState &result, Type inType," + "OpBuilder &builder, OperationState &result, Type inType," "ValueRange lenParams = {}, ValueRange sizes = {}," "ArrayRef attributes = {}", [{ @@ -149,19 +151,19 @@ def fir_AllocateOpBuilder : OpBuilder< }]>; def fir_NamedAllocateOpBuilder : OpBuilder< - "Builder *builder, OperationState &result, Type inType, StringRef name," + "OpBuilder &builder, OperationState &result, Type inType, StringRef name," "ValueRange lenParams = {}, ValueRange sizes = {}," "ArrayRef attributes = {}", [{ result.addTypes(getRefTy(inType)); result.addAttribute("in_type", TypeAttr::get(inType)); - result.addAttribute("name", builder->getStringAttr(name)); + result.addAttribute("name", builder.getStringAttr(name)); result.addOperands(sizes); result.addAttributes(attributes); }]>; def fir_OneResultOpBuilder : OpBuilder< - "Builder *, OperationState &result, Type resultType," + "OpBuilder &, OperationState &result, Type resultType," "ValueRange operands, ArrayRef attributes = {}", [{ if (resultType) @@ -194,8 +196,9 @@ class fir_AllocatableBaseOp traits = []> : ); } -class fir_AllocatableOp traits =[]> : - fir_AllocatableBaseOp, +class fir_AllocatableOp traits = []> : + fir_AllocatableBaseOp])>, fir_TwoBuilders, Arguments<(ins TypeAttr:$in_type, Variadic:$args)> { @@ -262,18 +265,27 @@ class fir_AllocatableOp traits =[]> : static constexpr llvm::StringRef inType() { return "in_type"; } static constexpr llvm::StringRef lenpName() { return "len_param_count"; } mlir::Type getAllocatedType(); + bool hasLenParams() { return bool{getAttr(lenpName())}; } + unsigned numLenParams() { if (auto val = getAttrOfType(lenpName())) return val.getInt(); return 0; } + operand_range getLenParams() { return {operand_begin(), operand_begin() + numLenParams()}; } + + unsigned numShapeOperands() { + return operand_end() - operand_begin() + numLenParams(); + } + operand_range getShapeOperands() { return {operand_begin() + numLenParams(), operand_end()}; } + static mlir::Type getRefTy(mlir::Type ty); /// Get the input type of the allocation @@ -285,14 +297,16 @@ class fir_AllocatableOp traits =[]> : // Verify checks common to all allocation operations string allocVerify = [{ llvm::SmallVector visited; - if (verifyInType(getInType(), visited)) + if (verifyInType(getInType(), visited, numShapeOperands())) return emitOpError("invalid type for allocation"); if (verifyRecordLenParams(getInType(), numLenParams())) return emitOpError("LEN params do not correspond to type"); }]; } +//===----------------------------------------------------------------------===// // Memory SSA operations +//===----------------------------------------------------------------------===// def fir_AllocaOp : fir_AllocatableOp<"alloca"> { let summary = "allocate storage for a temporary on the stack given a type"; @@ -337,7 +351,7 @@ def fir_AllocaOp : 
fir_AllocatableOp<"alloca"> { }]; } -def fir_LoadOp : fir_OneResultOp<"load", []> { +def fir_LoadOp : fir_OneResultOp<"load", [MemoryEffects<[MemRead]>]> { let summary = "load a value from a memory reference"; let description = [{ Load a value from a memory reference into an ssa-value (virtual register). @@ -356,7 +370,7 @@ def fir_LoadOp : fir_OneResultOp<"load", []> { let arguments = (ins AnyReferenceLike:$memref); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value refVal", + "OpBuilder &builder, OperationState &result, Value refVal", [{ if (!refVal) { mlir::emitError(result.location, "LoadOp has null argument"); @@ -395,7 +409,7 @@ def fir_LoadOp : fir_OneResultOp<"load", []> { }]; } -def fir_StoreOp : fir_Op<"store", []> { +def fir_StoreOp : fir_Op<"store", [MemoryEffects<[MemWrite]>]> { let summary = "store an SSA-value to a memory location"; let description = [{ @@ -472,8 +486,7 @@ def fir_UndefOp : fir_OneResultOp<"undefined", [NoSideEffect]> { let assemblyFormat = "type($intype) attr-dict"; let verifier = [{ - if (auto ref = getType().dyn_cast()) - return emitOpError("undefined values of type !fir.ref not allowed"); + // allow `undef : ref` since it is a possible from transformations return mlir::success(); }]; } @@ -507,7 +520,7 @@ def fir_AllocMemOp : fir_AllocatableOp<"allocmem"> { }]; } -def fir_FreeMemOp : fir_Op<"freemem", []> { +def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> { let summary = "free a heap object"; let description = [{ @@ -529,7 +542,8 @@ def fir_FreeMemOp : fir_Op<"freemem", []> { let assemblyFormat = "$heapref attr-dict `:` type($heapref)"; } -//===----------------------------------------------------------------------===//// Terminator operations +//===----------------------------------------------------------------------===// +// Terminator operations //===----------------------------------------------------------------------===// class fir_SwitchTerminatorOp traits = []> : @@ -546,29 +560,15 @@ class fir_SwitchTerminatorOp traits = []> : let successors = (successor VariadicSuccessor:$targets); - let builders = [OpBuilder< - "Builder *, OperationState &result, Value selector," - "ValueRange properOperands, ArrayRef destinations," - "ArrayRef operands = {}," - "ArrayRef attributes = {}", - [{ - result.addOperands(selector); - result.addOperands(properOperands); - for (auto kvp : llvm::zip(destinations, operands)) { - result.addSuccessors(std::get<0>(kvp)); - result.addOperands(std::get<1>(kvp)); - } - result.addAttributes(attributes); - }] - >]; - string extraSwitchClassDeclaration = [{ using Conditions = mlir::Value; static constexpr llvm::StringRef getCasesAttr() { return "case_tags"; } // The number of destination conditions that may be tested - unsigned getNumConditions() { return getNumDest(); } + unsigned getNumConditions() { + return getAttrOfType(getCasesAttr()).size(); + } // The selector is the value being tested to determine the destination mlir::Value getSelector() { return selector(); } @@ -585,6 +585,7 @@ class fir_SwitchTerminatorOp traits = []> : llvm::Optional> getSuccessorOperands( llvm::ArrayRef operands, unsigned cond); + using BranchOpInterfaceTrait::getSuccessorOperands; // Helper function to deal with Optional operand forms void printSuccessorAtIndex(mlir::OpAsmPrinter &p, unsigned i) { @@ -595,12 +596,51 @@ class fir_SwitchTerminatorOp traits = []> : else p.printSuccessor(succ); } + + unsigned targetOffsetSize(); }]; } class fir_IntegralSwitchTerminatorOp traits = []> : 
fir_SwitchTerminatorOp { + let skipDefaultBuilders = 1; + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, Value selector," + "ArrayRef compareOperands, ArrayRef destinations," + "ArrayRef destOperands = {}," + "ArrayRef attributes = {}", + [{ + result.addOperands(selector); + llvm::SmallVector ivalues; + for (auto iv : compareOperands) + ivalues.push_back(builder.getI64IntegerAttr(iv)); + ivalues.push_back(builder.getUnitAttr()); + result.addAttribute(getCasesAttr(), builder.getArrayAttr(ivalues)); + const auto count = destinations.size(); + for (auto d : destinations) + result.addSuccessors(d); + const auto opCount = destOperands.size(); + llvm::SmallVector argOffs; + int32_t sumArgs = 0; + for (std::remove_const_t i = 0; i != count; ++i) { + if (i < opCount) { + result.addOperands(destOperands[i]); + const auto argSz = destOperands[i].size(); + argOffs.push_back(argSz); + sumArgs += argSz; + } else { + argOffs.push_back(0); + } + } + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getI32VectorAttr({1, 0, sumArgs})); + result.addAttribute(getTargetOffsetAttr(), + builder.getI32VectorAttr(argOffs)); + result.addAttributes(attributes); + }] + >]; + let parser = [{ mlir::OpAsmParser::OperandType selector; mlir::Type type; @@ -673,14 +713,17 @@ class fir_IntegralSwitchTerminatorOp())) return emitOpError("must be an integer"); auto cases = getAttrOfType(getCasesAttr()).getValue(); - auto count = getNumConditions(); + auto count = getNumDest(); + if (count == 0) + return emitOpError("must have at least one successor"); + if (getNumConditions() != count) + return emitOpError("number of cases and targets don't match"); + if (targetOffsetSize() != count) + return emitOpError("incorrect number of successor operand groups"); for (decltype(count) i = 0; i != count; ++i) { auto &attr = cases[i]; - if (attr.isa() || attr.isa()) { - // ok - } else { + if (!(attr.isa() || attr.isa())) return emitOpError("invalid case alternative"); - } } return mlir::success(); }]; @@ -749,6 +792,17 @@ def fir_SelectCaseOp : fir_SwitchTerminatorOp<"select_case"> { ``` }]; + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<"OpBuilder &builder, OperationState &result, Value selector," + "ArrayRef compareAttrs, ArrayRef cmpOperands," + "ArrayRef destinations, ArrayRef destOperands = {}," + "ArrayRef attributes = {}">, + OpBuilder<"OpBuilder &builder, OperationState &result, Value selector," + "ArrayRef compareAttrs, ArrayRef cmpOpList," + "ArrayRef destinations, ArrayRef destOperands = {}," + "ArrayRef attributes = {}">]; + let parser = "return parseSelectCase(parser, result);"; let printer = [{ @@ -785,23 +839,30 @@ def fir_SelectCaseOp : fir_SwitchTerminatorOp<"select_case"> { getSelector().getType().isa())) return emitOpError("must be an integer, character, or logical"); auto cases = getAttrOfType(getCasesAttr()).getValue(); - auto count = getNumConditions(); + auto count = getNumDest(); + if (count == 0) + return emitOpError("must have at least one successor"); + if (getNumConditions() != count) + return emitOpError("number of conditions and successors don't match"); + if (compareOffsetSize() != count) + return emitOpError("incorrect number of compare operand groups"); + if (targetOffsetSize() != count) + return emitOpError("incorrect number of successor operand groups"); for (decltype(count) i = 0; i != count; ++i) { auto &attr = cases[i]; - if (attr.isa() || - attr.isa() || - attr.isa() || - attr.isa() || - attr.isa()) { - // ok - } else { + if (!(attr.isa() || + 
attr.isa() || + attr.isa() || + attr.isa() || + attr.isa())) return emitOpError("incorrect select case attribute type"); - } } return mlir::success(); }]; - let extraClassDeclaration = extraSwitchClassDeclaration; + let extraClassDeclaration = extraSwitchClassDeclaration#[{ + unsigned compareOffsetSize(); + }]; } def fir_SelectTypeOp : fir_SwitchTerminatorOp<"select_type"> { @@ -824,6 +885,39 @@ def fir_SelectTypeOp : fir_SwitchTerminatorOp<"select_type"> { ``` }]; + let skipDefaultBuilders = 1; + let builders = [OpBuilder< + "OpBuilder &builder, OperationState &result, Value selector," + "ArrayRef typeOperands," + "ArrayRef destinations, ArrayRef destOperands = {}," + "ArrayRef attributes = {}", + [{ + result.addOperands(selector); + result.addAttribute(getCasesAttr(), builder.getArrayAttr(typeOperands)); + const auto count = destinations.size(); + for (auto d : destinations) + result.addSuccessors(d); + const auto opCount = destOperands.size(); + llvm::SmallVector argOffs; + int32_t sumArgs = 0; + for (std::remove_const_t i = 0; i != count; ++i) { + if (i < opCount) { + result.addOperands(destOperands[i]); + const auto argSz = destOperands[i].size(); + argOffs.push_back(argSz); + sumArgs += argSz; + } else { + argOffs.push_back(0); + } + } + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getI32VectorAttr({1, 0, sumArgs})); + result.addAttribute(getTargetOffsetAttr(), + builder.getI32VectorAttr(argOffs)); + result.addAttributes(attributes); + }] + >]; + let parser = "return parseSelectType(parser, result);"; let printer = [{ @@ -847,15 +941,18 @@ def fir_SelectTypeOp : fir_SwitchTerminatorOp<"select_type"> { if (!(getSelector().getType().isa())) return emitOpError("must be a boxed type"); auto cases = getAttrOfType(getCasesAttr()).getValue(); - auto count = getNumConditions(); + auto count = getNumDest(); + if (count == 0) + return emitOpError("must have at least one successor"); + if (getNumConditions() != count) + return emitOpError("number of conditions and successors don't match"); + if (targetOffsetSize() != count) + return emitOpError("incorrect number of successor operand groups"); for (decltype(count) i = 0; i != count; ++i) { auto &attr = cases[i]; - if (attr.isa() || attr.isa() || - attr.isa()) { - // ok - } else { + if (!(attr.isa() || attr.isa() || + attr.isa())) return emitOpError("invalid type-case alternative"); - } } return mlir::success(); }]; @@ -1015,9 +1112,6 @@ def fir_EmboxOp : fir_Op<"embox", [NoSideEffect]> { } def fir_EmboxCharOp : fir_Op<"emboxchar", [NoSideEffect]> { - let arguments = (ins AnyReferenceLike:$memref, AnyIntegerLike:$len); - let results = (outs fir_BoxCharType); - let summary = "boxes a given CHARACTER reference and its LEN parameter"; let description = [{ @@ -1039,6 +1133,10 @@ def fir_EmboxCharOp : fir_Op<"emboxchar", [NoSideEffect]> { This buffer and its LEN value (10) are wrapped into a pair in `%6`. 
}]; + let arguments = (ins AnyReferenceLike:$memref, AnyIntegerLike:$len); + + let results = (outs fir_BoxCharType); + let assemblyFormat = [{ $memref `,` $len attr-dict `:` functional-type(operands, results) }]; @@ -1052,7 +1150,6 @@ def fir_EmboxCharOp : fir_Op<"emboxchar", [NoSideEffect]> { } def fir_EmboxProcOp : fir_Op<"emboxproc", [NoSideEffect]> { - let summary = "boxes a given procedure and optional host context"; let description = [{ @@ -1231,6 +1328,8 @@ def fir_BoxAddrOp : fir_SimpleOneResultOp<"box_addr", [NoSideEffect]> { let arguments = (ins fir_BoxType:$val); let results = (outs AnyReferenceLike); + + let hasFolder = 1; } def fir_BoxCharLenOp : fir_SimpleOp<"boxchar_len", [NoSideEffect]> { @@ -1248,6 +1347,8 @@ def fir_BoxCharLenOp : fir_SimpleOp<"boxchar_len", [NoSideEffect]> { let arguments = (ins fir_BoxCharType:$val); let results = (outs AnyIntegerLike); + + let hasFolder = 1; } def fir_BoxDimsOp : fir_Op<"box_dims", [NoSideEffect]> { @@ -1444,8 +1545,8 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> { ```mlir %i = ... : index - %h = ... : !fir.heap> - %p = fir.coordinate_of %h, %i : (!fir.heap>, index) -> !fir.ref + %h = ... : !fir.heap> + %p = fir.coordinate_of %h, %i : (!fir.heap>, index) -> !fir.ref ``` In the example, `%p` will be a pointer to the `%i`-th f32 value in the @@ -1456,11 +1557,27 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> { let results = (outs fir_ReferenceType); - let assemblyFormat = [{ - operands attr-dict `:` functional-type(operands, results) - }]; + let parser = "return parseCoordinateOp(parser, result);"; + let printer = [{ + p << getOperationName() << ' ' << getOperation()->getOperands(); + p.printOptionalAttrDict(getAttrs(), /*elidedAttrs=*/{baseType()}); + p << " : "; + p.printFunctionalType(getOperation()->getOperandTypes(), + getOperation()->getResultTypes()); + }]; + let verifier = [{ + auto refTy = ref().getType(); + if (fir::isa_ref_type(refTy)) { + auto eleTy = fir::dyn_cast_ptrEleTy(refTy); + if (auto arrTy = eleTy.dyn_cast()) { + if (arrTy.hasUnknownShape()) + return emitOpError("cannot find coordinate in unknown shape"); + if (arrTy.getConstantRows() < arrTy.getDimension() - 1) + return emitOpError("cannot find coordinate with unknown extents"); + } + } // Recovering a LEN type parameter only makes sense from a boxed value for (auto co : coor()) if (dyn_cast_or_null(co.getDefiningOp())) { @@ -1469,8 +1586,28 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> { if (!ref().getType().dyn_cast()) return emitOpError("len_param_index must be used on box type"); } + if (auto attr = getAttr(CoordinateOp::baseType())) { + if (!attr.isa()) + return emitOpError("improperly constructed"); + } else { + return emitOpError("must have base type"); + } return mlir::success(); }]; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "Type type, Value ref, ValueRange coor," + "ArrayRef attrs = {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "Type type, ValueRange operands," + "ArrayRef attrs = {}">]; + + let extraClassDeclaration = [{ + static constexpr llvm::StringRef baseType() { return "base_type"; } + mlir::Type getBaseType(); + }]; } def fir_ExtractValueOp : fir_OneResultOp<"extract_value", [NoSideEffect]> { @@ -1572,10 +1709,10 @@ def fir_FieldIndexOp : fir_OneResultOp<"field_index", [NoSideEffect]> { }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, StringRef fieldName," + 
"OpBuilder &builder, OperationState &result, StringRef fieldName," "Type recTy, ValueRange operands = {}", [{ - result.addAttribute(fieldAttrName(), builder->getStringAttr(fieldName)); + result.addAttribute(fieldAttrName(), builder.getStringAttr(fieldName)); result.addAttribute(typeAttrName(), TypeAttr::get(recTy)); result.addOperands(operands); }] @@ -1601,7 +1738,7 @@ def fir_GenDimsOp : fir_OneResultOp<"gendims", [NoSideEffect]> { stride must not be zero. ```mlir - %d = fir.gendims %l, %u, %s : (index, index, index) -> !fir.dims<1> + %d = fir.gendims %lo, %ext, %str : (index, index, index) -> !fir.dims<1> ``` }]; @@ -1696,9 +1833,9 @@ def fir_LenParamIndexOp : fir_OneResultOp<"len_param_index", [NoSideEffect]> { }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, StringRef fieldName, Type recTy", + "OpBuilder &builder, OperationState &result, StringRef fieldName, Type recTy", [{ - result.addAttribute(fieldAttrName(), builder->getStringAttr(fieldName)); + result.addAttribute(fieldAttrName(), builder.getStringAttr(fieldName)); result.addAttribute(typeAttrName(), TypeAttr::get(recTy)); }] >]; @@ -1712,20 +1849,52 @@ def fir_LenParamIndexOp : fir_OneResultOp<"len_param_index", [NoSideEffect]> { }]; } +//===----------------------------------------------------------------------===// // Fortran loops +//===----------------------------------------------------------------------===// -def ImplicitFirTerminator : SingleBlockImplicitTerminator<"FirEndOp">; +def fir_ResultOp : fir_Op<"result", [NoSideEffect, ReturnLike, Terminator]> { + let summary = "special terminator for use in fir region operations"; + + let description = [{ + Result takes a list of ssa-values produced in the block and forwards them + as a result to the operation that owns the region of the block. The + operation can retain the values or return them to its parent block + depending upon its semantics. + }]; + + let arguments = (ins Variadic:$results); + let builders = [ + OpBuilder<"OpBuilder &builder, OperationState &result", "/* do nothing */"> + ]; + + let assemblyFormat = "($results^ `:` type($results))? attr-dict"; + + let verifier = [{ return ::verify(*this); }]; +} + +def FirRegionTerminator : SingleBlockImplicitTerminator<"ResultOp">; + +class region_Op traits = []> : + fir_Op { + let printer = [{ return ::print(p, *this); }]; + let verifier = [{ return ::verify(*this); }]; + let parser = [{ return ::parse$cppClass(parser, result); }]; +} -def fir_LoopOp : fir_Op<"loop", [ImplicitFirTerminator]> { +def fir_LoopOp : region_Op<"do_loop", + [DeclareOpInterfaceMethods]> { let summary = "generalized loop operation"; let description = [{ Generalized high-level looping construct. This operation is similar to - MLIR's `loop.for`. An ordered loop will return the final value of `%i`. + MLIR's `loop.for`. 
```mlir %l = constant 0 : index %u = constant 9 : index - fir.loop %i = %l to %u unordered { + %s = constant 1 : index + fir.do_loop %i = %l to %u step %s unordered { %x = fir.convert %i : (index) -> i32 %v = fir.call @compute(%x) : (i32) -> f32 %p = fir.coordinate_of %A, %i : (!fir.ref, index) -> !fir.ref @@ -1740,151 +1909,100 @@ def fir_LoopOp : fir_Op<"loop", [ImplicitFirTerminator]> { let arguments = (ins Index:$lowerBound, Index:$upperBound, - Variadic:$optStep, - OptionalAttr:$constantStep, + Index:$step, + Variadic:$initArgs, OptionalAttr:$unordered ); - - let results = (outs Variadic:$lastVal); - + let results = (outs + Variadic:$results + ); let regions = (region SizedRegion<1>:$region); let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"mlir::Builder *builder, OperationState &result," + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," "mlir::Value lowerBound, mlir::Value upperBound," - "ValueRange step = {}, ArrayRef attributes = {}"> + "mlir::Value step, bool unordered = false," + "ValueRange iterArgs = llvm::None," + "ArrayRef attributes = {}"> ]; - let parser = "return parseLoopOp(parser, result);"; - - let printer = [{ - p << getOperationName() << ' ' << getInductionVar() << " = " - << lowerBound() << " to " << upperBound(); - auto s = optStep(); - if (s.begin() != s.end()) { - p << " step "; - p.printOperand(*s.begin()); - } - if (unordered()) - p << " unordered"; - p.printRegion(region(), /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/false); - p.printOptionalAttrDict(getAttrs(), {unorderedAttrName(), stepAttrName()}); - }]; - - let verifier = [{ - auto step = optStep(); - if (step.begin() != step.end()) { - // FIXME: size of step must be 1 - auto *s = (*step.begin()).getDefiningOp(); - if (auto cst = dyn_cast_or_null(s)) - if (cst.getValue() == 0) - return emitOpError("constant step operand must be nonzero"); - } - - // Check that the body defines as single block argument for the induction - // variable. - auto *body = getBody(); - if (body->getNumArguments() != 1 || - !body->getArgument(0).getType().isIndex()) - return emitOpError("expected body to have a single index argument for " - "the induction variable"); - if (lastVal().size() > 1) - return emitOpError("can only return one final value of iterator"); - return mlir::success(); - }]; - let extraClassDeclaration = [{ - static constexpr const char *unorderedAttrName() { return "unordered"; } - static constexpr const char *stepAttrName() { return "step"; } - - /// Is this an unordered loop? - bool isUnordered() { return getAttr(unorderedAttrName()).isa(); } + static constexpr llvm::StringRef unorderedAttrName() { return "unordered"; } - /// Does loop set (and return) the final value of the control variable? 
- bool hasLastValue() { return lastVal().size(); } - - /// Get the body of the loop - mlir::Block *getBody() { return ®ion().front(); } - - /// Get the block argument corresponding to the loop control value (PHI) mlir::Value getInductionVar() { return getBody()->getArgument(0); } - - /// Get a builder to insert operations into the LoopOp mlir::OpBuilder getBodyBuilder() { - return mlir::OpBuilder(getBody(), std::prev(getBody()->end())); + return OpBuilder(getBody(), std::prev(getBody()->end())); } - - void setLowerBound(mlir::Value bound) { - getOperation()->setOperand(0, bound); + mlir::Block::BlockArgListType getRegionIterArgs() { + return getBody()->getArguments().drop_front(); } + mlir::Operation::operand_range getIterOperands() { + return getOperands().drop_front(getNumControlOperands()); + } + + void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); } + void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); } + void setStep(Value step) { getOperation()->setOperand(2, step); } - void setUpperBound(mlir::Value bound) { - getOperation()->setOperand(1, bound); + /// Number of region arguments for loop-carried values + unsigned getNumRegionIterArgs() { + return getBody()->getNumArguments() - 1; } + /// Number of operands controlling the loop: lb, ub, step + unsigned getNumControlOperands() { return 3; } + /// Does the operation hold operands for loop-carried values + bool hasIterOperands() { + return getOperation()->getNumOperands() > getNumControlOperands(); + } + /// Get Number of loop-carried values + unsigned getNumIterOperands() { + return getOperation()->getNumOperands() - getNumControlOperands(); + } + + /// Get the body of the loop + mlir::Block *getBody() { return ®ion().front(); } - void setStep(mlir::Value step) { - getOperation()->setOperand(2, step); + void setUnordered() { + getOperation()->setAttr(unorderedAttrName(), + mlir::UnitAttr::get(getContext())); } }]; } -def fir_WhereOp : fir_Op<"where", [ImplicitFirTerminator]> { - let summary = "generalized conditional operation"; +def fir_WhereOp : region_Op<"if"> { + let summary = "if-then-else conditional operation"; let description = [{ - To conditionally execute operations (typically) within the body of a - `fir.loop` operation. This operation is similar to `loop.if`. + Used to conditionally execute operations. This operation is the FIR + dialect's version of `loop.if`. ```mlir %56 = ... : i1 %78 = ... : !fir.ref - fir.where %56 { + fir.if %56 { fir.store %76 to %78 : !fir.ref - } otherwise { + } else { fir.store %77 to %78 : !fir.ref } ``` }]; let arguments = (ins I1:$condition); + let results = (outs Variadic:$results); - let regions = (region SizedRegion<1>:$whereRegion, AnyRegion:$otherRegion); + let regions = (region + SizedRegion<1>:$whereRegion, + AnyRegion:$otherRegion + ); let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " - "Value cond, bool withOtherRegion"> + OpBuilder<"OpBuilder &builder, OperationState &result, " + "Value cond, bool withOtherRegion">, + OpBuilder<"OpBuilder &builder, OperationState &result, " + "TypeRange resultTypes, Value cond, bool withOtherRegion"> ]; - let parser = [{ return parseWhereOp(parser, result); }]; - - let printer = [{ - p << getOperationName() << ' ' << condition(); - p.printRegion(whereRegion(), /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/false); - - // Print the 'else' regions if it exists and has a block. 
- auto &otherReg = otherRegion(); - if (!otherReg.empty()) { - p << " otherwise"; - p.printRegion(otherReg, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/false); - } - p.printOptionalAttrDict(getAttrs()); - }]; - - let verifier = [{ - for (auto ®ion : getOperation()->getRegions()) { - if (region.empty()) - continue; - for (auto &b : region) - if (b.getNumArguments() != 0) - return emitOpError("requires that child entry blocks have no args"); - } - return mlir::success(); - }]; - let extraClassDeclaration = [{ mlir::OpBuilder getWhereBodyBuilder() { assert(!whereRegion().empty() && "Unexpected empty 'where' region."); @@ -1899,9 +2017,81 @@ def fir_WhereOp : fir_Op<"where", [ImplicitFirTerminator]> { }]; } +def fir_IterWhileOp : region_Op<"iterate_while", + [DeclareOpInterfaceMethods]> { + let summary = "DO loop with early exit condition"; + let description = [{ + This construct is useful for lowering implied-DO loops. It is very similar + to `fir::LoopOp` with the addition that it requires a single loop-carried + bool value that signals an early exit condition to the operation. A `true` + disposition means the next loop iteration should proceed. A `false` + indicates that the `fir.iterate_while` operation should terminate and + return its iteration arguments. + }]; + + let arguments = (ins + Index:$lowerBound, + Index:$upperBound, + Index:$step, + I1:$iterateIn, + Variadic:$initArgs + ); + let results = (outs + I1:$iterateResult, + Variadic:$results + ); + let regions = (region SizedRegion<1>:$region); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "mlir::Value lowerBound, mlir::Value upperBound," + "mlir::Value step, mlir::Value iterate," + "ValueRange iterArgs = llvm::None," + "ArrayRef attributes = {}"> + ]; + + let extraClassDeclaration = [{ + mlir::Block *getBody() { return ®ion().front(); } + mlir::Value getIterateVar() { return getBody()->getArgument(1); } + mlir::Value getInductionVar() { return getBody()->getArgument(0); } + mlir::OpBuilder getBodyBuilder() { + return mlir::OpBuilder(getBody(), std::prev(getBody()->end())); + } + mlir::Block::BlockArgListType getRegionIterArgs() { + return getBody()->getArguments().drop_front(); + } + mlir::Operation::operand_range getIterOperands() { + return getOperands().drop_front(getNumControlOperands()); + } + + void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); } + void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); } + void setStep(mlir::Value step) { getOperation()->setOperand(2, step); } + + /// Number of region arguments for loop-carried values + unsigned getNumRegionIterArgs() { + return getBody()->getNumArguments() - 1; + } + /// Number of operands controlling the loop + unsigned getNumControlOperands() { return 3; } + /// Does the operation hold operands for loop-carried values + bool hasIterOperands() { + return getOperation()->getNumOperands() > getNumControlOperands(); + } + /// Get Number of loop-carried values + unsigned getNumIterOperands() { + return getOperation()->getNumOperands() - getNumControlOperands(); + } + }]; +} + +//===----------------------------------------------------------------------===// // Procedure call operations +//===----------------------------------------------------------------------===// -def fir_CallOp : fir_Op<"call", []> { +def fir_CallOp : fir_Op<"call", + [MemoryEffects<[MemAlloc, MemFree, MemRead, MemWrite]>]> { let summary = "call a procedure"; let description = [{ @@ 
-1931,7 +2121,8 @@ def fir_CallOp : fir_Op<"call", []> { }]; } -def fir_DispatchOp : fir_Op<"dispatch", []> { +def fir_DispatchOp : fir_Op<"dispatch", + [MemoryEffects<[MemAlloc, MemFree, MemRead, MemWrite]>]> { let summary = "call a type-bound procedure"; let description = [{ @@ -2032,7 +2223,8 @@ def fir_StringLitOp : fir_Op<"string_lit", [NoSideEffect]> { else if (auto v = val.dyn_cast()) result.attributes.push_back(builder.getNamedAttr(xlist(), v)); else - return mlir::failure(); + return parser.emitError(parser.getCurrentLocation(), + "found an invalid constant"); mlir::IntegerAttr sz; mlir::Type type; if (parser.parseLParen() || @@ -2040,9 +2232,11 @@ def fir_StringLitOp : fir_Op<"string_lit", [NoSideEffect]> { parser.parseRParen() || parser.parseColonType(type)) return mlir::failure(); + if (!(type.isa() || type.isa())) + return parser.emitError(parser.getCurrentLocation(), + "must have character type"); type = fir::SequenceType::get({sz.getInt()}, type); - if (!type || - parser.addTypesToList(type, result.types)) + if (!type || parser.addTypesToList(type, result.types)) return mlir::failure(); return mlir::success(); }]; @@ -2091,7 +2285,7 @@ def fir_StringLitOp : fir_Op<"string_lit", [NoSideEffect]> { class fir_ArithmeticOp traits = []> : fir_Op, + !listconcat(traits, [NoSideEffect, SameOperandsAndResultType])>, Results<(outs AnyType)> { let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); @@ -2102,7 +2296,7 @@ class fir_ArithmeticOp traits = []> : class fir_UnaryArithmeticOp traits = []> : fir_Op, + !listconcat(traits, [NoSideEffect, SameOperandsAndResultType])>, Results<(outs AnyType)> { let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); @@ -2147,9 +2341,15 @@ class RealArithmeticOp traits = []> : fir_ArithmeticOp, Arguments<(ins AnyRealLike:$lhs, AnyRealLike:$rhs)>; -def fir_AddfOp : RealArithmeticOp<"addf", [Commutative]>; -def fir_SubfOp : RealArithmeticOp<"subf">; -def fir_MulfOp : RealArithmeticOp<"mulf", [Commutative]>; +def fir_AddfOp : RealArithmeticOp<"addf", [Commutative]> { + let hasFolder = 1; +} +def fir_SubfOp : RealArithmeticOp<"subf"> { + let hasFolder = 1; +} +def fir_MulfOp : RealArithmeticOp<"mulf", [Commutative]> { + let hasFolder = 1; +} def fir_DivfOp : RealArithmeticOp<"divf">; def fir_ModfOp : RealArithmeticOp<"modf">; // Pow is a builtin call and not a primitive @@ -2168,7 +2368,7 @@ def fir_CmpfOp : fir_Op<"cmpf", let results = (outs AnyLogicalLike); let builders = [OpBuilder< - "Builder *builder, OperationState &result, CmpFPredicate predicate," + "OpBuilder &builder, OperationState &result, CmpFPredicate predicate," "Value lhs, Value rhs", [{ buildCmpFOp(builder, result, predicate, lhs, rhs); }]>]; @@ -2190,8 +2390,7 @@ def fir_CmpfOp : fir_Op<"cmpf", }]; } -def fir_ConstcOp : fir_Op<"constc", [NoSideEffect]>, - Results<(outs fir_ComplexType)> { +def fir_ConstcOp : fir_Op<"constc", [NoSideEffect]> { let summary = "create a complex constant"; let description = [{ @@ -2200,6 +2399,8 @@ def fir_ConstcOp : fir_Op<"constc", [NoSideEffect]>, the standard dialect. 
}]; + let results = (outs fir_ComplexType); + let parser = [{ fir::RealAttr realp; fir::RealAttr imagp; @@ -2276,7 +2477,7 @@ def fir_CmpcOp : fir_Op<"cmpc", let printer = "printCmpcOp(p, *this);"; let builders = [OpBuilder< - "Builder *builder, OperationState &result, CmpFPredicate predicate," + "OpBuilder &builder, OperationState &result, CmpFPredicate predicate," "Value lhs, Value rhs", [{ buildCmpCOp(builder, result, predicate, lhs, rhs); }]>]; @@ -2297,6 +2498,7 @@ def fir_CmpcOp : fir_Op<"cmpc", def fir_AddrOfOp : fir_OneResultOp<"address_of", [NoSideEffect]> { let summary = "convert a symbol to an SSA value"; + let description = [{ Convert a symbol (a function or global reference) to an SSA-value to be used in other Operations. @@ -2315,6 +2517,7 @@ def fir_AddrOfOp : fir_OneResultOp<"address_of", [NoSideEffect]> { def fir_ConvertOp : fir_OneResultOp<"convert", [NoSideEffect]> { let summary = "encapsulates all Fortran scalar type conversions"; + let description = [{ Generalized type conversion. Convert the ssa value from type T to type U. Not all pairs of types have conversions. When types T and U are the same @@ -2333,6 +2536,32 @@ def fir_ConvertOp : fir_OneResultOp<"convert", [NoSideEffect]> { let assemblyFormat = [{ $value attr-dict `:` functional-type($value, results) }]; + + let hasFolder = 1; + + let verifier = [{ + auto inType = value().getType(); + auto outType = getType(); + if (inType == outType) + return mlir::success(); + if ((isPointerCompatible(inType) && isPointerCompatible(outType)) || + (isIntegerCompatible(inType) && isIntegerCompatible(outType)) || + (isIntegerCompatible(inType) && isFloatCompatible(outType)) || + (isFloatCompatible(inType) && isIntegerCompatible(outType)) || + (isFloatCompatible(inType) && isFloatCompatible(outType)) || + (isIntegerCompatible(inType) && isPointerCompatible(outType)) || + (isPointerCompatible(inType) && isIntegerCompatible(outType)) || + (inType.isa() && outType.isa()) || + (fir::isa_complex(inType) && fir::isa_complex(outType))) + return mlir::success(); + return emitOpError("invalid type conversion"); + }]; + + let extraClassDeclaration = [{ + static bool isIntegerCompatible(mlir::Type ty); + static bool isFloatCompatible(mlir::Type ty); + static bool isPointerCompatible(mlir::Type ty); + }]; } def FortranTypeAttr : Attr()">, @@ -2380,7 +2609,7 @@ def fir_GenTypeDescOp : fir_OneResultOp<"gentypedesc", [NoSideEffect]> { }]; let builders = [ - OpBuilder<"Builder *, OperationState &result, mlir::TypeAttr inty"> + OpBuilder<"OpBuilder &, OperationState &result, mlir::TypeAttr inty"> ]; let verifier = [{ @@ -2403,7 +2632,7 @@ def fir_GenTypeDescOp : fir_OneResultOp<"gentypedesc", [NoSideEffect]> { } def fir_NoReassocOp : fir_OneResultOp<"no_reassoc", - [SameOperandsAndResultType]> { + [NoSideEffect, SameOperandsAndResultType]> { let summary = "synthetic op to prevent reassociation"; let description = [{ Primitive operation meant to intrusively prevent operator reassociation. 
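As a standalone illustration (not code from this patch), the `fir.convert`
verifier above amounts to a symmetric compatibility relation over a few
type categories: integer, float, and pointer values inter-convert, except
float with pointer, while logical and complex values convert only within
their own category. A minimal C++ sketch of that relation, with a
hypothetical `TypeClass` enum standing in for the real MLIR type
predicates:

```c++
#include <cstdint>

// Hypothetical stand-ins for the isIntegerCompatible/isFloatCompatible/
// isPointerCompatible predicates and the logical/complex cases used by
// the fir.convert verifier.
enum class TypeClass : std::uint8_t { Integer, Float, Pointer, Logical, Complex };

// Mirrors the structure of the verifier: identical categories always
// convert; integer/float/pointer inter-convert except float<->pointer;
// logical and complex never cross categories.
bool isValidConversion(TypeClass in, TypeClass out) {
  if (in == out)
    return true;
  auto numericOrPtr = [](TypeClass c) {
    return c == TypeClass::Integer || c == TypeClass::Float ||
           c == TypeClass::Pointer;
  };
  if (numericOrPtr(in) && numericOrPtr(out))
    return !((in == TypeClass::Float && out == TypeClass::Pointer) ||
             (in == TypeClass::Pointer && out == TypeClass::Float));
  return false;
}
```

The real verifier first accepts the trivial case where the input and
output types are identical; the category-level equality test in this
sketch subsumes that case.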
@@ -2447,101 +2676,86 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { %z = constant 0 : index %o = constant 1 : index %4 = fir.insert_value %3, %1, %z : (tuple, i32, index) -> tuple - %5 = fir.insert_value %4, %1, %o : (tuple, f32, index) -> tuple - return %5 + %5 = fir.insert_value %4, %2, %o : (tuple, f32, index) -> tuple + fir.has_value %5 : tuple } ``` }]; let arguments = (ins StrAttr:$sym_name, - OptionalAttr:$initval, - UnitAttr:$constant, - TypeAttr:$type + SymbolRefAttr:$symref, + TypeAttr:$type, + OptionalAttr:$initVal, + OptionalAttr:$constant, + OptionalAttr:$linkName ); let regions = (region AtMostRegion<1>:$region); - let parser = [{ - // Parse the name as a symbol reference attribute. - SymbolRefAttr nameAttr; - if (parser.parseAttribute(nameAttr, mlir::SymbolTable::getSymbolAttrName(), - result.attributes)) - return failure(); - - auto &builder = parser.getBuilder(); - auto name = nameAttr.getRootReference(); - result.attributes.back().second = builder.getStringAttr(name); - - bool simpleInitializer = false; - if (!parser.parseOptionalLParen()) { - Attribute attr; - if (parser.parseAttribute(attr, initValAttrName(), result.attributes) || - parser.parseRParen()) - return failure(); - simpleInitializer = true; - } - - if (succeeded(parser.parseOptionalKeyword(constantAttrName()))) { - // if "constant" keyword then mark this as a constant, not a variable - result.addAttribute(constantAttrName(), builder.getUnitAttr()); - } - - mlir::Type globalType; - if (parser.parseColonType(globalType)) - return failure(); - - result.addAttribute(typeAttrName(), mlir::TypeAttr::get(globalType)); - - if (!simpleInitializer) { - // Parse the optional initializer body. - if (parser.parseRegion(*result.addRegion(), llvm::None, llvm::None)) - return failure(); - } - - return success(); - }]; + let parser = "return parseGlobalOp(parser, result);"; let printer = [{ - auto varName = getAttrOfType( - mlir::SymbolTable::getSymbolAttrName()).getValue(); - p << getOperationName() << " @" << varName; - if (auto iv = initval().getValueOr(Attribute())) { - p << '('; - p.printAttribute(iv); - p << ')'; - } + p << getOperationName(); + if (linkName().hasValue()) + p << ' ' << linkName().getValue(); + p << ' '; + p.printAttributeWithoutType(getAttr(symbolAttrName())); + if (auto val = getValueOrNull()) + p << '(' << val << ')'; if (getAttr(constantAttrName())) - p << ' ' << constantAttrName(); + p << " constant"; p << " : "; p.printType(getType()); - Region &body = getOperation()->getRegion(0); - if (!body.empty()) - p.printRegion(body, /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/true); + if (hasInitializationBody()) + p.printRegion(getOperation()->getRegion(0), /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); }]; let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"mlir::Builder *builder, OperationState &result," - "StringRef name, Type type, ArrayRef attrs = {}", - [{ - result.addAttribute(typeAttrName(), mlir::TypeAttr::get(type)); - result.addAttribute(mlir::SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); - result.addAttributes(attrs); - }]> + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, Type type, ArrayRef attrs = {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, bool isConstant, Type type," + "ArrayRef attrs = {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, Type type, StringAttr linkage = {}," + "ArrayRef attrs 
= {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, bool isConstant, Type type," + "StringAttr linkage = {}," + "ArrayRef attrs = {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, Type type, Attribute initVal," + "StringAttr linkage = {}," + "ArrayRef attrs = {}">, + OpBuilder<"mlir::OpBuilder &builder, OperationState &result," + "StringRef name, bool isConstant, Type type," + "Attribute initVal, StringAttr linkage = {}," + "ArrayRef attrs = {}">, ]; let extraClassDeclaration = [{ + static constexpr llvm::StringRef symbolAttrName() { return "symref"; } static constexpr llvm::StringRef constantAttrName() { return "constant"; } - static constexpr llvm::StringRef initValAttrName() { return "initval"; } + static constexpr llvm::StringRef initValAttrName() { return "initVal"; } + static constexpr llvm::StringRef linkageAttrName() { return "linkName"; } static constexpr llvm::StringRef typeAttrName() { return "type"; } + /// The printable type of the global mlir::Type getType() { return getAttrOfType(typeAttrName()).getValue(); } + /// The semantic type of the global + mlir::Type resultType() { + return fir::AllocaOp::wrapResultType(getType()); + } + + /// Return the initializer attribute if it exists, or a null attribute. + Attribute getValueOrNull() { return initVal().getValueOr(Attribute()); } + /// Append the next initializer value to the `GlobalOp` to construct /// the variable's initial value. void appendInitialValue(mlir::Operation *op); @@ -2551,6 +2765,19 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> { /// A GlobalOp has one block. mlir::Block &getBlock() { return getRegion().front(); } + + /// Determine if `linkage` is a supported keyword + static mlir::ParseResult verifyValidLinkage(StringRef linkage); + + bool hasInitializationBody() { + return (getOperation()->getNumRegions() == 1) && !getRegion().empty() && + !isa(getBlock().front()); + } + + mlir::FlatSymbolRefAttr getSymbol() { + return mlir::FlatSymbolRefAttr::get(getAttrOfType( + mlir::SymbolTable::getSymbolAttrName()).getValue(), getContext()); + } }]; } @@ -2601,8 +2828,10 @@ def fir_GlobalLenOp : fir_Op<"global_len", []> { }]; } -def fir_DispatchTableOp : fir_Op<"dispatch_table", [IsolatedFromAbove, Symbol, - ImplicitFirTerminator]> { +def ImplicitFirTerminator : SingleBlockImplicitTerminator<"FirEndOp">; + +def fir_DispatchTableOp : fir_Op<"dispatch_table", + [IsolatedFromAbove, Symbol, ImplicitFirTerminator]> { let summary = "Dispatch table definition"; let description = [{ @@ -2663,11 +2892,11 @@ def fir_DispatchTableOp : fir_Op<"dispatch_table", [IsolatedFromAbove, Symbol, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"mlir::Builder *builder, OperationState *result," + OpBuilder<"mlir::OpBuilder &builder, OperationState *result," "StringRef name, Type type, ArrayRef attrs = {}", [{ result->addAttribute(mlir::SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); + builder.getStringAttr(name)); result->addAttributes(attrs); }]> ]; diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h index 73174371718a2..3749c87b9e947 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -24,6 +24,8 @@ class hash_code; namespace mlir { class DialectAsmParser; class DialectAsmPrinter; +class ComplexType; +class FloatType; } // namespace mlir namespace fir { @@ -89,6 +91,19 @@ bool 
isa_fir_or_std_type(mlir::Type t); /// Is `t` a FIR dialect type that implies a memory (de)reference? bool isa_ref_type(mlir::Type t); +/// Is `t` a type that is always trivially pass-by-reference? +bool isa_passbyref_type(mlir::Type t); + +/// Is `t` a boxed type? +bool isa_box_type(mlir::Type t); + +/// Is `t` a type that can conform to be pass-by-reference? Depending on the +/// context, these types may simply demote to pass-by-reference or a reference +/// to them may have to be passed instead. +inline bool conformsWithPassByRef(mlir::Type t) { + return isa_ref_type(t) || isa_box_type(t); +} + /// Is `t` a FIR dialect aggregate type? bool isa_aggregate(mlir::Type t); @@ -127,6 +142,10 @@ class CplxType : public mlir::Type::TypeBase type. + mlir::Type getElementType() const; + KindTy getFKind() const; }; @@ -324,6 +343,21 @@ class SequenceType : public mlir::Type::TypeBase`) + bool hasUnknownShape() const { return getShape().empty(); } + + /// Is the interior of the sequence constant? Check if the array is + /// one of constant shape (`array`), unknown shape + /// (`array<*xT>`), or rows with shape and ending with column(s) of + /// unknown extent (`array`). + bool hasConstantInterior() const; + /// The value `-1` represents an unknown extent for a dimension static constexpr Extent getUnknownExtent() { return -1; } @@ -394,6 +428,20 @@ mlir::Type parseFirType(FIROpsDialect *, mlir::DialectAsmParser &parser); void printFirType(FIROpsDialect *, mlir::Type ty, mlir::DialectAsmPrinter &p); +/// Guarantee `type` is a scalar integral type (standard Integer, standard +/// Index, or FIR Int). Aborts execution if condition is false. +void verifyIntegralType(mlir::Type type); + +/// Is `t` a FIR Real or MLIR Float type? +inline bool isa_real(mlir::Type t) { + return t.isa() || t.isa(); +} + +/// Is `t` a FIR or MLIR Complex type? 
+inline bool isa_complex(mlir::Type t) { + return t.isa() || t.isa(); +} + } // namespace fir #endif // OPTIMIZER_DIALECT_FIRTYPE_H diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h index 9c940fe7d6d87..cc203e90bc913 100644 --- a/flang/lib/Decimal/big-radix-floating-point.h +++ b/flang/lib/Decimal/big-radix-floating-point.h @@ -27,6 +27,7 @@ #include "flang/Common/unsigned-const-division.h" #include "flang/Decimal/binary-floating-point.h" #include "flang/Decimal/decimal.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -111,6 +112,8 @@ template class BigRadixFloatingPointNumber { void Minimize( BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); + llvm::raw_ostream &Dump(llvm::raw_ostream &) const; + private: BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that) : digits_{that.digits_}, exponent_{that.exponent_}, @@ -283,14 +286,6 @@ template class BigRadixFloatingPointNumber { } } - template void MultiplyByRounded() { - if (int carry{MultiplyBy()}) { - LoseLeastSignificantDigit(); - digit_[digits_ - 1] += carry; - exponent_ += log10Radix; - } - } - void LoseLeastSignificantDigit(); // with rounding void PushCarry(int carry) { diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp index a7af4036f0d62..02e39c241ee1a 100644 --- a/flang/lib/Decimal/binary-to-decimal.cpp +++ b/flang/lib/Decimal/binary-to-decimal.cpp @@ -8,6 +8,8 @@ #include "big-radix-floating-point.h" #include "flang/Decimal/decimal.h" +#include +#include namespace Fortran::decimal { @@ -54,17 +56,18 @@ BigRadixFloatingPointNumber::BigRadixFloatingPointNumber( ++exponent_; } + int overflow{0}; for (; twoPow >= 9; twoPow -= 9) { // D * 10.**E * 2.**twoPow -> (D*(2**9)) * 10.**E * 2.**(twoPow-9) - MultiplyByRounded<512>(); + overflow |= MultiplyBy<512>(); } for (; twoPow >= 3; twoPow -= 3) { // D * 10.**E * 2.**twoPow -> (D*(2**3)) * 10.**E * 2.**(twoPow-3) - MultiplyByRounded<8>(); + overflow |= MultiplyBy<8>(); } for (; twoPow > 0; --twoPow) { // D * 10.**E * 2.**twoPow -> (2*D) * 10.**E * 2.**(twoPow-1) - MultiplyByRounded<2>(); + overflow |= MultiplyBy<2>(); } while (twoPow < 0) { @@ -85,21 +88,23 @@ BigRadixFloatingPointNumber::BigRadixFloatingPointNumber( for (; twoPow <= -4; twoPow += 4) { // D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4) - MultiplyByRounded<(5 * 5 * 5 * 5)>(); + overflow |= MultiplyBy<(5 * 5 * 5 * 5)>(); exponent_ -= 4; } if (twoPow <= -2) { // D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2) - MultiplyByRounded<25>(); + overflow |= MultiplyBy<5 * 5>(); twoPow += 2; exponent_ -= 2; } for (; twoPow < 0; ++twoPow) { // D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1) - MultiplyByRounded<5>(); + overflow |= MultiplyBy<5>(); --exponent_; } + assert(overflow == 0); + // twoPow == 0, the decimal encoding is complete. 
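+  // The D * 10**E representation can hold these products exactly, so
+  // no rounding is needed here; the assertion above verifies that the
+  // digit buffer did not overflow, which is what justified replacing
+  // the rounded MultiplyByRounded<> calls with exact MultiplyBy<> calls.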
  Normalize();
 }
 
@@ -299,37 +304,6 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Minimize(
   Normalize();
 }
 
-template <int PREC, int LOG10RADIX>
-void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::LoseLeastSignificantDigit() {
-  Digit LSD{digit_[0]};
-  for (int j{0}; j < digits_ - 1; ++j) {
-    digit_[j] = digit_[j + 1];
-  }
-  digit_[digits_ - 1] = 0;
-  bool incr{false};
-  switch (rounding_) {
-  case RoundNearest:
-  case RoundDefault:
-    incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
-    break;
-  case RoundUp:
-    incr = LSD > 0 && !isNegative_;
-    break;
-  case RoundDown:
-    incr = LSD > 0 && isNegative_;
-    break;
-  case RoundToZero:
-    break;
-  case RoundCompatible:
-    incr = LSD >= radix / 2;
-    break;
-  }
-  for (int j{0}; (digit_[j] += incr) == radix; ++j) {
-    digit_[j] = 0;
-  }
-}
-
 template <int PREC>
 ConversionToDecimalResult ConvertToDecimal(char *buffer, std::size_t size,
     enum DecimalConversionFlags flags, int digits,
@@ -358,12 +332,12 @@ ConversionToDecimalResult ConvertToDecimal(char *buffer, std::size_t size,
     // decimal sequence in that range.
     using Binary = typename Big::Real;
     Binary less{x};
-    --less.raw;
+    less.Previous();
     Binary more{x};
     if (!x.IsMaximalFiniteMagnitude()) {
-      ++more.raw;
+      more.Next();
     }
     number.Minimize(Big{less, rounding}, Big{more, rounding});
   }
   return number.ConvertToDecimal(buffer, size, flags, digits);
 }
@@ -412,4 +387,22 @@ ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer,
   }
 #endif
 }
+
+template <int PREC, int LOG10RADIX>
+llvm::raw_ostream &BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Dump(
+    llvm::raw_ostream &o) const {
+  if (isNegative_) {
+    o << '-';
+  }
+  o << "10**(" << exponent_ << ") * ...\n";
+  for (int j{digits_}; --j >= 0;) {
+    std::string str{std::to_string(digit_[j])};
+    o << std::string(20 - str.size(), ' ') << str << " [" << j << ']';
+    if (j + 1 == digitLimit_) {
+      o << " (limit)";
+    }
+    o << '\n';
+  }
+  return o;
+}
 } // namespace Fortran::decimal
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index f7efb230faf2b..502f0a003d641 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -139,6 +139,37 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
   return true;
 }
 
+template <int PREC, int LOG10RADIX>
+void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::LoseLeastSignificantDigit() {
+  Digit LSD{digit_[0]};
+  for (int j{0}; j < digits_ - 1; ++j) {
+    digit_[j] = digit_[j + 1];
+  }
+  digit_[digits_ - 1] = 0;
+  bool incr{false};
+  switch (rounding_) {
+  case RoundNearest:
+  case RoundDefault:
+    incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
+    break;
+  case RoundUp:
+    incr = LSD > 0 && !isNegative_;
+    break;
+  case RoundDown:
+    incr = LSD > 0 && isNegative_;
+    break;
+  case RoundToZero:
+    break;
+  case RoundCompatible:
+    incr = LSD >= radix / 2;
+    break;
+  }
+  for (int j{0}; (digit_[j] += incr) == radix; ++j) {
+    digit_[j] = 0;
+  }
+}
+
 // This local utility class represents an unrounded nonnegative
 // binary floating-point value with an unbiased (i.e., signed)
 // binary exponent, an integer value (not a fraction) with an implied
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 3f71cb6a1aeaf..43686815ab351 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -7,10 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Evaluate/check-expression.h"
+#include "flang/Evaluate/intrinsics.h"
 #include "flang/Evaluate/traverse.h"
 #include "flang/Evaluate/type.h"
 #include "flang/Semantics/symbol.h"
#include "flang/Semantics/tools.h" +#include +#include namespace Fortran::evaluate { @@ -171,6 +174,7 @@ class IsInitialDataTargetHelper return (*this)(x.left()); } bool operator()(const Relational &) const { return false; } + private: parser::ContextualMessages *messages_; }; @@ -187,8 +191,10 @@ class CheckSpecificationExprHelper public: using Result = std::optional; using Base = AnyTraverse; - explicit CheckSpecificationExprHelper(const semantics::Scope &s) - : Base{*this}, scope_{s} {} + explicit CheckSpecificationExprHelper(const semantics::Scope &s, + const IntrinsicProcTable &table, SpecificationExprContext specExprContext) + : Base{*this}, scope_{s}, table_{table}, specExprContext_{ + specExprContext} {} using Base::operator(); Result operator()(const ProcedureDesignator &) const { @@ -199,6 +205,10 @@ class CheckSpecificationExprHelper Result operator()(const semantics::Symbol &symbol) const { if (semantics::IsNamedConstant(symbol)) { return std::nullopt; + } else if (scope_.IsDerivedType() && IsVariableName(symbol) && + specExprContext_ == SpecificationExprContext::BOUND) { // C750 + return "reference to variable '"s + symbol.name().ToString() + + "' not allowed for derived type components"; } else if (symbol.IsDummy()) { if (symbol.attrs().test(semantics::Attr::OPTIONAL)) { return "reference to OPTIONAL dummy argument '"s + @@ -243,16 +253,51 @@ class CheckSpecificationExprHelper return std::nullopt; } + template + Result operator()(const TypeParamInquiry &inq) const { + if (scope_.IsDerivedType() && !IsConstantExpr(inq) && + inq.parameter().owner() != scope_ && + specExprContext_ == SpecificationExprContext::BOUND) { // C750 + return "non-constant reference to a type parameter inquiry " + "not allowed for derived type components"; + } + return std::nullopt; + } + template Result operator()(const FunctionRef &x) const { if (const auto *symbol{x.proc().GetSymbol()}) { if (!semantics::IsPureProcedure(*symbol)) { return "reference to impure function '"s + symbol->name().ToString() + "'"; } + if (semantics::IsStmtFunction(*symbol)) { + return "reference to statement function '"s + + symbol->name().ToString() + "'"; + } + if (scope_.IsDerivedType() && + specExprContext_ == SpecificationExprContext::BOUND) { // C750 + return "reference to function '"s + symbol->name().ToString() + + "' not allowed for derived type components"; + } // TODO: other checks for standard module procedures } else { const SpecificIntrinsic &intrin{DEREF(x.proc().GetSpecificIntrinsic())}; - if (intrin.name == "present") { + if (scope_.IsDerivedType() && + specExprContext_ == SpecificationExprContext::BOUND) { // C750 + if ((table_.IsIntrinsic(intrin.name) && + badIntrinsicsForComponents_.find(intrin.name) != + badIntrinsicsForComponents_.end()) || + IsProhibitedFunction(intrin.name)) { + return "reference to intrinsic '"s + intrin.name + + "' not allowed for derived type components"; + } + if (table_.GetIntrinsicClass(intrin.name) == + IntrinsicClass::inquiryFunction && + !IsConstantExpr(x)) { + return "non-constant reference to inquiry intrinsic '"s + + intrin.name + "' not allowed for derived type components"; + } + } else if (intrin.name == "present") { return std::nullopt; // no need to check argument(s) } if (IsConstantExpr(x)) { @@ -265,29 +310,42 @@ class CheckSpecificationExprHelper private: const semantics::Scope &scope_; + const IntrinsicProcTable &table_; + const SpecificationExprContext specExprContext_; + const std::set badIntrinsicsForComponents_{ + "allocated", "associated", "extends_type_of", 
"present", "same_type_as"}; + static bool IsProhibitedFunction(std::string name) { return false; } }; template void CheckSpecificationExpr(const A &x, parser::ContextualMessages &messages, - const semantics::Scope &scope) { - if (auto why{CheckSpecificationExprHelper{scope}(x)}) { + const semantics::Scope &scope, const IntrinsicProcTable &table, + SpecificationExprContext specExprContext) { + if (auto why{ + CheckSpecificationExprHelper{scope, table, specExprContext}(x)}) { messages.Say("Invalid specification expression: %s"_err_en_US, *why); } } template void CheckSpecificationExpr(const Expr &, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); template void CheckSpecificationExpr(const Expr &, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); template void CheckSpecificationExpr(const Expr &, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); template void CheckSpecificationExpr(const std::optional> &, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); template void CheckSpecificationExpr(const std::optional> &, - parser::ContextualMessages &, const semantics::Scope &); + parser::ContextualMessages &, const semantics::Scope &, + const IntrinsicProcTable &, SpecificationExprContext); template void CheckSpecificationExpr( const std::optional> &, parser::ContextualMessages &, - const semantics::Scope &); + const semantics::Scope &, const IntrinsicProcTable &, + SpecificationExprContext); // IsSimplyContiguous() -- 9.5.4 class IsSimplyContiguousHelper diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index cbf082bd8ac5a..605b100f42f3a 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -229,6 +229,7 @@ struct IntrinsicInterface { IntrinsicDummyArgument dummy[maxArguments]; TypePattern result; Rank rank{Rank::elemental}; + IntrinsicClass intrinsicClass{IntrinsicClass::elementalFunction}; std::optional Match(const CallCharacteristics &, const common::IntrinsicTypeDefaultKinds &, ActualArguments &, FoldingContext &context) const; @@ -265,19 +266,21 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"aimag", {{"x", SameComplex}}, SameReal}, {"aint", {{"a", SameReal}, MatchingDefaultKIND}, KINDReal}, {"all", {{"mask", SameLogical, Rank::array}, OptionalDIM}, SameLogical, - Rank::dimReduced}, - {"allocated", {{"array", AnyData, Rank::array}}, DefaultLogical}, - {"allocated", {{"scalar", AnyData, Rank::scalar}}, DefaultLogical}, + Rank::dimReduced, IntrinsicClass::transformationalFunction}, + {"allocated", {{"array", AnyData, Rank::array}}, DefaultLogical, + Rank::elemental, IntrinsicClass::inquiryFunction}, + {"allocated", {{"scalar", AnyData, Rank::scalar}}, DefaultLogical, + Rank::elemental, IntrinsicClass::inquiryFunction}, {"anint", {{"a", SameReal}, MatchingDefaultKIND}, KINDReal}, {"any", {{"mask", SameLogical, Rank::array}, OptionalDIM}, SameLogical, - Rank::dimReduced}, + Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"asin", {{"x", SameFloating}}, SameFloating}, {"asind", {{"x", SameFloating}}, 
SameFloating}, {"asinh", {{"x", SameFloating}}, SameFloating}, {"associated", {{"pointer", Addressable, Rank::known}, {"target", Addressable, Rank::known, Optionality::optional}}, - DefaultLogical}, + DefaultLogical, Rank::elemental, IntrinsicClass::inquiryFunction}, {"atan", {{"x", SameFloating}}, SameFloating}, {"atand", {{"x", SameFloating}}, SameFloating}, {"atan", {{"y", OperandReal}, {"x", OperandReal}}, OperandReal}, @@ -291,14 +294,14 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"bessel_jn", {{"n1", AnyInt, Rank::scalar}, {"n2", AnyInt, Rank::scalar}, {"x", SameReal, Rank::scalar}}, - SameReal, Rank::vector}, + SameReal, Rank::vector, IntrinsicClass::transformationalFunction}, {"bessel_y0", {{"x", SameReal}}, SameReal}, {"bessel_y1", {{"x", SameReal}}, SameReal}, {"bessel_yn", {{"n", AnyInt}, {"x", SameReal}}, SameReal}, {"bessel_yn", {{"n1", AnyInt, Rank::scalar}, {"n2", AnyInt, Rank::scalar}, {"x", SameReal, Rank::scalar}}, - SameReal, Rank::vector}, + SameReal, Rank::vector, IntrinsicClass::transformationalFunction}, {"bge", {{"i", AnyInt, Rank::elementalOrBOZ}, {"j", AnyInt, Rank::elementalOrBOZ}}, @@ -308,7 +311,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"j", AnyInt, Rank::elementalOrBOZ}}, DefaultLogical}, {"bit_size", {{"i", SameInt, Rank::anyOrAssumedRank}}, SameInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"ble", {{"i", AnyInt, Rank::elementalOrBOZ}, {"j", AnyInt, Rank::elementalOrBOZ}}, @@ -327,34 +330,36 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"y", AnyIntOrReal, Rank::elementalOrBOZ, Optionality::optional}, DefaultingKIND}, KINDComplex}, - {"command_argument_count", {}, DefaultInt, Rank::scalar}, + {"command_argument_count", {}, DefaultInt, Rank::scalar, + IntrinsicClass::transformationalFunction}, {"conjg", {{"z", SameComplex}}, SameComplex}, {"cos", {{"x", SameFloating}}, SameFloating}, {"cosd", {{"x", SameFloating}}, SameFloating}, {"cosh", {{"x", SameFloating}}, SameFloating}, {"count", {{"mask", AnyLogical, Rank::array}, OptionalDIM, DefaultingKIND}, - KINDInt, Rank::dimReduced}, + KINDInt, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"cshift", {{"array", SameType, Rank::array}, {"shift", AnyInt, Rank::dimRemoved}, OptionalDIM}, - SameType, Rank::conformable}, + SameType, Rank::conformable, IntrinsicClass::transformationalFunction}, {"dble", {{"a", AnyNumeric, Rank::elementalOrBOZ}}, DoublePrecision}, {"digits", {{"x", AnyIntOrReal, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"dim", {{"x", OperandIntOrReal}, {"y", OperandIntOrReal}}, OperandIntOrReal}, {"dot_product", {{"vector_a", AnyLogical, Rank::vector}, {"vector_b", AnyLogical, Rank::vector}}, - ResultLogical, Rank::scalar}, + ResultLogical, Rank::scalar, IntrinsicClass::transformationalFunction}, {"dot_product", {{"vector_a", AnyComplex, Rank::vector}, {"vector_b", AnyNumeric, Rank::vector}}, - ResultNumeric, Rank::scalar}, // conjugates vector_a + ResultNumeric, Rank::scalar, // conjugates vector_a + IntrinsicClass::transformationalFunction}, {"dot_product", {{"vector_a", AnyIntOrReal, Rank::vector}, {"vector_b", AnyNumeric, Rank::vector}}, - ResultNumeric, Rank::scalar}, + ResultNumeric, Rank::scalar, IntrinsicClass::transformationalFunction}, {"dprod", {{"x", DefaultReal}, {"y", DefaultReal}}, DoublePrecision}, {"dshiftl", {{"i", SameInt}, {"j", SameInt, Rank::elementalOrBOZ}, @@ -372,68 +377,72 @@ static const IntrinsicInterface 
genericIntrinsicFunction[]{
         {"boundary", SameIntrinsic, Rank::dimRemoved, Optionality::optional},
         OptionalDIM},
-      SameIntrinsic, Rank::conformable},
+      SameIntrinsic, Rank::conformable,
+      IntrinsicClass::transformationalFunction},
     {"eoshift",
         {{"array", SameDerivedType, Rank::array},
            {"shift", AnyInt, Rank::dimRemoved},
            {"boundary", SameDerivedType, Rank::dimRemoved}, OptionalDIM},
-      SameDerivedType, Rank::conformable},
+      SameDerivedType, Rank::conformable,
+      IntrinsicClass::transformationalFunction},
     {"epsilon", {{"x", SameReal, Rank::anyOrAssumedRank}}, SameReal,
-      Rank::scalar},
+      Rank::scalar, IntrinsicClass::inquiryFunction},
     {"erf", {{"x", SameReal}}, SameReal},
     {"erfc", {{"x", SameReal}}, SameReal},
     {"erfc_scaled", {{"x", SameReal}}, SameReal},
     {"exp", {{"x", SameFloating}}, SameFloating},
     {"exponent", {{"x", AnyReal}}, DefaultInt},
     {"extends_type_of",
         {{"a", ExtensibleDerived, Rank::anyOrAssumedRank},
            {"mold", ExtensibleDerived, Rank::anyOrAssumedRank}},
-      DefaultLogical, Rank::scalar},
+      DefaultLogical, Rank::scalar, IntrinsicClass::inquiryFunction},
     {"findloc",
         {{"array", AnyNumeric, Rank::array},
            {"value", AnyNumeric, Rank::scalar}, RequiredDIM, OptionalMASK,
            SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::dimRemoved},
+      KINDInt, Rank::dimRemoved, IntrinsicClass::transformationalFunction},
     {"findloc",
         {{"array", AnyNumeric, Rank::array},
            {"value", AnyNumeric, Rank::scalar}, OptionalMASK, SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::vector},
+      KINDInt, Rank::vector, IntrinsicClass::transformationalFunction},
     {"findloc",
         {{"array", SameChar, Rank::array}, {"value", SameChar, Rank::scalar},
            RequiredDIM, OptionalMASK, SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::dimRemoved},
+      KINDInt, Rank::dimRemoved, IntrinsicClass::transformationalFunction},
     {"findloc",
         {{"array", SameChar, Rank::array}, {"value", SameChar, Rank::scalar},
            OptionalMASK, SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::vector},
+      KINDInt, Rank::vector, IntrinsicClass::transformationalFunction},
     {"findloc",
         {{"array", AnyLogical, Rank::array},
            {"value", AnyLogical, Rank::scalar}, RequiredDIM, OptionalMASK,
            SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::dimRemoved},
+      KINDInt, Rank::dimRemoved, IntrinsicClass::transformationalFunction},
     {"findloc",
         {{"array", AnyLogical, Rank::array},
            {"value", AnyLogical, Rank::scalar}, OptionalMASK, SizeDefaultKIND,
            {"back", AnyLogical, Rank::scalar, Optionality::optional}},
-      KINDInt, Rank::vector},
+      KINDInt, Rank::vector, IntrinsicClass::transformationalFunction},
     {"floor", {{"a", AnyReal}, DefaultingKIND}, KINDInt},
     {"fraction", {{"x", SameReal}}, SameReal},
     {"gamma", {{"x", SameReal}}, SameReal},
     {"huge", {{"x", SameIntOrReal, Rank::anyOrAssumedRank}}, SameIntOrReal,
-      Rank::scalar},
+      Rank::scalar, IntrinsicClass::inquiryFunction},
     {"hypot", {{"x", OperandReal}, {"y", OperandReal}}, OperandReal},
     {"iachar", {{"c", AnyChar}, DefaultingKIND}, KINDInt},
     {"iall", {{"array", SameInt, Rank::array}, OptionalDIM, OptionalMASK},
-      SameInt, Rank::dimReduced},
+      SameInt, Rank::dimReduced, IntrinsicClass::transformationalFunction},
     {"iany", {{"array", SameInt, Rank::array}, OptionalDIM, OptionalMASK},
-      SameInt, Rank::dimReduced},
+      SameInt, Rank::dimReduced,
IntrinsicClass::transformationalFunction}, {"iparity", {{"array", SameInt, Rank::array}, OptionalDIM, OptionalMASK}, - SameInt, Rank::dimReduced}, + SameInt, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"iand", {{"i", SameInt}, {"j", SameInt, Rank::elementalOrBOZ}}, SameInt}, {"iand", {{"i", BOZ}, {"j", SameInt}}, SameInt}, {"ibclr", {{"i", SameInt}, {"pos", AnyInt}}, SameInt}, @@ -461,19 +470,20 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"size", AnyInt, Rank::elemental, Optionality::optional}}, SameInt}, {"is_contiguous", {{"array", Addressable, Rank::anyOrAssumedRank}}, - DefaultLogical}, + DefaultLogical, Rank::elemental, IntrinsicClass::inquiryFunction}, {"is_iostat_end", {{"i", AnyInt}}, DefaultLogical}, {"is_iostat_eor", {{"i", AnyInt}}, DefaultLogical}, - {"kind", {{"x", AnyIntrinsic}}, DefaultInt}, + {"kind", {{"x", AnyIntrinsic}}, DefaultInt, Rank::elemental, + IntrinsicClass::inquiryFunction}, {"lbound", {{"array", AnyData, Rank::anyOrAssumedRank}, RequiredDIM, SizeDefaultKIND}, - KINDInt, Rank::scalar}, + KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction}, {"lbound", {{"array", AnyData, Rank::anyOrAssumedRank}, SizeDefaultKIND}, - KINDInt, Rank::vector}, + KINDInt, Rank::vector, IntrinsicClass::inquiryFunction}, {"leadz", {{"i", AnyInt}}, DefaultInt}, {"len", {{"string", AnyChar, Rank::anyOrAssumedRank}, DefaultingKIND}, - KINDInt, Rank::scalar}, + KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction}, {"len_trim", {{"string", AnyChar}, DefaultingKIND}, KINDInt}, {"lge", {{"string_a", SameChar}, {"string_b", SameChar}}, DefaultLogical}, {"lgt", {{"string_a", SameChar}, {"string_b", SameChar}}, DefaultLogical}, @@ -488,27 +498,27 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"matmul", {{"array_a", AnyLogical, Rank::vector}, {"array_b", AnyLogical, Rank::matrix}}, - ResultLogical, Rank::vector}, + ResultLogical, Rank::vector, IntrinsicClass::transformationalFunction}, {"matmul", {{"array_a", AnyLogical, Rank::matrix}, {"array_b", AnyLogical, Rank::vector}}, - ResultLogical, Rank::vector}, + ResultLogical, Rank::vector, IntrinsicClass::transformationalFunction}, {"matmul", {{"array_a", AnyLogical, Rank::matrix}, {"array_b", AnyLogical, Rank::matrix}}, - ResultLogical, Rank::matrix}, + ResultLogical, Rank::matrix, IntrinsicClass::transformationalFunction}, {"matmul", {{"array_a", AnyNumeric, Rank::vector}, {"array_b", AnyNumeric, Rank::matrix}}, - ResultNumeric, Rank::vector}, + ResultNumeric, Rank::vector, IntrinsicClass::transformationalFunction}, {"matmul", {{"array_a", AnyNumeric, Rank::matrix}, {"array_b", AnyNumeric, Rank::vector}}, - ResultNumeric, Rank::vector}, + ResultNumeric, Rank::vector, IntrinsicClass::transformationalFunction}, {"matmul", {{"array_a", AnyNumeric, Rank::matrix}, {"array_b", AnyNumeric, Rank::matrix}}, - ResultNumeric, Rank::matrix}, + ResultNumeric, Rank::matrix, IntrinsicClass::transformationalFunction}, {"maskl", {{"i", AnyInt}, DefaultingKIND}, KINDInt}, {"maskr", {{"i", AnyInt}, DefaultingKIND}, KINDInt}, {"max", @@ -520,15 +530,16 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"a3", SameChar, Rank::elemental, Optionality::repeats}}, SameChar}, {"maxexponent", {{"x", AnyReal, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"maxloc", {{"array", AnyRelatable, Rank::array}, OptionalDIM, OptionalMASK, SizeDefaultKIND, {"back", AnyLogical, Rank::scalar, Optionality::optional}}, - KINDInt, Rank::dimReduced}, + KINDInt, 
Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"maxval", {{"array", SameRelatable, Rank::array}, OptionalDIM, OptionalMASK}, - SameRelatable, Rank::dimReduced}, + SameRelatable, Rank::dimReduced, + IntrinsicClass::transformationalFunction}, {"merge", {{"tsource", SameType}, {"fsource", SameType}, {"mask", AnyLogical}}, SameType}, @@ -548,25 +559,26 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"a3", SameChar, Rank::elemental, Optionality::repeats}}, SameChar}, {"minexponent", {{"x", AnyReal, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"minloc", {{"array", AnyRelatable, Rank::array}, OptionalDIM, OptionalMASK, SizeDefaultKIND, {"back", AnyLogical, Rank::scalar, Optionality::optional}}, - KINDInt, Rank::dimReduced}, + KINDInt, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"minval", {{"array", SameRelatable, Rank::array}, OptionalDIM, OptionalMASK}, - SameRelatable, Rank::dimReduced}, + SameRelatable, Rank::dimReduced, + IntrinsicClass::transformationalFunction}, {"mod", {{"a", OperandIntOrReal}, {"p", OperandIntOrReal}}, OperandIntOrReal}, {"modulo", {{"a", OperandIntOrReal}, {"p", OperandIntOrReal}}, OperandIntOrReal}, {"nearest", {{"x", SameReal}, {"s", AnyReal}}, SameReal}, {"new_line", {{"x", SameChar, Rank::anyOrAssumedRank}}, SameChar, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"nint", {{"a", AnyReal}, DefaultingKIND}, KINDInt}, {"norm2", {{"x", SameReal, Rank::array}, OptionalDIM}, SameReal, - Rank::dimReduced}, + Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"not", {{"i", SameInt}}, SameInt}, // NULL() is a special case handled in Probe() below {"out_of_range", @@ -581,24 +593,25 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {{"array", SameType, Rank::array}, {"mask", AnyLogical, Rank::conformable}, {"vector", SameType, Rank::vector, Optionality::optional}}, - SameType, Rank::vector}, + SameType, Rank::vector, IntrinsicClass::transformationalFunction}, {"parity", {{"mask", SameLogical, Rank::array}, OptionalDIM}, SameLogical, - Rank::dimReduced}, + Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"popcnt", {{"i", AnyInt}}, DefaultInt}, {"poppar", {{"i", AnyInt}}, DefaultInt}, {"product", {{"array", SameNumeric, Rank::array}, OptionalDIM, OptionalMASK}, - SameNumeric, Rank::dimReduced}, + SameNumeric, Rank::dimReduced, + IntrinsicClass::transformationalFunction}, {"precision", {{"x", AnyFloating, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"present", {{"a", Addressable, Rank::anyOrAssumedRank}}, DefaultLogical, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"radix", {{"x", AnyIntOrReal, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, {"range", {{"x", AnyNumeric, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, - {"rank", {{"a", AnyData, Rank::anyOrAssumedRank}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::inquiryFunction}, + {"rank", {{"a", AnyData, Rank::anyOrAssumedRank}}, DefaultInt, Rank::scalar, + IntrinsicClass::inquiryFunction}, {"real", {{"a", SameComplex, Rank::elemental}}, SameReal}, // 16.9.160(4)(ii) {"real", {{"a", AnyNumeric, Rank::elementalOrBOZ}, DefaultingKIND}, @@ -608,19 +621,19 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"operation", SameType, Rank::reduceOperation}, OptionalDIM, OptionalMASK, {"identity", 
SameType, Rank::scalar}, {"ordered", AnyLogical, Rank::scalar, Optionality::optional}}, - SameType, Rank::dimReduced}, + SameType, Rank::dimReduced, IntrinsicClass::transformationalFunction}, {"repeat", {{"string", SameChar, Rank::scalar}, {"ncopies", AnyInt}}, - SameChar, Rank::scalar}, + SameChar, Rank::scalar, IntrinsicClass::transformationalFunction}, {"reshape", {{"source", SameType, Rank::array}, {"shape", AnyInt, Rank::shape}, {"pad", SameType, Rank::array, Optionality::optional}, {"order", AnyInt, Rank::vector, Optionality::optional}}, - SameType, Rank::shaped}, + SameType, Rank::shaped, IntrinsicClass::transformationalFunction}, {"rrspacing", {{"x", SameReal}}, SameReal}, {"same_type_as", {{"a", ExtensibleDerived, Rank::anyOrAssumedRank}, {"b", ExtensibleDerived, Rank::anyOrAssumedRank}}, - DefaultLogical, Rank::scalar}, + DefaultLogical, Rank::scalar, IntrinsicClass::inquiryFunction}, {"scale", {{"x", SameReal}, {"i", AnyInt}}, SameReal}, {"scan", {{"string", SameChar}, {"set", SameChar}, @@ -628,27 +641,27 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ DefaultingKIND}, KINDInt}, {"selected_char_kind", {{"name", DefaultChar, Rank::scalar}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::transformationalFunction}, {"selected_int_kind", {{"r", AnyInt, Rank::scalar}}, DefaultInt, - Rank::scalar}, + Rank::scalar, IntrinsicClass::transformationalFunction}, {"selected_real_kind", {{"p", AnyInt, Rank::scalar}, {"r", AnyInt, Rank::scalar, Optionality::optional}, {"radix", AnyInt, Rank::scalar, Optionality::optional}}, - DefaultInt, Rank::scalar}, + DefaultInt, Rank::scalar, IntrinsicClass::transformationalFunction}, {"selected_real_kind", {{"p", AnyInt, Rank::scalar, Optionality::optional}, {"r", AnyInt, Rank::scalar}, {"radix", AnyInt, Rank::scalar, Optionality::optional}}, - DefaultInt, Rank::scalar}, + DefaultInt, Rank::scalar, IntrinsicClass::transformationalFunction}, {"selected_real_kind", {{"p", AnyInt, Rank::scalar, Optionality::optional}, {"r", AnyInt, Rank::scalar, Optionality::optional}, {"radix", AnyInt, Rank::scalar}}, - DefaultInt, Rank::scalar}, + DefaultInt, Rank::scalar, IntrinsicClass::transformationalFunction}, {"set_exponent", {{"x", SameReal}, {"i", AnyInt}}, SameReal}, {"shape", {{"source", AnyData, Rank::anyOrAssumedRank}, SizeDefaultKIND}, - KINDInt, Rank::vector}, + KINDInt, Rank::vector, IntrinsicClass::inquiryFunction}, {"shifta", {{"i", SameInt}, {"shift", AnyInt}}, SameInt}, {"shiftl", {{"i", SameInt}, {"shift", AnyInt}}, SameInt}, {"shiftr", {{"i", SameInt}, {"shift", AnyInt}}, SameInt}, @@ -659,45 +672,49 @@ static const IntrinsicInterface genericIntrinsicFunction[]{ {"size", {{"array", AnyData, Rank::anyOrAssumedRank}, OptionalDIM, SizeDefaultKIND}, - KINDInt, Rank::scalar}, + KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction}, {"spacing", {{"x", SameReal}}, SameReal}, {"spread", {{"source", SameType, Rank::known}, RequiredDIM, {"ncopies", AnyInt, Rank::scalar}}, - SameType, Rank::rankPlus1}, + SameType, Rank::rankPlus1, IntrinsicClass::transformationalFunction}, {"sqrt", {{"x", SameFloating}}, SameFloating}, {"storage_size", {{"a", AnyData, Rank::anyOrAssumedRank}, SizeDefaultKIND}, - KINDInt, Rank::scalar}, + KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction}, {"sum", {{"array", SameNumeric, Rank::array}, OptionalDIM, OptionalMASK}, - SameNumeric, Rank::dimReduced}, + SameNumeric, Rank::dimReduced, + IntrinsicClass::transformationalFunction}, {"tan", {{"x", SameFloating}}, SameFloating}, {"tand", {{"x", 
SameFloating}}, SameFloating}, {"tanh", {{"x", SameFloating}}, SameFloating}, - {"tiny", {{"x", SameReal, Rank::anyOrAssumedRank}}, SameReal, Rank::scalar}, + {"tiny", {{"x", SameReal, Rank::anyOrAssumedRank}}, SameReal, Rank::scalar, + IntrinsicClass::inquiryFunction}, {"trailz", {{"i", AnyInt}}, DefaultInt}, {"transfer", {{"source", AnyData, Rank::known}, {"mold", SameType, Rank::scalar}}, - SameType, Rank::scalar}, + SameType, Rank::scalar, IntrinsicClass::transformationalFunction}, {"transfer", {{"source", AnyData, Rank::known}, {"mold", SameType, Rank::array}}, - SameType, Rank::vector}, + SameType, Rank::vector, IntrinsicClass::transformationalFunction}, {"transfer", {{"source", AnyData, Rank::anyOrAssumedRank}, {"mold", SameType, Rank::anyOrAssumedRank}, {"size", AnyInt, Rank::scalar}}, - SameType, Rank::vector}, - {"transpose", {{"matrix", SameType, Rank::matrix}}, SameType, Rank::matrix}, - {"trim", {{"string", SameChar, Rank::scalar}}, SameChar, Rank::scalar}, + SameType, Rank::vector, IntrinsicClass::transformationalFunction}, + {"transpose", {{"matrix", SameType, Rank::matrix}}, SameType, Rank::matrix, + IntrinsicClass::transformationalFunction}, + {"trim", {{"string", SameChar, Rank::scalar}}, SameChar, Rank::scalar, + IntrinsicClass::transformationalFunction}, {"ubound", {{"array", AnyData, Rank::anyOrAssumedRank}, RequiredDIM, SizeDefaultKIND}, - KINDInt, Rank::scalar}, + KINDInt, Rank::scalar, IntrinsicClass::inquiryFunction}, {"ubound", {{"array", AnyData, Rank::anyOrAssumedRank}, SizeDefaultKIND}, - KINDInt, Rank::vector}, + KINDInt, Rank::vector, IntrinsicClass::inquiryFunction}, {"unpack", {{"vector", SameType, Rank::vector}, {"mask", AnyLogical, Rank::array}, {"field", SameType, Rank::conformable}}, - SameType, Rank::conformable}, + SameType, Rank::conformable, IntrinsicClass::transformationalFunction}, {"verify", {{"string", SameChar}, {"set", SameChar}, {"back", AnyLogical, Rank::elemental, Optionality::optional}, @@ -900,33 +917,34 @@ static const SpecificIntrinsicInterface specificIntrinsicFunction[]{ }; static const IntrinsicInterface intrinsicSubroutine[]{ - {"cpu_time", {{"time", AnyReal, Rank::scalar}}, {}}, + {"cpu_time", {{"time", AnyReal, Rank::scalar}}, {}, Rank::elemental, + IntrinsicClass::impureSubroutine}, {"date_and_time", {{"date", DefaultChar, Rank::scalar, Optionality::optional}, {"time", DefaultChar, Rank::scalar, Optionality::optional}, {"zone", DefaultChar, Rank::scalar, Optionality::optional}, {"values", AnyInt, Rank::vector, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"execute_command_line", {{"command", DefaultChar, Rank::scalar}, {"wait", AnyLogical, Rank::scalar, Optionality::optional}, {"exitstat", AnyInt, Rank::scalar, Optionality::optional}, {"cmdstat", AnyInt, Rank::scalar, Optionality::optional}, {"cmdmsg", DefaultChar, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"get_command", {{"command", DefaultChar, Rank::scalar, Optionality::optional}, {"length", AnyInt, Rank::scalar, Optionality::optional}, {"status", AnyInt, Rank::scalar, Optionality::optional}, {"errmsg", DefaultChar, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"get_command_argument", {{"number", AnyInt, Rank::scalar}, {"value", DefaultChar, Rank::scalar, Optionality::optional}, {"length", AnyInt, Rank::scalar, Optionality::optional}, {"status", AnyInt, Rank::scalar, Optionality::optional}, {"errmsg", 
DefaultChar, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"get_environment_variable", {{"name", DefaultChar, Rank::scalar}, {"value", DefaultChar, Rank::scalar, Optionality::optional}, @@ -934,31 +952,34 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {"status", AnyInt, Rank::scalar, Optionality::optional}, {"trim_name", AnyLogical, Rank::scalar, Optionality::optional}, {"errmsg", DefaultChar, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, {"move_alloc", {{"from", SameType, Rank::known}, {"to", SameType, Rank::known}, {"stat", AnyInt, Rank::scalar, Optionality::optional}, {"errmsg", DefaultChar, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::pureSubroutine}, {"mvbits", {{"from", SameInt}, {"frompos", AnyInt}, {"len", AnyInt}, {"to", SameInt}, {"topos", AnyInt}}, - {}}, // elemental + {}, Rank::elemental, IntrinsicClass::elementalSubroutine}, // elemental {"random_init", {{"repeatable", AnyLogical, Rank::scalar}, {"image_distinct", AnyLogical, Rank::scalar}}, - {}}, - {"random_number", {{"harvest", AnyReal, Rank::known}}, {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, + {"random_number", {{"harvest", AnyReal, Rank::known}}, {}, Rank::elemental, + IntrinsicClass::impureSubroutine}, {"random_seed", {{"size", DefaultInt, Rank::scalar, Optionality::optional}, {"put", DefaultInt, Rank::vector, Optionality::optional}, {"get", DefaultInt, Rank::vector, Optionality::optional}}, - {}}, // TODO: at most one argument can be present + {}, Rank::elemental, + IntrinsicClass::impureSubroutine}, // TODO: at most one argument can be + // present {"system_clock", {{"count", AnyInt, Rank::scalar, Optionality::optional}, {"count_rate", AnyIntOrReal, Rank::scalar, Optionality::optional}, {"count_max", AnyInt, Rank::scalar, Optionality::optional}}, - {}}, + {}, Rank::elemental, IntrinsicClass::impureSubroutine}, }; // TODO: Intrinsic subroutine EVENT_QUERY @@ -1532,6 +1553,8 @@ class IntrinsicProcTable::Implementation { bool IsIntrinsic(const std::string &) const; + IntrinsicClass GetIntrinsicClass(const std::string &) const; + std::optional Probe(const CallCharacteristics &, ActualArguments &, FoldingContext &, const IntrinsicProcTable &) const; @@ -1571,6 +1594,23 @@ bool IntrinsicProcTable::Implementation::IsIntrinsic( return name == "null" || name == "__builtin_c_f_pointer"; } +IntrinsicClass IntrinsicProcTable::Implementation::GetIntrinsicClass( + const std::string &name) const { + auto specificIntrinsic{specificFuncs_.find(name)}; + if (specificIntrinsic != specificFuncs_.end()) { + return specificIntrinsic->second->intrinsicClass; + } + auto genericIntrinsic{genericFuncs_.find(name)}; + if (genericIntrinsic != genericFuncs_.end()) { + return genericIntrinsic->second->intrinsicClass; + } + auto subrIntrinsic{subroutines_.find(name)}; + if (subrIntrinsic != subroutines_.end()) { + return subrIntrinsic->second->intrinsicClass; + } + return IntrinsicClass::noClass; +} + bool CheckAndRearrangeArguments(ActualArguments &arguments, parser::ContextualMessages &messages, const char *const dummyKeywords[], std::size_t trailingOptionals) { @@ -2014,6 +2054,11 @@ bool IntrinsicProcTable::IsIntrinsic(const std::string &name) const { return DEREF(impl_).IsIntrinsic(name); } +IntrinsicClass IntrinsicProcTable::GetIntrinsicClass( + const std::string &name) const { + return DEREF(impl_).GetIntrinsicClass(name); +} + std::optional 
IntrinsicProcTable::Probe( const CallCharacteristics &call, ActualArguments &arguments, FoldingContext &context) const { diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp index 76cccbfb06d32..09780d306fb87 100644 --- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp +++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp @@ -8,16 +8,11 @@ #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" -#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Support/KindMapping.h" #include "mlir/IR/AttributeSupport.h" -#include "mlir/IR/Diagnostics.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Types.h" -#include "mlir/Parser.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" using namespace fir; diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp index 9a56c6bc403e7..6364c24137f2f 100644 --- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp +++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp @@ -10,8 +10,6 @@ #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/IR/StandardTypes.h" using namespace fir; diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index f62e1c4ced18a..2e67c86486fc0 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -10,28 +10,35 @@ #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" +#include "mlir/Dialect/CommonFolders.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/Function.h" +#include "mlir/IR/Matchers.h" #include "mlir/IR/Module.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/SymbolTable.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/TypeSwitch.h" using namespace fir; -/// return true if the sequence type is abstract or the record type is malformed -/// or contains an abstract sequence type +/// Return true if a sequence type is of some incomplete size or a record type +/// is malformed or contains an incomplete sequence type. An incomplete sequence +/// type is one with more unknown extents in the type than have been provided +/// via `dynamicExtents`. Sequence types with an unknown rank are incomplete by +/// definition. 
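Restating the rule in the comment above before the implementation that follows: the shape is scanned left to right, every unknown extent consumes one of the `dynamicExtents` values, and the type is incomplete as soon as an unknown extent finds no value left to consume (or the shape is empty, i.e. the rank is unknown). A self-contained sketch over a plain shape vector, with -1 standing in for SequenceType::getUnknownExtent() (illustrative code, not flang's):

#include <cassert>
#include <cstdint>
#include <vector>

static bool isIncomplete(const std::vector<std::int64_t> &shape,
                         unsigned dynamicExtents) {
  if (shape.empty()) // unknown rank: incomplete by definition
    return true;
  for (std::int64_t extent : shape) {
    if (extent != -1)
      continue; // known extents need no dynamic value
    if (dynamicExtents-- == 0)
      return true; // more '?' extents than provided values
  }
  return false;
}

int main() {
  assert(!isIncomplete({10, -1}, 1)); // array<10x?xT> with one extent given
  assert(isIncomplete({-1, -1}, 1));  // array<?x?xT> with only one given
  assert(isIncomplete({}, 2));        // unknown rank is always incomplete
}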
static bool verifyInType(mlir::Type inType, - llvm::SmallVectorImpl &visited) { + llvm::SmallVectorImpl &visited, + unsigned dynamicExtents = 0) { if (auto st = inType.dyn_cast()) { auto shape = st.getShape(); if (shape.size() == 0) return true; - for (auto ext : shape) - if (ext < 0) + for (std::size_t i = 0, end{shape.size()}; i < end; ++i) { + if (shape[i] != fir::SequenceType::getUnknownExtent()) + continue; + if (dynamicExtents-- == 0) return true; + } } else if (auto rt = inType.dyn_cast()) { // don't recurse if we're already visiting this one if (llvm::is_contained(visited, rt.getName())) @@ -57,6 +64,15 @@ static bool verifyRecordLenParams(mlir::Type inType, unsigned numLenParams) { return false; } +//===----------------------------------------------------------------------===// +// AddfOp +//===----------------------------------------------------------------------===// + +mlir::OpFoldResult fir::AddfOp::fold(llvm::ArrayRef opnds) { + return mlir::constFoldBinaryOp( + opnds, [](APFloat a, APFloat b) { return a + b; }); +} + //===----------------------------------------------------------------------===// // AllocaOp //===----------------------------------------------------------------------===// @@ -100,6 +116,33 @@ mlir::Type fir::AllocMemOp::wrapResultType(mlir::Type intype) { return HeapType::get(intype); } +//===----------------------------------------------------------------------===// +// BoxAddrOp +//===----------------------------------------------------------------------===// + +mlir::OpFoldResult fir::BoxAddrOp::fold(llvm::ArrayRef opnds) { + if (auto v = val().getDefiningOp()) { + if (auto box = dyn_cast(v)) + return box.memref(); + if (auto box = dyn_cast(v)) + return box.memref(); + } + return {}; +} + +//===----------------------------------------------------------------------===// +// BoxCharLenOp +//===----------------------------------------------------------------------===// + +mlir::OpFoldResult +fir::BoxCharLenOp::fold(llvm::ArrayRef opnds) { + if (auto v = val().getDefiningOp()) { + if (auto box = dyn_cast(v)) + return box.len(); + } + return {}; +} + //===----------------------------------------------------------------------===// // BoxDimsOp //===----------------------------------------------------------------------===// @@ -185,13 +228,13 @@ mlir::CmpFPredicate fir::CmpfOp::getPredicateByName(llvm::StringRef name) { return pred.getValue(); } -void fir::buildCmpFOp(Builder *builder, OperationState &result, +void fir::buildCmpFOp(OpBuilder &builder, OperationState &result, CmpFPredicate predicate, Value lhs, Value rhs) { result.addOperands({lhs, rhs}); - result.types.push_back(builder->getI1Type()); + result.types.push_back(builder.getI1Type()); result.addAttribute( CmpfOp::getPredicateAttrName(), - builder->getI64IntegerAttr(static_cast(predicate))); + builder.getI64IntegerAttr(static_cast(predicate))); } template @@ -251,13 +294,13 @@ mlir::ParseResult fir::parseCmpfOp(mlir::OpAsmParser &parser, // CmpcOp //===----------------------------------------------------------------------===// -void fir::buildCmpCOp(Builder *builder, OperationState &result, +void fir::buildCmpCOp(OpBuilder &builder, OperationState &result, CmpFPredicate predicate, Value lhs, Value rhs) { result.addOperands({lhs, rhs}); - result.types.push_back(builder->getI1Type()); + result.types.push_back(builder.getI1Type()); result.addAttribute( fir::CmpcOp::getPredicateAttrName(), - builder->getI64IntegerAttr(static_cast(predicate))); + builder.getI64IntegerAttr(static_cast(predicate))); } static 
void printCmpcOp(OpAsmPrinter &p, fir::CmpcOp op) { printCmpOp(p, op); } @@ -267,6 +310,103 @@ mlir::ParseResult fir::parseCmpcOp(mlir::OpAsmParser &parser, return parseCmpOp(parser, result); } +//===----------------------------------------------------------------------===// +// ConvertOp +//===----------------------------------------------------------------------===// + +mlir::OpFoldResult fir::ConvertOp::fold(llvm::ArrayRef opnds) { + if (value().getType() == getType()) + return value(); + if (matchPattern(value(), m_Op())) { + auto inner = cast(value().getDefiningOp()); + // (convert (convert 'a : logical -> i1) : i1 -> logical) ==> forward 'a + if (auto toTy = getType().dyn_cast()) + if (auto fromTy = inner.value().getType().dyn_cast()) + if (inner.getType().isa() && (toTy == fromTy)) + return inner.value(); + // (convert (convert 'a : i1 -> logical) : logical -> i1) ==> forward 'a + if (auto toTy = getType().dyn_cast()) + if (auto fromTy = inner.value().getType().dyn_cast()) + if (inner.getType().isa() && (toTy == fromTy) && + (fromTy.getWidth() == 1)) + return inner.value(); + } + return {}; +} + +bool fir::ConvertOp::isIntegerCompatible(mlir::Type ty) { + return ty.isa() || ty.isa() || + ty.isa() || ty.isa() || + ty.isa(); +} + +bool fir::ConvertOp::isFloatCompatible(mlir::Type ty) { + return ty.isa() || ty.isa(); +} + +bool fir::ConvertOp::isPointerCompatible(mlir::Type ty) { + return ty.isa() || ty.isa() || + ty.isa() || ty.isa() || + ty.isa(); +} + +//===----------------------------------------------------------------------===// +// CoordinateOp +//===----------------------------------------------------------------------===// + +static mlir::ParseResult parseCoordinateOp(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + llvm::ArrayRef allOperandTypes; + llvm::ArrayRef allResultTypes; + llvm::SMLoc allOperandLoc = parser.getCurrentLocation(); + llvm::SmallVector allOperands; + if (parser.parseOperandList(allOperands)) + return failure(); + if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + if (parser.parseColon()) + return failure(); + + mlir::FunctionType funcTy; + if (parser.parseType(funcTy)) + return failure(); + allOperandTypes = funcTy.getInputs(); + allResultTypes = funcTy.getResults(); + result.addTypes(allResultTypes); + if (parser.resolveOperands(allOperands, allOperandTypes, allOperandLoc, + result.operands)) + return failure(); + if (funcTy.getNumInputs()) { + // No inputs handled by verify + result.addAttribute(fir::CoordinateOp::baseType(), + mlir::TypeAttr::get(funcTy.getInput(0))); + } + return success(); +} + +mlir::Type fir::CoordinateOp::getBaseType() { + return getAttr(CoordinateOp::baseType()).cast().getValue(); +} + +void fir::CoordinateOp::build(OpBuilder &, OperationState &result, + mlir::Type resType, ValueRange operands, + ArrayRef attrs) { + assert(operands.size() >= 1u && "mismatched number of parameters"); + result.addOperands(operands); + result.addAttribute(fir::CoordinateOp::baseType(), + mlir::TypeAttr::get(operands[0].getType())); + result.attributes.append(attrs.begin(), attrs.end()); + result.addTypes({resType}); +} + +void fir::CoordinateOp::build(OpBuilder &builder, OperationState &result, + mlir::Type resType, mlir::Value ref, + ValueRange coor, ArrayRef attrs) { + llvm::SmallVector operands{ref}; + operands.append(coor.begin(), coor.end()); + build(builder, result, resType, operands, attrs); +} + //===----------------------------------------------------------------------===// // DispatchOp 
//===----------------------------------------------------------------------===// @@ -331,7 +471,7 @@ static mlir::ParseResult parseEmboxOp(mlir::OpAsmParser &parser, // GenTypeDescOp //===----------------------------------------------------------------------===// -void fir::GenTypeDescOp::build(Builder *, OperationState &result, +void fir::GenTypeDescOp::build(OpBuilder &, OperationState &result, mlir::TypeAttr inty) { result.addAttribute("in_type", inty); result.addTypes(TypeDescType::get(inty.getValue())); @@ -341,10 +481,288 @@ void fir::GenTypeDescOp::build(Builder *, OperationState &result, // GlobalOp //===----------------------------------------------------------------------===// +static ParseResult parseGlobalOp(OpAsmParser &parser, OperationState &result) { + // Parse the optional linkage + llvm::StringRef linkage; + auto &builder = parser.getBuilder(); + if (mlir::succeeded(parser.parseOptionalKeyword(&linkage))) { + if (fir::GlobalOp::verifyValidLinkage(linkage)) + return failure(); + mlir::StringAttr linkAttr = builder.getStringAttr(linkage); + result.addAttribute(fir::GlobalOp::linkageAttrName(), linkAttr); + } + + // Parse the name as a symbol reference attribute. + mlir::SymbolRefAttr nameAttr; + if (parser.parseAttribute(nameAttr, fir::GlobalOp::symbolAttrName(), + result.attributes)) + return failure(); + result.addAttribute(mlir::SymbolTable::getSymbolAttrName(), + builder.getStringAttr(nameAttr.getRootReference())); + + bool simpleInitializer = false; + if (mlir::succeeded(parser.parseOptionalLParen())) { + Attribute attr; + if (parser.parseAttribute(attr, fir::GlobalOp::initValAttrName(), + result.attributes) || + parser.parseRParen()) + return failure(); + simpleInitializer = true; + } + + if (succeeded(parser.parseOptionalKeyword("constant"))) { + // if "constant" keyword then mark this as a constant, not a variable + result.addAttribute(fir::GlobalOp::constantAttrName(), + builder.getUnitAttr()); + } + + mlir::Type globalType; + if (parser.parseColonType(globalType)) + return failure(); + + result.addAttribute(fir::GlobalOp::typeAttrName(), + mlir::TypeAttr::get(globalType)); + + if (simpleInitializer) { + result.addRegion(); + } else { + // Parse the optional initializer body. 
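Before the region-parsing branch below, it is worth spelling out what parseGlobalOp accepts overall: an optional linkage keyword, a symbol name, an optional parenthesized simple initializer, an optional `constant` keyword, and a colon followed by the global's type, with an initializer region parsed only when no simple initializer was given. A sketch of the two shapes, held in C++ string literals since the exact mnemonic and types here are assumptions for illustration, not taken from this diff:

// Hypothetical textual forms assembled by the parser above.
const char *simpleInitializerForm =
    "fir.global internal @g0(42 : i32) constant : i32";
const char *regionInitializerForm =
    "fir.global @g1 : !fir.array<10xi32> { /* ops yielding the value */ }";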
+ if (parser.parseRegion(*result.addRegion(), llvm::None, llvm::None)) + return failure(); + } + + return success(); +} + void fir::GlobalOp::appendInitialValue(mlir::Operation *op) { getBlock().getOperations().push_back(op); } +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, bool isConstant, Type type, + Attribute initialVal, StringAttr linkage, + ArrayRef attrs) { + result.addRegion(); + result.addAttribute(typeAttrName(), mlir::TypeAttr::get(type)); + result.addAttribute(mlir::SymbolTable::getSymbolAttrName(), + builder.getStringAttr(name)); + result.addAttribute(symbolAttrName(), builder.getSymbolRefAttr(name)); + if (isConstant) + result.addAttribute(constantAttrName(), builder.getUnitAttr()); + if (initialVal) + result.addAttribute(initValAttrName(), initialVal); + if (linkage) + result.addAttribute(linkageAttrName(), linkage); + result.attributes.append(attrs.begin(), attrs.end()); +} + +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, Type type, Attribute initialVal, + StringAttr linkage, ArrayRef attrs) { + build(builder, result, name, /*isConstant=*/false, type, {}, linkage, attrs); +} + +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, bool isConstant, Type type, + StringAttr linkage, ArrayRef attrs) { + build(builder, result, name, isConstant, type, {}, linkage, attrs); +} + +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, Type type, StringAttr linkage, + ArrayRef attrs) { + build(builder, result, name, /*isConstant=*/false, type, {}, linkage, attrs); +} + +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, bool isConstant, Type type, + ArrayRef attrs) { + build(builder, result, name, isConstant, type, StringAttr{}, attrs); +} + +void fir::GlobalOp::build(mlir::OpBuilder &builder, OperationState &result, + StringRef name, Type type, + ArrayRef attrs) { + build(builder, result, name, /*isConstant=*/false, type, attrs); +} + +mlir::ParseResult fir::GlobalOp::verifyValidLinkage(StringRef linkage) { + // Supporting only a subset of the LLVM linkage types for now + static const llvm::SmallVector validNames = { + "internal", "common", "weak"}; + return mlir::success(llvm::is_contained(validNames, linkage)); +} + +//===----------------------------------------------------------------------===// +// IterWhileOp +//===----------------------------------------------------------------------===// + +void fir::IterWhileOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, mlir::Value lb, + mlir::Value ub, mlir::Value step, + mlir::Value iterate, mlir::ValueRange iterArgs, + llvm::ArrayRef attributes) { + result.addOperands({lb, ub, step, iterate}); + result.addTypes(iterate.getType()); + result.addOperands(iterArgs); + for (auto v : iterArgs) + result.addTypes(v.getType()); + mlir::Region *bodyRegion = result.addRegion(); + bodyRegion->push_back(new Block{}); + bodyRegion->front().addArgument(builder.getIndexType()); + bodyRegion->front().addArgument(iterate.getType()); + bodyRegion->front().addArguments(iterArgs.getTypes()); + result.addAttributes(attributes); +} + +static mlir::ParseResult parseIterWhileOp(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + auto &builder = parser.getBuilder(); + mlir::OpAsmParser::OperandType inductionVariable, lb, ub, step; + if (parser.parseLParen() || parser.parseRegionArgument(inductionVariable) || + 
parser.parseEqual()) + return mlir::failure(); + + // Parse loop bounds. + auto indexType = builder.getIndexType(); + auto i1Type = builder.getIntegerType(1); + if (parser.parseOperand(lb) || + parser.resolveOperand(lb, indexType, result.operands) || + parser.parseKeyword("to") || parser.parseOperand(ub) || + parser.resolveOperand(ub, indexType, result.operands) || + parser.parseKeyword("step") || parser.parseOperand(step) || + parser.parseRParen() || + parser.resolveOperand(step, indexType, result.operands)) + return mlir::failure(); + + mlir::OpAsmParser::OperandType iterateVar, iterateInput; + if (parser.parseKeyword("and") || parser.parseLParen() || + parser.parseRegionArgument(iterateVar) || parser.parseEqual() || + parser.parseOperand(iterateInput) || parser.parseRParen() || + parser.resolveOperand(iterateInput, i1Type, result.operands)) + return mlir::failure(); + + // Parse the initial iteration arguments. + llvm::SmallVector regionArgs; + // Induction variable. + regionArgs.push_back(inductionVariable); + regionArgs.push_back(iterateVar); + result.addTypes(i1Type); + + if (mlir::succeeded(parser.parseOptionalKeyword("iter_args"))) { + llvm::SmallVector operands; + llvm::SmallVector regionTypes; + // Parse assignment list and results type list. + if (parser.parseAssignmentList(regionArgs, operands) || + parser.parseArrowTypeList(regionTypes)) + return mlir::failure(); + // Resolve input operands. + for (auto operand_type : llvm::zip(operands, regionTypes)) + if (parser.resolveOperand(std::get<0>(operand_type), + std::get<1>(operand_type), result.operands)) + return mlir::failure(); + result.addTypes(regionTypes); + } + + if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) + return mlir::failure(); + + llvm::SmallVector argTypes; + // Induction variable (hidden) + argTypes.push_back(indexType); + // Loop carried variables (including iterate) + argTypes.append(result.types.begin(), result.types.end()); + // Parse the body region. + auto *body = result.addRegion(); + if (regionArgs.size() != argTypes.size()) + return parser.emitError( + parser.getNameLoc(), + "mismatch in number of loop-carried values and defined values"); + + if (parser.parseRegion(*body, regionArgs, argTypes)) + return failure(); + + fir::IterWhileOp::ensureTerminator(*body, builder, result.location); + + return mlir::success(); +} + +static mlir::LogicalResult verify(fir::IterWhileOp op) { + if (auto cst = dyn_cast_or_null(op.step().getDefiningOp())) + if (cst.getValue() <= 0) + return op.emitOpError("constant step operand must be positive"); + + // Check that the body defines as single block argument for the induction + // variable. 
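The checks that follow pin down the op's shape: the body block's arguments are (index induction variable, i1 iterate flag, loop-carried values...), and the results are the final iterate flag plus the final carried values. A plain C++ restatement of those semantics (an illustration suggested by the builder and verifier, not flang code; the termination condition shown is arbitrary):

#include <cassert>
#include <cstdint>
#include <utility>

// Returns the final iterate flag and the final loop-carried value: the loop
// steps from lb to ub (inclusive) but stops early once the flag goes false.
static std::pair<bool, std::int64_t> iterWhile(std::int64_t lb, std::int64_t ub,
                                               std::int64_t step, bool iterate,
                                               std::int64_t carried) {
  for (std::int64_t iv = lb; iv <= ub && iterate; iv += step) {
    carried += iv;           // body computes the next carried values...
    iterate = carried < 100; // ...and the next iterate flag
  }
  return {iterate, carried};
}

int main() {
  auto [ok, total] = iterWhile(1, 1000, 1, true, 0);
  assert(!ok && total >= 100);
}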
+ auto *body = op.getBody(); + if (!body->getArgument(1).getType().isInteger(1)) + return op.emitOpError( + "expected body second argument to be an index argument for " + "the induction variable"); + if (!body->getArgument(0).getType().isIndex()) + return op.emitOpError( + "expected body first argument to be an index argument for " + "the induction variable"); + + auto opNumResults = op.getNumResults(); + if (opNumResults == 0) + return mlir::failure(); + if (op.getNumIterOperands() != opNumResults) + return op.emitOpError( + "mismatch in number of loop-carried values and defined values"); + if (op.getNumRegionIterArgs() != opNumResults) + return op.emitOpError( + "mismatch in number of basic block args and defined values"); + auto iterOperands = op.getIterOperands(); + auto iterArgs = op.getRegionIterArgs(); + auto opResults = op.getResults(); + unsigned i = 0; + for (auto e : llvm::zip(iterOperands, iterArgs, opResults)) { + if (std::get<0>(e).getType() != std::get<2>(e).getType()) + return op.emitOpError() << "types mismatch between " << i + << "th iter operand and defined value"; + if (std::get<1>(e).getType() != std::get<2>(e).getType()) + return op.emitOpError() << "types mismatch between " << i + << "th iter region arg and defined value"; + + i++; + } + return mlir::success(); +} + +static void print(mlir::OpAsmPrinter &p, fir::IterWhileOp op) { + p << fir::IterWhileOp::getOperationName() << " (" << op.getInductionVar() + << " = " << op.lowerBound() << " to " << op.upperBound() << " step " + << op.step() << ") and ("; + assert(op.hasIterOperands()); + auto regionArgs = op.getRegionIterArgs(); + auto operands = op.getIterOperands(); + p << regionArgs.front() << " = " << *operands.begin() << ")"; + if (regionArgs.size() > 1) { + p << " iter_args("; + llvm::interleaveComma( + llvm::zip(regionArgs.drop_front(), operands.drop_front()), p, + [&](auto it) { p << std::get<0>(it) << " = " << std::get<1>(it); }); + p << ") -> (" << op.getResultTypes().drop_front() << ')'; + } + p.printOptionalAttrDictWithKeyword(op.getAttrs(), {}); + p.printRegion(op.region(), /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); +} + +mlir::Region &fir::IterWhileOp::getLoopBody() { return region(); } + +bool fir::IterWhileOp::isDefinedOutsideOfLoop(mlir::Value value) { + return !region().isAncestor(value.getParentRegion()); +} + +mlir::LogicalResult +fir::IterWhileOp::moveOutOfLoop(llvm::ArrayRef ops) { + for (auto op : ops) + op->moveBefore(*this); + return success(); +} + //===----------------------------------------------------------------------===// // LoadOp //===----------------------------------------------------------------------===// @@ -367,65 +785,83 @@ mlir::ParseResult fir::LoadOp::getElementOf(mlir::Type &ele, mlir::Type ref) { // LoopOp //===----------------------------------------------------------------------===// -void fir::LoopOp::build(mlir::Builder *builder, OperationState &result, - mlir::Value lb, mlir::Value ub, ValueRange step, - ArrayRef attributes) { - if (step.empty()) - result.addOperands({lb, ub}); - else - result.addOperands({lb, ub, step[0]}); +void fir::LoopOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, + mlir::Value lb, mlir::Value ub, mlir::Value step, + bool unordered, mlir::ValueRange iterArgs, + llvm::ArrayRef attributes) { + result.addOperands({lb, ub, step}); + result.addOperands(iterArgs); + for (auto v : iterArgs) + result.addTypes(v.getType()); mlir::Region *bodyRegion = result.addRegion(); - LoopOp::ensureTerminator(*bodyRegion, 
*builder, result.location); - bodyRegion->front().addArgument(builder->getIndexType()); + bodyRegion->push_back(new Block{}); + if (iterArgs.empty()) + LoopOp::ensureTerminator(*bodyRegion, builder, result.location); + bodyRegion->front().addArgument(builder.getIndexType()); + bodyRegion->front().addArguments(iterArgs.getTypes()); + if (unordered) + result.addAttribute(unorderedAttrName(), builder.getUnitAttr()); result.addAttributes(attributes); - NamedAttributeList attrs(attributes); - if (!attrs.get(unorderedAttrName())) - result.addTypes(builder->getIndexType()); } static mlir::ParseResult parseLoopOp(mlir::OpAsmParser &parser, mlir::OperationState &result) { auto &builder = parser.getBuilder(); - OpAsmParser::OperandType inductionVariable, lb, ub, step; + mlir::OpAsmParser::OperandType inductionVariable, lb, ub, step; // Parse the induction variable followed by '='. if (parser.parseRegionArgument(inductionVariable) || parser.parseEqual()) return mlir::failure(); // Parse loop bounds. - mlir::Type indexType = builder.getIndexType(); + auto indexType = builder.getIndexType(); if (parser.parseOperand(lb) || parser.resolveOperand(lb, indexType, result.operands) || parser.parseKeyword("to") || parser.parseOperand(ub) || - parser.resolveOperand(ub, indexType, result.operands)) - return mlir::failure(); + parser.resolveOperand(ub, indexType, result.operands) || + parser.parseKeyword("step") || parser.parseOperand(step) || + parser.resolveOperand(step, indexType, result.operands)) + return failure(); - if (parser.parseOptionalKeyword(fir::LoopOp::stepAttrName())) { - result.addAttribute(fir::LoopOp::stepAttrName(), - builder.getIntegerAttr(builder.getIndexType(), 1)); - } else if (parser.parseOperand(step) || - parser.resolveOperand(step, indexType, result.operands)) { - return mlir::failure(); + if (mlir::succeeded(parser.parseOptionalKeyword("unordered"))) + result.addAttribute(fir::LoopOp::unorderedAttrName(), + builder.getUnitAttr()); + + // Parse the optional initial iteration arguments. + llvm::SmallVector regionArgs, operands; + llvm::SmallVector argTypes; + regionArgs.push_back(inductionVariable); + + if (succeeded(parser.parseOptionalKeyword("iter_args"))) { + // Parse assignment list and results type list. + if (parser.parseAssignmentList(regionArgs, operands) || + parser.parseArrowTypeList(result.types)) + return failure(); + // Resolve input operands. + for (auto operand_type : llvm::zip(operands, result.types)) + if (parser.resolveOperand(std::get<0>(operand_type), + std::get<1>(operand_type), result.operands)) + return failure(); } - // Parse the optional `unordered` keyword - bool isUnordered = false; - if (!parser.parseOptionalKeyword(LoopOp::unorderedAttrName())) { - result.addAttribute(LoopOp::unorderedAttrName(), builder.getUnitAttr()); - isUnordered = true; - } + if (parser.parseOptionalAttrDictWithKeyword(result.attributes)) + return mlir::failure(); + // Induction variable. + argTypes.push_back(indexType); + // Loop carried variables + argTypes.append(result.types.begin(), result.types.end()); // Parse the body region. 
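+  // As a concrete (hypothetical) sketch of the syntax assembled up to this
+  // point, this parser accepts loops such as:
+  //   fir.do_loop %i = %c1 to %c10 step %c1 unordered { ... }
+  //   fir.do_loop %i = %c1 to %c10 step %c1 iter_args(%s = %init) -> (index) { ... }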
- mlir::Region *body = result.addRegion(); - if (parser.parseRegion(*body, inductionVariable, indexType)) - return mlir::failure(); + auto *body = result.addRegion(); + if (regionArgs.size() != argTypes.size()) + return parser.emitError( + parser.getNameLoc(), + "mismatch in number of loop-carried values and defined values"); + + if (parser.parseRegion(*body, regionArgs, argTypes)) + return failure(); fir::LoopOp::ensureTerminator(*body, builder, result.location); - // Parse the optional attribute list. - if (parser.parseOptionalAttrDict(result.attributes)) - return mlir::failure(); - if (!isUnordered) - result.addTypes(builder.getIndexType()); return mlir::success(); } @@ -438,6 +874,115 @@ fir::LoopOp fir::getForInductionVarOwner(mlir::Value val) { return dyn_cast_or_null(containingInst); } +// Lifted from loop.loop +static mlir::LogicalResult verify(fir::LoopOp op) { + if (auto cst = dyn_cast_or_null(op.step().getDefiningOp())) + if (cst.getValue() <= 0) + return op.emitOpError("constant step operand must be positive"); + + // Check that the body defines a single block argument for the induction + // variable. + auto *body = op.getBody(); + if (!body->getArgument(0).getType().isIndex()) + return op.emitOpError( + "expected body first argument to be an index argument for " + "the induction variable"); + + auto opNumResults = op.getNumResults(); + if (opNumResults == 0) + return success(); + if (op.getNumIterOperands() != opNumResults) + return op.emitOpError( + "mismatch in number of loop-carried values and defined values"); + if (op.getNumRegionIterArgs() != opNumResults) + return op.emitOpError( + "mismatch in number of basic block args and defined values"); + auto iterOperands = op.getIterOperands(); + auto iterArgs = op.getRegionIterArgs(); + auto opResults = op.getResults(); + unsigned i = 0; + for (auto e : llvm::zip(iterOperands, iterArgs, opResults)) { + if (std::get<0>(e).getType() != std::get<2>(e).getType()) + return op.emitOpError() << "types mismatch between " << i + << "th iter operand and defined value"; + if (std::get<1>(e).getType() != std::get<2>(e).getType()) + return op.emitOpError() << "types mismatch between " << i + << "th iter region arg and defined value"; + + i++; + } + return success(); +} + +static void print(mlir::OpAsmPrinter &p, fir::LoopOp op) { + bool printBlockTerminators = false; + p << fir::LoopOp::getOperationName() << ' ' << op.getInductionVar() << " = " + << op.lowerBound() << " to " << op.upperBound() << " step " << op.step(); + if (op.unordered()) + p << " unordered"; + if (op.hasIterOperands()) { + p << " iter_args("; + auto regionArgs = op.getRegionIterArgs(); + auto operands = op.getIterOperands(); + llvm::interleaveComma(llvm::zip(regionArgs, operands), p, [&](auto it) { + p << std::get<0>(it) << " = " << std::get<1>(it); + }); + p << ") -> (" << op.getResultTypes() << ')'; + printBlockTerminators = true; + } + p.printOptionalAttrDictWithKeyword(op.getAttrs(), + {fir::LoopOp::unorderedAttrName()}); + p.printRegion(op.region(), /*printEntryBlockArgs=*/false, + printBlockTerminators); +} + +mlir::Region &fir::LoopOp::getLoopBody() { return region(); } + +bool fir::LoopOp::isDefinedOutsideOfLoop(mlir::Value value) { + return !region().isAncestor(value.getParentRegion()); +} + +mlir::LogicalResult +fir::LoopOp::moveOutOfLoop(llvm::ArrayRef ops) { + for (auto op : ops) + op->moveBefore(*this); + return success(); +} + +//===----------------------------------------------------------------------===// +// MulfOp
+//===----------------------------------------------------------------------===// + +mlir::OpFoldResult fir::MulfOp::fold(llvm::ArrayRef opnds) { + return mlir::constFoldBinaryOp( + opnds, [](APFloat a, APFloat b) { return a * b; }); +} + +//===----------------------------------------------------------------------===// +// ResultOp +//===----------------------------------------------------------------------===// + +static mlir::LogicalResult verify(fir::ResultOp op) { + auto parentOp = op.getParentOp(); + auto results = parentOp->getResults(); + auto operands = op.getOperands(); + + if (isa(parentOp) || isa(parentOp) || + isa(parentOp)) { + if (parentOp->getNumResults() != op.getNumOperands()) + return op.emitOpError() << "parent of result must have same arity"; + for (auto e : llvm::zip(results, operands)) { + if (std::get<0>(e).getType() != std::get<1>(e).getType()) + return op.emitOpError() + << "types mismatch between result op and its parent"; + } + } else { + return op.emitOpError() + << "result may only terminate if, do_loop, or iterate_while regions"; + } + return success(); +} + //===----------------------------------------------------------------------===// // SelectOp //===----------------------------------------------------------------------===// @@ -450,14 +995,30 @@ static constexpr llvm::StringRef getTargetOffsetAttr() { return "target_operand_offsets"; } -template +template static A getSubOperands(unsigned pos, A allArgs, - mlir::DenseIntElementsAttr ranges) { + mlir::DenseIntElementsAttr ranges, + AdditionalArgs &&... additionalArgs) { unsigned start = 0; for (unsigned i = 0; i < pos; ++i) start += (*(ranges.begin() + i)).getZExtValue(); - unsigned end = start + (*(ranges.begin() + pos)).getZExtValue(); - return {std::next(allArgs.begin(), start), std::next(allArgs.begin(), end)}; + return allArgs.slice(start, (*(ranges.begin() + pos)).getZExtValue(), + std::forward(additionalArgs)...); +} + +static mlir::MutableOperandRange +getMutableSuccessorOperands(unsigned pos, mlir::MutableOperandRange operands, + StringRef offsetAttr) { + Operation *owner = operands.getOwner(); + NamedAttribute targetOffsetAttr = + *owner->getMutableAttrDict().getNamed(offsetAttr); + return getSubOperands( + pos, operands, targetOffsetAttr.second.cast(), + mlir::MutableOperandRange::OperandSegment(pos, targetOffsetAttr)); +} + +static unsigned denseElementsSize(mlir::DenseIntElementsAttr attr) { + return attr.getNumElements(); } llvm::Optional fir::SelectOp::getCompareOperands(unsigned) { @@ -469,10 +1030,10 @@ fir::SelectOp::getCompareOperands(llvm::ArrayRef, unsigned) { return {}; } -llvm::Optional -fir::SelectOp::getSuccessorOperands(unsigned oper) { - auto a = getAttrOfType(getTargetOffsetAttr()); - return {getSubOperands(oper, targetArgs(), a)}; +llvm::Optional +fir::SelectOp::getMutableSuccessorOperands(unsigned oper) { + return ::getMutableSuccessorOperands(oper, targetArgsMutable(), + getTargetOffsetAttr()); } llvm::Optional> @@ -484,7 +1045,10 @@ fir::SelectOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } -bool fir::SelectOp::canEraseSuccessorOperand() { return true; } +unsigned fir::SelectOp::targetOffsetSize() { + return denseElementsSize( + getAttrOfType(getTargetOffsetAttr())); +} //===----------------------------------------------------------------------===// // SelectCaseOp //===----------------------------------------------------------------------===// @@ -505,10 +1069,10 @@ fir::SelectCaseOp::getCompareOperands(llvm::ArrayRef operands, return {getSubOperands(cond, getSubOperands(1,
operands, segments), a)}; } -llvm::Optional -fir::SelectCaseOp::getSuccessorOperands(unsigned oper) { - auto a = getAttrOfType(getTargetOffsetAttr()); - return {getSubOperands(oper, targetArgs(), a)}; +llvm::Optional +fir::SelectCaseOp::getMutableSuccessorOperands(unsigned oper) { + return ::getMutableSuccessorOperands(oper, targetArgsMutable(), + getTargetOffsetAttr()); } llvm::Optional> @@ -520,8 +1084,6 @@ fir::SelectCaseOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } -bool fir::SelectCaseOp::canEraseSuccessorOperand() { return true; } - // parser for fir.select_case Op static mlir::ParseResult parseSelectCase(mlir::OpAsmParser &parser, mlir::OperationState &result) { @@ -596,6 +1158,94 @@ static mlir::ParseResult parseSelectCase(mlir::OpAsmParser &parser, return mlir::success(); } +unsigned fir::SelectCaseOp::compareOffsetSize() { + return denseElementsSize( + getAttrOfType(getCompareOffsetAttr())); +} + +unsigned fir::SelectCaseOp::targetOffsetSize() { + return denseElementsSize( + getAttrOfType(getTargetOffsetAttr())); +} + +void fir::SelectCaseOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Value selector, + llvm::ArrayRef compareAttrs, + llvm::ArrayRef cmpOperands, + llvm::ArrayRef destinations, + llvm::ArrayRef destOperands, + llvm::ArrayRef attributes) { + result.addOperands(selector); + result.addAttribute(getCasesAttr(), builder.getArrayAttr(compareAttrs)); + llvm::SmallVector operOffs; + int32_t operSize = 0; + for (auto attr : compareAttrs) { + if (attr.isa()) { + operOffs.push_back(2); + operSize += 2; + } else if (attr.isa()) { + operOffs.push_back(0); + } else { + operOffs.push_back(1); + ++operSize; + } + } + for (auto ops : cmpOperands) + result.addOperands(ops); + result.addAttribute(getCompareOffsetAttr(), + builder.getI32VectorAttr(operOffs)); + const auto count = destinations.size(); + for (auto d : destinations) + result.addSuccessors(d); + const auto opCount = destOperands.size(); + llvm::SmallVector argOffs; + int32_t sumArgs = 0; + for (std::remove_const_t i = 0; i != count; ++i) { + if (i < opCount) { + result.addOperands(destOperands[i]); + const auto argSz = destOperands[i].size(); + argOffs.push_back(argSz); + sumArgs += argSz; + } else { + argOffs.push_back(0); + } + } + result.addAttribute(getOperandSegmentSizeAttr(), + builder.getI32VectorAttr({1, operSize, sumArgs})); + result.addAttribute(getTargetOffsetAttr(), builder.getI32VectorAttr(argOffs)); + result.addAttributes(attributes); +} + +/// This builder has a slightly simplified interface in that the list of +/// operands need not be partitioned by the builder. Instead the operands are +/// partitioned here, before being passed to the default builder. This +/// partitioning is unchecked, so it can go awry on bad input.
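+/// For example (hypothetical values): given compareAttrs of
+/// {#fir.interval, #fir.point}, the flat list {%lo, %hi, %val} is partitioned
+/// into {{%lo, %hi}, {%val}}, since a closed interval consumes two operands,
+/// a point consumes one, and a unit (default) case consumes none.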
+void fir::SelectCaseOp::build(mlir::OpBuilder &builder, + mlir::OperationState &result, + mlir::Value selector, + llvm::ArrayRef compareAttrs, + llvm::ArrayRef cmpOpList, + llvm::ArrayRef destinations, + llvm::ArrayRef destOperands, + llvm::ArrayRef attributes) { + llvm::SmallVector cmpOpers; + auto iter = cmpOpList.begin(); + for (auto &attr : compareAttrs) { + if (attr.isa()) { + cmpOpers.push_back(mlir::ValueRange({iter, iter + 2})); + iter += 2; + } else if (attr.isa()) { + cmpOpers.push_back(mlir::ValueRange{}); + } else { + cmpOpers.push_back(mlir::ValueRange({iter, iter + 1})); + ++iter; + } + } + build(builder, result, selector, compareAttrs, cmpOpers, destinations, + destOperands, attributes); +} + //===----------------------------------------------------------------------===// // SelectRankOp //===----------------------------------------------------------------------===// @@ -610,10 +1260,10 @@ fir::SelectRankOp::getCompareOperands(llvm::ArrayRef, unsigned) { return {}; } -llvm::Optional -fir::SelectRankOp::getSuccessorOperands(unsigned oper) { - auto a = getAttrOfType(getTargetOffsetAttr()); - return {getSubOperands(oper, targetArgs(), a)}; +llvm::Optional +fir::SelectRankOp::getMutableSuccessorOperands(unsigned oper) { + return ::getMutableSuccessorOperands(oper, targetArgsMutable(), + getTargetOffsetAttr()); } llvm::Optional> @@ -625,7 +1275,10 @@ fir::SelectRankOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } -bool fir::SelectRankOp::canEraseSuccessorOperand() { return true; } +unsigned fir::SelectRankOp::targetOffsetSize() { + return denseElementsSize( + getAttrOfType(getTargetOffsetAttr())); +} //===----------------------------------------------------------------------===// // SelectTypeOp @@ -641,10 +1294,10 @@ fir::SelectTypeOp::getCompareOperands(llvm::ArrayRef, unsigned) { return {}; } -llvm::Optional -fir::SelectTypeOp::getSuccessorOperands(unsigned oper) { - auto a = getAttrOfType(getTargetOffsetAttr()); - return {getSubOperands(oper, targetArgs(), a)}; +llvm::Optional +fir::SelectTypeOp::getMutableSuccessorOperands(unsigned oper) { + return ::getMutableSuccessorOperands(oper, targetArgsMutable(), + getTargetOffsetAttr()); } llvm::Optional> @@ -656,8 +1309,6 @@ fir::SelectTypeOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } -bool fir::SelectTypeOp::canEraseSuccessorOperand() { return true; } - static ParseResult parseSelectType(OpAsmParser &parser, OperationState &result) { mlir::OpAsmParser::OperandType selector; @@ -703,6 +1354,11 @@ static ParseResult parseSelectType(OpAsmParser &parser, return mlir::success(); } +unsigned fir::SelectTypeOp::targetOffsetSize() { + return denseElementsSize( + getAttrOfType(getTargetOffsetAttr())); +} + //===----------------------------------------------------------------------===// // StoreOp //===----------------------------------------------------------------------===// @@ -726,18 +1382,27 @@ bool fir::StringLitOp::isWideValue() { return eleTy.cast().getFKind() != 1; } +//===----------------------------------------------------------------------===// +// SubfOp +//===----------------------------------------------------------------------===// + +mlir::OpFoldResult fir::SubfOp::fold(llvm::ArrayRef opnds) { + return mlir::constFoldBinaryOp( + opnds, [](APFloat a, APFloat b) { return a - b; }); +} + //===----------------------------------------------------------------------===// // 
WhereOp //===----------------------------------------------------------------------===// -void fir::WhereOp::build(mlir::Builder *builder, OperationState &result, +void fir::WhereOp::build(mlir::OpBuilder &builder, OperationState &result, mlir::Value cond, bool withElseRegion) { result.addOperands(cond); mlir::Region *thenRegion = result.addRegion(); mlir::Region *elseRegion = result.addRegion(); - WhereOp::ensureTerminator(*thenRegion, *builder, result.location); + WhereOp::ensureTerminator(*thenRegion, builder, result.location); if (withElseRegion) - WhereOp::ensureTerminator(*elseRegion, *builder, result.location); + WhereOp::ensureTerminator(*elseRegion, builder, result.location); } static mlir::ParseResult parseWhereOp(OpAsmParser &parser, @@ -758,7 +1423,7 @@ static mlir::ParseResult parseWhereOp(OpAsmParser &parser, WhereOp::ensureTerminator(*thenRegion, parser.getBuilder(), result.location); - if (!parser.parseOptionalKeyword("otherwise")) { + if (!parser.parseOptionalKeyword("else")) { if (parser.parseRegion(*elseRegion, {}, {})) return mlir::failure(); WhereOp::ensureTerminator(*elseRegion, parser.getBuilder(), @@ -772,6 +1437,43 @@ static mlir::ParseResult parseWhereOp(OpAsmParser &parser, return mlir::success(); } +static LogicalResult verify(fir::WhereOp op) { + // Verify that the entry of each child region does not have arguments. + for (auto &region : op.getOperation()->getRegions()) { + if (region.empty()) + continue; + + for (auto &b : region) + if (b.getNumArguments() != 0) + return op.emitOpError( + "requires that child entry blocks have no arguments"); + } + if (op.getNumResults() != 0 && op.otherRegion().empty()) + return op.emitOpError("must have an else block if defining values"); + + return mlir::success(); +} + +static void print(mlir::OpAsmPrinter &p, fir::WhereOp op) { + bool printBlockTerminators = false; + p << fir::WhereOp::getOperationName() << ' ' << op.condition(); + if (!op.results().empty()) { + p << " -> (" << op.getResultTypes() << ')'; + printBlockTerminators = true; + } + p.printRegion(op.whereRegion(), /*printEntryBlockArgs=*/false, + printBlockTerminators); + + // Print the 'else' region if it exists and has a block.
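+  // For example (hypothetical IR), a construct with both regions prints as:
+  //   fir.if %cond { ... } else { ... }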
+ auto &otherReg = op.otherRegion(); + if (!otherReg.empty()) { + p << " else"; + p.printRegion(otherReg, /*printEntryBlockArgs=*/false, + printBlockTerminators); + } + p.printOptionalAttrDict(op.getAttrs()); +} + //===----------------------------------------------------------------------===// mlir::ParseResult fir::isValidCaseAttr(mlir::Attribute attr) { @@ -840,6 +1542,7 @@ mlir::FuncOp fir::createFuncOp(mlir::Location loc, mlir::ModuleOp module, if (auto f = module.lookupSymbol(name)) return f; mlir::OpBuilder modBuilder(module.getBodyRegion()); + modBuilder.setInsertionPoint(module.getBody()->getTerminator()); return modBuilder.create(loc, name, type, attrs); } diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 1fedd00e5da31..97d169de7e77a 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -8,15 +8,12 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/FIRDialect.h" -#include "mlir/IR/Builders.h" #include "mlir/IR/Diagnostics.h" -#include "mlir/IR/Dialect.h" #include "mlir/IR/DialectImplementation.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/Parser.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/ErrorHandling.h" using namespace fir; @@ -181,7 +178,7 @@ SequenceType parseSequence(mlir::DialectAsmParser &parser, mlir::Location) { return SequenceType::get(shape, eleTy, map); } -bool verifyIntegerType(mlir::Type ty) { +static bool verifyIntegerType(mlir::Type ty) { return ty.isa() || ty.isa(); } @@ -842,6 +839,14 @@ bool isa_ref_type(mlir::Type t) { return t.isa() || t.isa() || t.isa(); } +bool isa_box_type(mlir::Type t) { + return t.isa() || t.isa() || t.isa(); +} + +bool isa_passbyref_type(mlir::Type t) { + return t.isa() || isa_box_type(t); +} + bool isa_aggregate(mlir::Type t) { return t.isa() || t.isa(); } @@ -905,6 +910,10 @@ CplxType fir::CplxType::get(mlir::MLIRContext *ctxt, KindTy kind) { return Base::get(ctxt, FIR_COMPLEX, kind); } +mlir::Type fir::CplxType::getElementType() const { + return fir::RealType::get(getContext(), getFKind()); +} + KindTy fir::CplxType::getFKind() const { return getImpl()->getFKind(); } // REAL @@ -1061,6 +1070,34 @@ SequenceType::Shape fir::SequenceType::getShape() const { return getImpl()->getShape(); } +unsigned fir::SequenceType::getConstantRows() const { + auto shape = getShape(); + unsigned count = 0; + for (auto d : shape) { + if (d < 0) + break; + ++count; + } + return count; +} + +// This test helps us determine if we can degenerate an array to a +// pointer to some interior section (possibly a single element) of the +// sequence. This is used to determine if we can lower the type to LLVM IR.
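+// For example (hypothetical shapes): 10x20x? and 10x?x? have a constant
+// interior (only trailing extents are unknown), while 10x?x20 does not.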
+bool fir::SequenceType::hasConstantInterior() const { + if (hasUnknownShape()) + return true; + auto rows = getConstantRows(); + auto dim = getDimension(); + if (rows == dim) + return true; + auto shape = getShape(); + for (unsigned i{rows}, size{dim}; i < size; ++i) + if (shape[i] != getUnknownExtent()) + return false; + return true; +} + mlir::LogicalResult fir::SequenceType::verifyConstructionInvariants( mlir::Location loc, const SequenceType::Shape &shape, mlir::Type eleTy, mlir::AffineMapAttr map) { @@ -1178,6 +1215,12 @@ llvm::SmallPtrSet recordTypeVisited; } // namespace +void fir::verifyIntegralType(mlir::Type type) { + if (verifyIntegerType(type) || type.isa()) + return; + llvm_unreachable("expected integral type"); +} + void fir::printFirType(FIROpsDialect *, mlir::Type ty, mlir::DialectAsmPrinter &p) { auto &os = p.getStream(); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index da02b4fbe47f3..edbd01d4eca07 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -33,7 +33,10 @@ class CheckHelper { void Check() { Check(context_.globalScope()); } void Check(const ParamValue &, bool canBeAssumed); - void Check(const Bound &bound) { CheckSpecExpr(bound.GetExplicit()); } + void Check(const Bound &bound) { + CheckSpecExpr( + bound.GetExplicit(), evaluate::SpecificationExprContext::BOUND); + } void Check(const ShapeSpec &spec) { Check(spec.lbound()); Check(spec.ubound()); @@ -44,7 +47,9 @@ class CheckHelper { void Check(const Scope &); private: - template void CheckSpecExpr(const A &x) { + template + void CheckSpecExpr( + const A &x, const evaluate::SpecificationExprContext specExprContext) { if (symbolBeingChecked_ && IsSaved(*symbolBeingChecked_)) { if (!evaluate::IsConstantExpr(x)) { messages_.Say( @@ -52,18 +57,23 @@ class CheckHelper { symbolBeingChecked_->name()); } } else { - evaluate::CheckSpecificationExpr(x, messages_, DEREF(scope_)); + evaluate::CheckSpecificationExpr( + x, messages_, DEREF(scope_), context_.intrinsics(), specExprContext); } } - template void CheckSpecExpr(const std::optional &x) { + template + void CheckSpecExpr(const std::optional &x, + const evaluate::SpecificationExprContext specExprContext) { if (x) { - CheckSpecExpr(*x); + CheckSpecExpr(*x, specExprContext); } } - template void CheckSpecExpr(A &x) { + template + void CheckSpecExpr( + A &x, const evaluate::SpecificationExprContext specExprContext) { x = Fold(foldingContext_, std::move(x)); const A &constx{x}; - CheckSpecExpr(constx); + CheckSpecExpr(constx, specExprContext); } void CheckValue(const Symbol &, const DerivedTypeSpec *); void CheckVolatile( @@ -131,7 +141,8 @@ void CheckHelper::Check(const ParamValue &value, bool canBeAssumed) { " external function result"_err_en_US); } } else { - CheckSpecExpr(value.GetExplicit()); + CheckSpecExpr( + value.GetExplicit(), evaluate::SpecificationExprContext::TYPE_PARAM); } } @@ -384,15 +395,25 @@ void CheckHelper::CheckObjectEntity( CheckAssumedTypeEntity(symbol, details); symbolBeingChecked_ = nullptr; if (!details.coshape().empty()) { + bool isDeferredShape{details.coshape().IsDeferredShape()}; if (IsAllocatable(symbol)) { - if (!details.coshape().IsDeferredShape()) { // C827 - messages_.Say( - "ALLOCATABLE coarray must have a deferred coshape"_err_en_US); + if (!isDeferredShape) { // C827 + messages_.Say("'%s' is an ALLOCATABLE coarray and must have a deferred" + " coshape"_err_en_US, + symbol.name()); } + } else if (symbol.owner().IsDerivedType()) 
{ // C746 + std::string deferredMsg{ + isDeferredShape ? "" : " and have a deferred coshape"}; + messages_.Say("Component '%s' is a coarray and must have the ALLOCATABLE" + " attribute%s"_err_en_US, + symbol.name(), deferredMsg); } else { if (!details.coshape().IsAssumedSize()) { // C828 messages_.Say( - "Non-ALLOCATABLE coarray must have an explicit coshape"_err_en_US); + "Component '%s' is a non-ALLOCATABLE coarray and must have" + " an explicit coshape"_err_en_US, + symbol.name()); } } } @@ -409,7 +430,8 @@ void CheckHelper::CheckObjectEntity( "An INTENT(OUT) dummy argument may not be, or contain, EVENT_TYPE or LOCK_TYPE"_err_en_US); } } - if (InPure() && !IsPointer(symbol) && !IsIntentIn(symbol) && + if (InPure() && !IsStmtFunction(DEREF(innermostSymbol_)) && + !IsPointer(symbol) && !IsIntentIn(symbol) && !symbol.attrs().test(Attr::VALUE)) { if (InFunction()) { // C1583 messages_.Say( diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 9a1c656afb97a..77312733cd2e8 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -261,7 +261,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { case parser::OmpLoopDirective::Directive::TaskloopSimd: { PushContext(beginDir.source, OmpDirective::TASKLOOP_SIMD); SetContextAllowed( - taskloopAllowedClauses | simdAllowedClauses - OmpClause::REDUCTION); + (taskloopAllowedClauses | simdAllowedClauses) - OmpClause::REDUCTION); SetContextAllowedOnce(taskloopAllowedOnceClauses | simdAllowedOnceClauses); SetContextAllowedExclusive(taskloopAllowedExclusiveClauses); } break; diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 3431bc05392ef..9306f702aabbd 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2092,13 +2092,14 @@ std::optional ExpressionAnalyzer::CheckCall( } semantics::CheckArguments(*chars, arguments, GetFoldingContext(), context_.FindScope(callSite), treatExternalAsImplicit); - if (!chars->attrs.test(characteristics::Procedure::Attr::Pure)) { + const Symbol *procSymbol{proc.GetSymbol()}; + if (procSymbol && !IsPureProcedure(*procSymbol)) { if (const semantics::Scope * pure{semantics::FindPureProcedureContaining( context_.FindScope(callSite))}) { Say(callSite, "Procedure '%s' referenced in pure subprogram '%s' must be pure too"_err_en_US, - DEREF(proc.GetSymbol()).name(), DEREF(pure->symbol()).name()); + procSymbol->name(), DEREF(pure->symbol()).name()); } } } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 6d04c7f229ed1..e51c33988d0d7 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3679,7 +3679,7 @@ bool DeclarationVisitor::Pre(const parser::DerivedTypeDef &x) { if (symbol->has() && !paramNames.count(name)) { SayDerivedType(name, "'%s' is not a type parameter of this derived type"_err_en_US, - currScope()); // C742 + currScope()); // C741 } } Walk(std::get>>(x.t)); @@ -3820,14 +3820,50 @@ void DeclarationVisitor::Post(const parser::ComponentDecl &x) { !attrs.HasAny({Attr::PUBLIC, Attr::PRIVATE})) { attrs.set(Attr::PRIVATE); } - if (!attrs.HasAny({Attr::POINTER, Attr::ALLOCATABLE})) { - if (const auto *declType{GetDeclTypeSpec()}) { - if (const auto *derived{declType->AsDerived()}) { + if (const auto *declType{GetDeclTypeSpec()}) { + if (const auto *derived{declType->AsDerived()}) { + if (!attrs.HasAny({Attr::POINTER, 
Attr::ALLOCATABLE})) { if (derivedTypeInfo_.type == &derived->typeSymbol()) { // C744 Say("Recursive use of the derived type requires " "POINTER or ALLOCATABLE"_err_en_US); } } + if (!coarraySpec().empty()) { // C747 + if (IsTeamType(derived)) { + Say("A coarray component may not be of type TEAM_TYPE from " + "ISO_FORTRAN_ENV"_err_en_US); + } else { + if (IsIsoCType(derived)) { + Say("A coarray component may not be of type C_PTR or C_FUNPTR from " + "ISO_C_BINDING"_err_en_US); + } + } + } + if (auto it{FindCoarrayUltimateComponent(*derived)}) { // C748 + std::string ultimateName{it.BuildResultDesignatorName()}; + // Strip off the leading "%" + if (ultimateName.length() > 1) { + ultimateName.erase(0, 1); + if (attrs.HasAny({Attr::POINTER, Attr::ALLOCATABLE})) { + evaluate::AttachDeclaration( + Say(name.source, + "A component with a POINTER or ALLOCATABLE attribute may " + "not " + "be of a type with a coarray ultimate component (named " + "'%s')"_err_en_US, + ultimateName), + derived->typeSymbol()); + } + if (!arraySpec().empty() || !coarraySpec().empty()) { + evaluate::AttachDeclaration( + Say(name.source, + "An array or coarray component may not be of a type with a " + "coarray ultimate component (named '%s')"_err_en_US, + ultimateName), + derived->typeSymbol()); + } + } + } } } if (OkToAddComponent(name)) { @@ -4741,7 +4777,7 @@ Symbol *DeclarationVisitor::MakeTypeSymbol( const SourceName &name, Details &&details) { Scope &derivedType{currScope()}; CHECK(derivedType.IsDerivedType()); - if (auto *symbol{FindInScope(derivedType, name)}) { + if (auto *symbol{FindInScope(derivedType, name)}) { // C742 Say2(name, "Type parameter, component, or procedure binding '%s'" " already defined in this type"_err_en_US, diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 249dcb27b65af..3b68beaa557fc 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -270,6 +270,24 @@ bool IsPureProcedure(const Symbol &symbol) { } else if (!IsProcedure(symbol)) { return false; } + if (IsStmtFunction(symbol)) { + // Section 15.7(1) states that a statement function is PURE if it does not + // reference an IMPURE procedure or a VOLATILE variable + const MaybeExpr &expr{symbol.get().stmtFunction()}; + if (expr) { + for (const Symbol &refSymbol : evaluate::CollectSymbols(*expr)) { + if (IsFunction(refSymbol) && !IsPureProcedure(refSymbol)) { + return false; + } + if (const Symbol * root{GetAssociationRoot(refSymbol)}) { + if (root->attrs().test(Attr::VOLATILE)) { + return false; + } + } + } + } + return true; // statement function was not found to be impure + } return symbol.attrs().test(Attr::PURE) || (symbol.attrs().test(Attr::ELEMENTAL) && !symbol.attrs().test(Attr::IMPURE)); @@ -1356,4 +1374,5 @@ void LabelEnforce::SayWithConstruct(SemanticsContext &context, context.Say(stmtLocation, message) .Attach(constructLocation, GetEnclosingConstructMsg()); } + } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp index 40518ce24ba16..80f994045749c 100644 --- a/flang/lib/Semantics/type.cpp +++ b/flang/lib/Semantics/type.cpp @@ -77,7 +77,9 @@ void DerivedTypeSpec::CookParameters(evaluate::FoldingContext &foldingContext) { name = *nextNameIter++; auto it{std::find_if(parameterDecls.begin(), parameterDecls.end(), [&](const Symbol &symbol) { return symbol.name() == name; })}; - CHECK(it != parameterDecls.end()); + if (it == parameterDecls.end()) { + break; + } attr = it->get().get().attr(); } else { messages.Say(name_, diff 
--git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index d505e170c4378..2afa40fbfb825 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -396,8 +396,8 @@ bool RealOutputEditing::Edit(const DataEdit &edit) { case 'B': case 'O': case 'Z': - return EditIntegerOutput( - io_, edit, decimal::BinaryFloatingPointNumber{x_}.raw); + return EditIntegerOutput(io_, edit, + decimal::BinaryFloatingPointNumber{x_}.raw()); case 'G': return Edit(EditForGOutput(edit)); default: diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir index bdadf5cd6f58d..5821b7f291326 100644 --- a/flang/test/Fir/fir-ops.fir +++ b/flang/test/Fir/fir-ops.fir @@ -4,129 +4,174 @@ // UNSUPPORTED: !fir // CHECK-LABEL: func @it1() -> !fir.int<4> +// CHECK: func @box1() -> !fir.boxchar<2> +// CHECK: func @box2() -> !fir.boxproc<(i32, i32) -> i64> +// CHECK: func @box3() -> !fir.box> func @it1() -> !fir.int<4> -// CHECK-LABEL: func @box1() -> !fir.boxchar<2> func @box1() -> !fir.boxchar<2> -// CHECK-LABEL: func @box2() -> !fir.boxproc<(i32, i32) -> i64> func @box2() -> !fir.boxproc<(i32, i32) -> i64> -// CHECK-LABEL: func @box3() -> !fir.box> func @box3() -> !fir.box> // Fortran SUBROUTINE and FUNCTION // CHECK-LABEL: func @print_index3(index, index, index) -// CHECK-LABEL: func @user_i64(i64) -// CHECK-LABEL: func @user_tdesc(!fir.tdesc>) +// CHECK: func @user_i64(i64) +// CHECK: func @user_tdesc(!fir.tdesc>) func @print_index3(index, index, index) func @user_i64(i64) func @user_tdesc(!fir.tdesc>) // expect the void return to be omitted // CHECK-LABEL: func @store_tuple(tuple>) +// CHECK: func @get_method_box() -> !fir.box> +// CHECK: func @method_impl(!fir.box>) func @store_tuple(tuple>) -> () - -// CHECK-LABEL: func @get_method_box() -> !fir.box> -// CHECK-LABEL: func @method_impl(!fir.box>) func @get_method_box() -> !fir.box> func @method_impl(!fir.box>) // CHECK-LABEL: func @nop() -func @nop() - // CHECK-LABEL: func @get_func() -> (() -> ()) +func @nop() func @get_func() -> (() -> ()) -// CHECK-LABEL: @instructions +// CHECK-LABEL: func @instructions() { func @instructions() { - // CHECK: %[[A0:.*]] = fir.alloca !fir.array<10xi32> +// CHECK: [[VAL_0:%.*]] = fir.alloca !fir.array<10xi32> +// CHECK: [[VAL_1:%.*]] = fir.load [[VAL_0]] : !fir.ref> +// CHECK: [[VAL_2:%.*]] = fir.alloca i32 +// CHECK: [[VAL_3:%.*]] = constant 22 : i32 %0 = fir.alloca !fir.array<10xi32> - // CHECK: fir.load %[[A0]] : !fir.ref> %1 = fir.load %0 : !fir.ref> %2 = fir.alloca i32 %3 = constant 22 : i32 - // CHECK: fir.store %{{.*}} to %{{.*}} : !fir.ref + +// CHECK: fir.store [[VAL_3]] to [[VAL_2]] : !fir.ref +// CHECK: [[VAL_4:%.*]] = fir.undefined i32 +// CHECK: [[VAL_5:%.*]] = fir.allocmem !fir.array<100xf32> +// CHECK: [[VAL_6:%.*]] = fir.embox [[VAL_5]] : (!fir.heap>) -> !fir.box> fir.store %3 to %2 : !fir.ref - // CHECK: fir.undefined i32 %4 = fir.undefined i32 - // CHECK: %[[A5:.*]] = fir.allocmem !fir.array<100xf32> %5 = fir.allocmem !fir.array<100xf32> - // CHECK: %[[A6:.*]] = fir.embox %[[A5]] : (!fir.heap>) -> !fir.box> %6 = fir.embox %5 : (!fir.heap>) -> !fir.box> - // CHECK: fir.box_addr %{{.*}} : (!fir.box>) -> !fir.ref> + +// CHECK: [[VAL_7:%.*]] = fir.box_addr [[VAL_6]] : (!fir.box>) -> !fir.ref> +// CHECK: [[VAL_8:%.*]] = constant 0 : index +// CHECK: [[VAL_9:%.*]]:3 = fir.box_dims [[VAL_6]], [[VAL_8]] : (!fir.box>, index) -> (index, index, index) +// CHECK: fir.call @print_index3([[VAL_9]]#0, [[VAL_9]]#1, [[VAL_9]]#2) : (index, index, index) -> () +// CHECK: [[VAL_10:%.*]] = 
fir.call @it1() : () -> !fir.int<4> %7 = fir.box_addr %6 : (!fir.box>) -> !fir.ref> %c0 = constant 0 : index - // CHECK: %[[A8:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} : (!fir.box>, index) -> (index, index, index) %d1:3 = fir.box_dims %6, %c0 : (!fir.box>, index) -> (index, index, index) - // CHECK: fir.call @print_index3(%[[A8]]#0, %[[A8]]#1, %[[A8]]#2) : (index, index, index) fir.call @print_index3(%d1#0, %d1#1, %d1#2) : (index, index, index) -> () %8 = fir.call @it1() : () -> !fir.int<4> - // CHECK: fir.box_elesize %[[A6]] : (!fir.box>) -> i64 + +// CHECK: [[VAL_11:%.*]] = fir.box_elesize [[VAL_6]] : (!fir.box>) -> i64 +// CHECK: [[VAL_12:%.*]] = fir.box_isalloc [[VAL_6]] : (!fir.box>) -> i1 +// CHECK: [[VAL_13:%.*]] = fir.box_isarray [[VAL_6]] : (!fir.box>) -> i1 +// CHECK: [[VAL_14:%.*]] = fir.box_isptr [[VAL_6]] : (!fir.box>) -> i1 +// CHECK: [[VAL_15:%.*]] = fir.box_rank [[VAL_6]] : (!fir.box>) -> i64 %9 = fir.box_elesize %6 : (!fir.box>) -> i64 - // CHECK: fir.box_isalloc %[[A6]] : (!fir.box>) -> i1 %10 = fir.box_isalloc %6 : (!fir.box>) -> i1 - // CHECK: fir.box_isarray %[[A6]] : (!fir.box>) -> i1 %11 = fir.box_isarray %6 : (!fir.box>) -> i1 - // CHECK: fir.box_isptr %[[A6]] : (!fir.box>) -> i1 %12 = fir.box_isptr %6 : (!fir.box>) -> i1 - // CHECK: fir.box_rank %[[A6]] : (!fir.box>) -> i64 %13 = fir.box_rank %6 : (!fir.box>) -> i64 - // CHECK: fir.box_tdesc %[[A6]] : (!fir.box>) -> !fir.tdesc> + +// CHECK: [[VAL_16:%.*]] = fir.box_tdesc [[VAL_6]] : (!fir.box>) -> !fir.tdesc> +// CHECK: [[VAL_17:%.*]] = fir.call @box1() : () -> !fir.boxchar<2> +// CHECK: [[VAL_18:%.*]] = fir.boxchar_len [[VAL_17]] : (!fir.boxchar<2>) -> i32 +// CHECK: [[VAL_19:%.*]] = fir.call @box2() : () -> !fir.boxproc<(i32, i32) -> i64> +// CHECK: [[VAL_20:%.*]] = fir.boxproc_host [[VAL_19]] : (!fir.boxproc<(i32, i32) -> i64>) -> !fir.ref %14 = fir.box_tdesc %6 : (!fir.box>) -> !fir.tdesc> %15 = fir.call @box1() : () -> !fir.boxchar<2> - // CHECK: fir.boxchar_len %{{.*}} : (!fir.boxchar<2>) -> i32 %16 = fir.boxchar_len %15 : (!fir.boxchar<2>) -> i32 %17 = fir.call @box2() : () -> !fir.boxproc<(i32, i32) -> i64> - // CHECK: fir.boxproc_host %{{.*}} : (!fir.boxproc<(i32, i32) -> i64>) -> !fir.ref %18 = fir.boxproc_host %17 : (!fir.boxproc<(i32, i32) -> i64>) -> !fir.ref + +// CHECK: [[VAL_21:%.*]] = constant 10 : i32 +// CHECK: [[VAL_22:%.*]] = fir.coordinate_of [[VAL_5]], [[VAL_21]] : (!fir.heap>, i32) -> !fir.ref +// CHECK: [[VAL_23:%.*]] = fir.field_index f, !fir.type +// CHECK: [[VAL_24:%.*]] = fir.undefined !fir.type +// CHECK: [[VAL_25:%.*]] = fir.extract_value [[VAL_24]], [[VAL_23]] : (!fir.type, !fir.field) -> f32 %19 = constant 10 : i32 - // CHECK: fir.coordinate_of %{{.*}}, %{{.*}} : (!fir.heap>, i32) -> !fir.ref %20 = fir.coordinate_of %5, %19 : (!fir.heap>, i32) -> !fir.ref - // CHECK: fir.field_index f, !fir.type %21 = fir.field_index f, !fir.type - // CHECK: fir.undefined !fir.type %22 = fir.undefined !fir.type - // CHECK: fir.extract_value %{{.*}}, %{{.*}} : (!fir.type, !fir.field) -> f32 %23 = fir.extract_value %22, %21 : (!fir.type, !fir.field) -> f32 + +// CHECK: [[VAL_26:%.*]] = constant 1 : i32 +// CHECK: [[VAL_27:%.*]] = fir.gendims [[VAL_26]], [[VAL_21]], [[VAL_26]] : (i32, i32, i32) -> !fir.dims<1> +// CHECK: [[VAL_28:%.*]] = constant 1.0 +// CHECK: [[VAL_29:%.*]] = fir.insert_value [[VAL_24]], [[VAL_28]], [[VAL_23]] : (!fir.type, f32, !fir.field) -> !fir.type +// CHECK: [[VAL_30:%.*]] = fir.len_param_index f, !fir.type %c1 = constant 1 : i32 - // CHECK: fir.gendims %{{.*}}, %{{.*}}, %{{.*}} 
: (i32, i32, i32) -> !fir.dims<1> %24 = fir.gendims %c1, %19, %c1 : (i32, i32, i32) -> !fir.dims<1> %cf1 = constant 1.0 : f32 - // CHECK: fir.insert_value %{{.*}}, %{{.*}}, %{{.*}} : (!fir.type, f32, !fir.field) -> !fir.type %25 = fir.insert_value %22, %cf1, %21 : (!fir.type, f32, !fir.field) -> !fir.type - // CHECK: fir.len_param_index f, !fir.type %26 = fir.len_param_index f, !fir.type + +// CHECK: [[VAL_31:%.*]] = fir.call @box3() : () -> !fir.box> +// CHECK: [[VAL_32:%.*]] = fir.dispatch "method"([[VAL_31]]) : (!fir.box>) -> i32 +// CHECK: [[VAL_33:%.*]] = fir.convert [[VAL_32]] : (i32) -> i64 +// CHECK: [[VAL_34:%.*]] = fir.gentypedesc !fir.type +// CHECK: fir.call @user_tdesc([[VAL_34]]) : (!fir.tdesc>) -> () +// CHECK: [[VAL_35:%.*]] = fir.no_reassoc [[VAL_33]] : i64 %27 = fir.call @box3() : () -> !fir.box> - // CHECK: fir.dispatch "method"(%{{.*}}) : (!fir.box>) -> i32 %28 = fir.dispatch "method"(%27) : (!fir.box>) -> i32 - // CHECK: fir.convert %{{.*}} : (i32) -> i64 %29 = fir.convert %28 : (i32) -> i64 - // CHECK: fir.gentypedesc !fir.type %30 = fir.gentypedesc !fir.type fir.call @user_tdesc(%30) : (!fir.tdesc>) -> () - // CHECK: fir.no_reassoc %{{.*}} : i64 %31 = fir.no_reassoc %29 : i64 + +// CHECK: fir.call @user_i64([[VAL_35]]) : (i64) -> () +// CHECK: fir.freemem [[VAL_5]] : !fir.heap> +// CHECK: [[VAL_36:%.*]] = fir.call @get_func() : () -> (() -> ()) +// CHECK: fir.call [[VAL_36]]() : () -> () +// CHECK: [[VAL_37:%.*]] = fir.address_of(@it1) : !fir.ref<() -> !fir.int<4>> +// CHECK: return +// CHECK: } fir.call @user_i64(%31) : (i64) -> () - // CHECK: fir.freemem %{{.*}} : !fir.heap> fir.freemem %5 : !fir.heap> %32 = fir.call @get_func() : () -> (() -> ()) fir.call %32() : () -> () - // CHECK: fir.address_of(@it1) : !fir.ref<() -> !fir.int<4>> %33 = fir.address_of (@it1) : !fir.ref<() -> !fir.int<4>> return } -// CHECK-LABEL: @boxing_match +// CHECK-LABEL: func @boxing_match() { func @boxing_match() { +// CHECK: [[VAL_38:%.*]] = fir.alloca i32 +// CHECK: [[VAL_39:%.*]] = fir.alloca !fir.type +// CHECK: [[VAL_40:%.*]] = fir.alloca !fir.char<1> +// CHECK: [[VAL_41:%.*]] = fir.alloca tuple +// CHECK: [[VAL_42:%.*]] = fir.embox [[VAL_38]] : (!fir.ref) -> !fir.box +// CHECK: [[VAL_43:%.*]]:6 = fir.unbox [[VAL_42]] : (!fir.box) -> (!fir.ref, i32, i32, !fir.tdesc, i32, !fir.dims<0>) +// CHECK: [[VAL_44:%.*]] = constant 8 : i32 +// CHECK: [[VAL_45:%.*]] = fir.undefined !fir.char<1> +// CHECK: [[VAL_46:%.*]] = fir.emboxchar [[VAL_40]], [[VAL_44]] : (!fir.ref>, i32) -> !fir.boxchar<1> +// CHECK: [[VAL_47:%.*]]:2 = fir.unboxchar [[VAL_46]] : (!fir.boxchar<1>) -> (!fir.ref>, i32) +// CHECK: [[VAL_48:%.*]] = fir.undefined !fir.type +// CHECK: [[VAL_49:%.*]] = constant 0 : i32 +// CHECK: [[VAL_50:%.*]] = constant 12 : i32 +// CHECK: [[VAL_51:%.*]] = fir.insert_value [[VAL_48]], [[VAL_50]], [[VAL_49]] : (!fir.type, i32, i32) -> !fir.type +// CHECK: [[VAL_52:%.*]] = constant 1 : i32 +// CHECK: [[VAL_53:%.*]] = constant 4.213000e+01 : f64 +// CHECK: [[VAL_54:%.*]] = fir.insert_value [[VAL_48]], [[VAL_53]], [[VAL_52]] : (!fir.type, f64, i32) -> !fir.type +// CHECK: fir.store [[VAL_54]] to [[VAL_39]] : !fir.ref> +// CHECK: [[VAL_55:%.*]] = fir.emboxproc @method_impl, [[VAL_41]] : ((!fir.box>) -> (), !fir.ref>) -> !fir.boxproc<(!fir.box>) -> ()> +// CHECK: [[VAL_56:%.*]], [[VAL_57:%.*]] = fir.unboxproc [[VAL_55]] : (!fir.boxproc<(!fir.box>) -> ()>) -> ((!fir.box>) -> (), !fir.ref>>) +// CHECK: [[VAL_58:%.*]] = fir.call @box2() : () -> !fir.boxproc<(i32, i32) -> i64> +// CHECK: [[VAL_59:%.*]], 
[[VAL_60:%.*]] = fir.unboxproc [[VAL_58]] : (!fir.boxproc<(i32, i32) -> i64>) -> ((i32, i32) -> i64, !fir.ref>>) +// CHECK: [[VAL_61:%.*]] = fir.load [[VAL_60]] : !fir.ref>> +// CHECK: fir.call @store_tuple([[VAL_61]]) : (tuple>) -> () +// CHECK: return +// CHECK: } %0 = fir.alloca i32 %d6 = fir.alloca !fir.type %d3 = fir.alloca !fir.char<1> %e6 = fir.alloca tuple %1 = fir.embox %0 : (!fir.ref) -> !fir.box - // CHECK: fir.unbox %{{.*}} : (!fir.box) -> (!fir.ref, i32, i32, !fir.tdesc, i32, !fir.dims<0>) %2:6 = fir.unbox %1 : (!fir.box) -> (!fir.ref,i32,i32,!fir.tdesc,i32,!fir.dims<0>) %c8 = constant 8 : i32 %3 = fir.undefined !fir.char<1> - // CHECK: fir.emboxchar %{{.*}}, %{{.*}} : (!fir.ref>, i32) -> !fir.boxchar<1> - // CHECK: fir.unboxchar %{{.*}} : (!fir.boxchar<1>) -> (!fir.ref>, i32) %4 = fir.emboxchar %d3, %c8 : (!fir.ref>, i32) -> !fir.boxchar<1> %5:2 = fir.unboxchar %4 : (!fir.boxchar<1>) -> (!fir.ref>, i32) %6 = fir.undefined !fir.type @@ -139,8 +184,6 @@ func @boxing_match() { fir.store %a3 to %d6 : !fir.ref> %7 = fir.emboxproc @method_impl, %e6 : ((!fir.box>) -> (), !fir.ref>) -> !fir.boxproc<(!fir.box>) -> ()> %8:2 = fir.unboxproc %7 : (!fir.boxproc<(!fir.box>) -> ()>) -> ((!fir.box>) -> (), !fir.ref>>) - // CHECK: fir.emboxproc @method_impl, %{{.*}} : ((!fir.box>) -> (), !fir.ref>) -> !fir.boxproc<(!fir.box>) -> ()> - // CHECK: fir.unboxproc %{{.*}} : (!fir.boxproc<(!fir.box>) -> ()>) -> ((!fir.box>) -> (), !fir.ref>>) %9 = fir.call @box2() : () -> !fir.boxproc<(i32, i32) -> i64> %10:2 = fir.unboxproc %9 : (!fir.boxproc<(i32, i32) -> i64>) -> ((i32, i32) -> i64, !fir.ref>>) %11 = fir.load %10#1 : !fir.ref>> @@ -148,32 +191,61 @@ func @boxing_match() { return } -// CHECK-LABEL: @loop +// CHECK-LABEL: func @loop() { func @loop() { +// CHECK: [[VAL_62:%.*]] = constant 1 : index +// CHECK: [[VAL_63:%.*]] = constant 10 : index +// CHECK: [[VAL_64:%.*]] = constant true %c1 = constant 1 : index %c10 = constant 10 : index %ct = constant true - // CHECK: fir.loop %{{.*}} = %{{.*}} to %{{.*}} { - %i = fir.loop %i = %c1 to %c10 { - // CHECK: fir.where %{{.*}} { - fir.where %ct { + +// CHECK: fir.do_loop [[VAL_65:%.*]] = [[VAL_62]] to [[VAL_63]] step [[VAL_62]] { +// CHECK: fir.if [[VAL_64]] { +// CHECK: fir.call @nop() : () -> () +// CHECK: } else { +// CHECK: fir.call @nop() : () -> () +// CHECK: } +// CHECK: } +// CHECK: fir.unreachable +// CHECK: } + fir.do_loop %i = %c1 to %c10 step %c1 { + fir.if %ct { fir.call @nop() : () -> () - // CHECK: } otherwise { - } otherwise { + } else { fir.call @nop() : () -> () } } - // CHECK: fir.unreachable fir.unreachable } -// CHECK-LABEL: @bar_select +// CHECK: func @bar_select([[VAL_66:%.*]]: i32, [[VAL_67:%.*]]: i32) -> i32 { func @bar_select(%arg : i32, %arg2 : i32) -> i32 { +// CHECK: [[VAL_68:%.*]] = constant 1 : i32 +// CHECK: [[VAL_69:%.*]] = constant 2 : i32 +// CHECK: [[VAL_70:%.*]] = constant 3 : i32 +// CHECK: [[VAL_71:%.*]] = constant 4 : i32 %0 = constant 1 : i32 %1 = constant 2 : i32 %2 = constant 3 : i32 %3 = constant 4 : i32 - // CHECK: fir.select %{{.*}} : i32 [1, ^bb1(%{{.*}} : i32), 2, ^bb2(%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32), -3, ^bb3(%{{.*}}, %{{.*}} : i32, i32), 4, ^bb4(%{{.*}} : i32), unit, ^bb5] + +// CHECK: fir.select [[VAL_66]] : i32 [1, ^bb1([[VAL_68]] : i32), 2, ^bb2([[VAL_70]], [[VAL_66]], [[VAL_67]] : i32, i32, i32), -3, ^bb3([[VAL_67]], [[VAL_70]] : i32, i32), 4, ^bb4([[VAL_69]] : i32), unit, ^bb5] +// CHECK: ^bb1([[VAL_72:%.*]]: i32): +// CHECK: return [[VAL_72]] : i32 +// CHECK: ^bb2([[VAL_73:%.*]]: i32, 
[[VAL_74:%.*]]: i32, [[VAL_75:%.*]]: i32): +// CHECK: [[VAL_76:%.*]] = addi [[VAL_73]], [[VAL_74]] : i32 +// CHECK: [[VAL_77:%.*]] = addi [[VAL_76]], [[VAL_75]] : i32 +// CHECK: return [[VAL_77]] : i32 +// CHECK: ^bb3([[VAL_78:%.*]]: i32, [[VAL_79:%.*]]: i32): +// CHECK: [[VAL_80:%.*]] = addi [[VAL_78]], [[VAL_79]] : i32 +// CHECK: return [[VAL_80]] : i32 +// CHECK: ^bb4([[VAL_81:%.*]]: i32): +// CHECK: return [[VAL_81]] : i32 +// CHECK: ^bb5: +// CHECK: [[VAL_82:%.*]] = constant 0 : i32 +// CHECK: return [[VAL_82]] : i32 +// CHECK: } fir.select %arg:i32 [ 1,^bb1(%0:i32), 2,^bb2(%2,%arg,%arg2:i32,i32,i32), -3,^bb3(%arg2,%2:i32,i32), 4,^bb4(%1:i32), unit,^bb5 ] ^bb1(%a : i32) : return %a : i32 @@ -191,13 +263,25 @@ func @bar_select(%arg : i32, %arg2 : i32) -> i32 { return %zero : i32 } -// CHECK-LABEL: @bar_select_rank +// CHECK-LABEL: func @bar_select_rank( +// CHECK-SAME: [[VAL_83:%.*]]: i32, [[VAL_84:%.*]]: i32) -> i32 { func @bar_select_rank(%arg : i32, %arg2 : i32) -> i32 { +// CHECK: [[VAL_85:%.*]] = constant 1 : i32 +// CHECK: [[VAL_86:%.*]] = constant 2 : i32 +// CHECK: [[VAL_87:%.*]] = constant 3 : i32 +// CHECK: [[VAL_88:%.*]] = constant 4 : i32 %0 = constant 1 : i32 %1 = constant 2 : i32 %2 = constant 3 : i32 %3 = constant 4 : i32 - // CHECK: fir.select_rank %{{.*}} : i32 [1, ^bb1(%{{.*}} : i32), 2, ^bb2(%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32), 3, ^bb3(%{{.*}}, %{{.*}} : i32, i32), -1, ^bb4(%{{.*}} : i32), unit, ^bb5] + +// CHECK: fir.select_rank [[VAL_83]] : i32 [1, ^bb1([[VAL_85]] : i32), 2, ^bb2([[VAL_87]], [[VAL_83]], [[VAL_84]] : i32, i32, i32), 3, ^bb3([[VAL_84]], [[VAL_87]] : i32, i32), -1, ^bb4([[VAL_86]] : i32), unit, ^bb5] +// CHECK: ^bb1([[VAL_89:%.*]]: i32): +// CHECK: return [[VAL_89]] : i32 +// CHECK: ^bb2([[VAL_90:%.*]]: i32, [[VAL_91:%.*]]: i32, [[VAL_92:%.*]]: i32): +// CHECK: [[VAL_93:%.*]] = addi [[VAL_90]], [[VAL_91]] : i32 +// CHECK: [[VAL_94:%.*]] = addi [[VAL_93]], [[VAL_92]] : i32 +// CHECK: return [[VAL_94]] : i32 fir.select_rank %arg:i32 [ 1,^bb1(%0:i32), 2,^bb2(%2,%arg,%arg2:i32,i32,i32), 3,^bb3(%arg2,%2:i32,i32), -1,^bb4(%1:i32), unit,^bb5 ] ^bb1(%a : i32) : return %a : i32 @@ -205,26 +289,56 @@ func @bar_select_rank(%arg : i32, %arg2 : i32) -> i32 { %4 = addi %b, %b2 : i32 %5 = addi %4, %b3 : i32 return %5 : i32 + +// CHECK: ^bb3([[VAL_95:%.*]]: i32, [[VAL_96:%.*]]: i32): +// CHECK: [[VAL_97:%.*]] = addi [[VAL_95]], [[VAL_96]] : i32 +// CHECK: return [[VAL_97]] : i32 +// CHECK: ^bb4([[VAL_98:%.*]]: i32): +// CHECK: return [[VAL_98]] : i32 ^bb3(%c:i32, %c2:i32) : %6 = addi %c, %c2 : i32 return %6 : i32 ^bb4(%d : i32) : return %d : i32 + +// CHECK: ^bb5: +// CHECK: [[VAL_99:%.*]] = constant 0 : i32 +// CHECK: [[VAL_100:%.*]] = fir.call @get_method_box() : () -> !fir.box> +// CHECK: fir.dispatch "method"([[VAL_100]]) : (!fir.box>) -> () ^bb5 : %zero = constant 0 : i32 %7 = fir.call @get_method_box() : () -> !fir.box> fir.dispatch method(%7) : (!fir.box>) -> () + +// CHECK: return [[VAL_99]] : i32 +// CHECK: } return %zero : i32 } -// CHECK-LABEL: @bar_select_type +// CHECK-LABEL: func @bar_select_type( +// CHECK-SAME: [[VAL_101:%.*]]: !fir.box}>>) -> i32 { func @bar_select_type(%arg : !fir.box}>>) -> i32 { + +// CHECK: [[VAL_102:%.*]] = constant 1 : i32 +// CHECK: [[VAL_103:%.*]] = constant 2 : i32 +// CHECK: [[VAL_104:%.*]] = constant 3 : i32 +// CHECK: [[VAL_105:%.*]] = constant 4 : i32 %0 = constant 1 : i32 %1 = constant 2 : i32 %2 = constant 3 : i32 %3 = constant 4 : i32 - // CHECK: fir.select_type %{{.*}} : !fir.box}>> [#fir.instance>, 
^bb1(%{{.*}} : i32), #fir.instance>, ^bb2(%{{.*}} : i32), #fir.subsumed>, ^bb3(%{{.*}} : i32), #fir.instance>, ^bb4(%{{.*}} : i32), unit, ^bb5] + +// CHECK: fir.select_type [[VAL_101]] : !fir.box}>> [#fir.instance>, ^bb1([[VAL_102]] : i32), #fir.instance>, ^bb2([[VAL_104]] : i32), #fir.subsumed>, ^bb3([[VAL_104]] : i32), #fir.instance>, ^bb4([[VAL_103]] : i32), unit, ^bb5] fir.select_type %arg : !fir.box}>> [ #fir.instance>,^bb1(%0:i32), #fir.instance>,^bb2(%2:i32), #fir.subsumed>,^bb3(%2:i32), #fir.instance>,^bb4(%1:i32), unit,^bb5 ] + +// CHECK: ^bb1([[VAL_106:%.*]]: i32): +// CHECK: return [[VAL_106]] : i32 +// CHECK: ^bb2([[VAL_107:%.*]]: i32): +// CHECK: return [[VAL_107]] : i32 +// CHECK: ^bb3([[VAL_108:%.*]]: i32): +// CHECK: return [[VAL_108]] : i32 +// CHECK: ^bb4([[VAL_109:%.*]]: i32): +// CHECK: return [[VAL_109]] : i32 ^bb1(%a : i32) : return %a : i32 ^bb2(%b : i32) : @@ -233,19 +347,43 @@ func @bar_select_type(%arg : !fir.box i32 { +// CHECK: [[VAL_113:%.*]] = constant 1 : i32 +// CHECK: [[VAL_114:%.*]] = constant 2 : i32 +// CHECK: [[VAL_115:%.*]] = constant 3 : i32 +// CHECK: [[VAL_116:%.*]] = constant 4 : i32 func @bar_select_case(%arg : i32, %arg2 : i32) -> i32 { %0 = constant 1 : i32 %1 = constant 2 : i32 %2 = constant 3 : i32 %3 = constant 4 : i32 - // CHECK: fir.select_case %{{.*}} : i32 [#fir.point, %{{.*}}, ^bb1(%{{.*}} : i32), #fir.lower, %{{.*}}, ^bb2(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32, i32), #fir.interval, %{{.*}}, %{{.*}}, ^bb3(%{{.*}}, %{{.*}} : i32, i32), #fir.upper, %{{.*}}, ^bb4(%{{.*}} : i32), unit, ^bb5] + +// CHECK: fir.select_case [[VAL_111]] : i32 [#fir.point, [[VAL_113]], ^bb1([[VAL_113]] : i32), #fir.lower, [[VAL_114]], ^bb2([[VAL_115]], [[VAL_111]], [[VAL_112]], [[VAL_114]] : i32, i32, i32, i32), #fir.interval, [[VAL_115]], [[VAL_116]], ^bb3([[VAL_115]], [[VAL_112]] : i32, i32), #fir.upper, [[VAL_111]], ^bb4([[VAL_114]] : i32), unit, ^bb5] fir.select_case %arg : i32 [#fir.point, %0, ^bb1(%0:i32), #fir.lower, %1, ^bb2(%2,%arg,%arg2,%1:i32,i32,i32,i32), #fir.interval, %2, %3, ^bb3(%2,%arg2:i32,i32), #fir.upper, %arg, ^bb4(%1:i32), unit, ^bb5] + +// CHECK: ^bb1([[VAL_117:%.*]]: i32): +// CHECK: return [[VAL_117]] : i32 +// CHECK: ^bb2([[VAL_118:%.*]]: i32, [[VAL_119:%.*]]: i32, [[VAL_120:%.*]]: i32, [[VAL_121:%.*]]: i32): +// CHECK: [[VAL_122:%.*]] = addi [[VAL_118]], [[VAL_119]] : i32 +// CHECK: [[VAL_123:%.*]] = muli [[VAL_122]], [[VAL_120]] : i32 +// CHECK: [[VAL_124:%.*]] = addi [[VAL_123]], [[VAL_121]] : i32 +// CHECK: return [[VAL_124]] : i32 +// CHECK: ^bb3([[VAL_125:%.*]]: i32, [[VAL_126:%.*]]: i32): +// CHECK: [[VAL_127:%.*]] = addi [[VAL_125]], [[VAL_126]] : i32 +// CHECK: return [[VAL_127]] : i32 +// CHECK: ^bb4([[VAL_128:%.*]]: i32): +// CHECK: return [[VAL_128]] : i32 ^bb1(%a : i32) : return %a : i32 ^bb2(%b : i32, %b2:i32, %b3:i32, %b4:i32) : @@ -258,146 +396,211 @@ func @bar_select_case(%arg : i32, %arg2 : i32) -> i32 { return %7 : i32 ^bb4(%d : i32) : return %d : i32 + +// CHECK: ^bb5: +// CHECK: [[VAL_129:%.*]] = constant 0 : i32 +// CHECK: return [[VAL_129]] : i32 +// CHECK: } ^bb5 : %zero = constant 0 : i32 return %zero : i32 } -// CHECK-LABEL: @global_var +// CHECK-LABEL: fir.global @global_var : i32 { +// CHECK: [[VAL_130:%.*]] = constant 1 : i32 +// CHECK: fir.has_value [[VAL_130]] : i32 +// CHECK: } fir.global @global_var : i32 { %0 = constant 1 : i32 fir.has_value %0 : i32 } -// CHECK-LABEL: @global_constant +// CHECK-LABEL: fir.global @global_constant constant : i32 { +// CHECK: [[VAL_131:%.*]] = constant 934 : i32 
+// CHECK: fir.has_value [[VAL_131]] : i32 +// CHECK: } fir.global @global_constant constant : i32 { %0 = constant 934 : i32 fir.has_value %0 : i32 } -// CHECK-LABEL: @global_derived +// CHECK-LABEL: fir.global @global_derived : !fir.type { +// CHECK: fir.global_len "f", 1 : i32 +// CHECK: [[VAL_132:%.*]] = fir.undefined !fir.type +// CHECK: fir.has_value [[VAL_132]] : !fir.type +// CHECK: } fir.global @global_derived : !fir.type { - // CHECK: fir.global_len "f", 1 : i32 fir.global_len f, 1 : i32 %0 = fir.undefined !fir.type fir.has_value %0 : !fir.type } -// CHECK-LABEL: @dispatch_tbl +// CHECK-LABEL: fir.dispatch_table @dispatch_tbl { +// CHECK: fir.dt_entry "method", @method_impl +// CHECK: } fir.dispatch_table @dispatch_tbl { - // CHECK: fir.dt_entry "method", @method_impl fir.dt_entry "method", @method_impl } -// CHECK-LABEL: @compare_real +// CHECK-LABEL: func @compare_real( +// CHECK-SAME: [[VAL_133:%.*]]: !fir.real<16>, [[VAL_134:%.*]]: !fir.real<16>) { func @compare_real(%a : !fir.real<16>, %b : !fir.real<16>) { - // CHECK: fir.cmpf "false", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "oeq", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "ogt", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "oge", %{{.*}}, %{{.*}} : !fir.real<16> + +// CHECK: [[VAL_135:%.*]] = fir.cmpf "false", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_136:%.*]] = fir.cmpf "oeq", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_137:%.*]] = fir.cmpf "ogt", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_138:%.*]] = fir.cmpf "oge", [[VAL_133]], [[VAL_134]] : !fir.real<16> %d0 = fir.cmpf "false", %a, %b : !fir.real<16> %d1 = fir.cmpf "oeq", %a, %b : !fir.real<16> %d2 = fir.cmpf "ogt", %a, %b : !fir.real<16> %d3 = fir.cmpf "oge", %a, %b : !fir.real<16> - // CHECK: fir.cmpf "olt", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "ole", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "one", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "ord", %{{.*}}, %{{.*}} : !fir.real<16> + +// CHECK: [[VAL_139:%.*]] = fir.cmpf "olt", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_140:%.*]] = fir.cmpf "ole", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_141:%.*]] = fir.cmpf "one", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_142:%.*]] = fir.cmpf "ord", [[VAL_133]], [[VAL_134]] : !fir.real<16> %a0 = fir.cmpf "olt", %a, %b : !fir.real<16> %a1 = fir.cmpf "ole", %a, %b : !fir.real<16> %a2 = fir.cmpf "one", %a, %b : !fir.real<16> %a3 = fir.cmpf "ord", %a, %b : !fir.real<16> - // CHECK: fir.cmpf "ueq", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "ugt", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "uge", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "ult", %{{.*}}, %{{.*}} : !fir.real<16> + +// CHECK: [[VAL_143:%.*]] = fir.cmpf "ueq", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_144:%.*]] = fir.cmpf "ugt", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_145:%.*]] = fir.cmpf "uge", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_146:%.*]] = fir.cmpf "ult", [[VAL_133]], [[VAL_134]] : !fir.real<16> %b0 = fir.cmpf "ueq", %a, %b : !fir.real<16> %b1 = fir.cmpf "ugt", %a, %b : !fir.real<16> %b2 = fir.cmpf "uge", %a, %b : !fir.real<16> %b3 = fir.cmpf "ult", %a, %b : !fir.real<16> - // CHECK: fir.cmpf "ule", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "une", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.cmpf "uno", %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: 
fir.cmpf "true", %{{.*}}, %{{.*}} : !fir.real<16> + +// CHECK: [[VAL_147:%.*]] = fir.cmpf "ule", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_148:%.*]] = fir.cmpf "une", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_149:%.*]] = fir.cmpf "uno", [[VAL_133]], [[VAL_134]] : !fir.real<16> +// CHECK: [[VAL_150:%.*]] = fir.cmpf "true", [[VAL_133]], [[VAL_134]] : !fir.real<16> %c0 = fir.cmpf "ule", %a, %b : !fir.real<16> %c1 = fir.cmpf "une", %a, %b : !fir.real<16> %c2 = fir.cmpf "uno", %a, %b : !fir.real<16> %c3 = fir.cmpf "true", %a, %b : !fir.real<16> + +// CHECK: return +// CHECK: } return } -// CHECK-LABEL: @compare_complex +// CHECK-LABEL: func @compare_complex( +// CHECK-SAME: [[VAL_151:%.*]]: !fir.complex<16>, [[VAL_152:%.*]]: !fir.complex<16>) { func @compare_complex(%a : !fir.complex<16>, %b : !fir.complex<16>) { - // CHECK: fir.cmpc "false", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "oeq", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "ogt", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "oge", %{{.*}}, %{{.*}} : !fir.complex<16> + +// CHECK: [[VAL_153:%.*]] = fir.cmpc "false", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_154:%.*]] = fir.cmpc "oeq", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_155:%.*]] = fir.cmpc "ogt", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_156:%.*]] = fir.cmpc "oge", [[VAL_151]], [[VAL_152]] : !fir.complex<16> %d0 = fir.cmpc "false", %a, %b : !fir.complex<16> %d1 = fir.cmpc "oeq", %a, %b : !fir.complex<16> %d2 = fir.cmpc "ogt", %a, %b : !fir.complex<16> %d3 = fir.cmpc "oge", %a, %b : !fir.complex<16> - // CHECK: fir.cmpc "olt", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "ole", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "one", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "ord", %{{.*}}, %{{.*}} : !fir.complex<16> + +// CHECK: [[VAL_157:%.*]] = fir.cmpc "olt", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_158:%.*]] = fir.cmpc "ole", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_159:%.*]] = fir.cmpc "one", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_160:%.*]] = fir.cmpc "ord", [[VAL_151]], [[VAL_152]] : !fir.complex<16> %a0 = fir.cmpc "olt", %a, %b : !fir.complex<16> %a1 = fir.cmpc "ole", %a, %b : !fir.complex<16> %a2 = fir.cmpc "one", %a, %b : !fir.complex<16> %a3 = fir.cmpc "ord", %a, %b : !fir.complex<16> - // CHECK: fir.cmpc "ueq", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "ugt", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "uge", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "ult", %{{.*}}, %{{.*}} : !fir.complex<16> + +// CHECK: [[VAL_161:%.*]] = fir.cmpc "ueq", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_162:%.*]] = fir.cmpc "ugt", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_163:%.*]] = fir.cmpc "uge", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_164:%.*]] = fir.cmpc "ult", [[VAL_151]], [[VAL_152]] : !fir.complex<16> %b0 = fir.cmpc "ueq", %a, %b : !fir.complex<16> %b1 = fir.cmpc "ugt", %a, %b : !fir.complex<16> %b2 = fir.cmpc "uge", %a, %b : !fir.complex<16> %b3 = fir.cmpc "ult", %a, %b : !fir.complex<16> - // CHECK: fir.cmpc "ule", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "une", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "uno", %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.cmpc "true", %{{.*}}, %{{.*}} : !fir.complex<16> + +// CHECK: 
[[VAL_165:%.*]] = fir.cmpc "ule", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_166:%.*]] = fir.cmpc "une", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_167:%.*]] = fir.cmpc "uno", [[VAL_151]], [[VAL_152]] : !fir.complex<16> +// CHECK: [[VAL_168:%.*]] = fir.cmpc "true", [[VAL_151]], [[VAL_152]] : !fir.complex<16> %c0 = fir.cmpc "ule", %a, %b : !fir.complex<16> %c1 = fir.cmpc "une", %a, %b : !fir.complex<16> %c2 = fir.cmpc "uno", %a, %b : !fir.complex<16> %c3 = fir.cmpc "true", %a, %b : !fir.complex<16> +// CHECK: return +// CHECK: } return } -// CHECK-LABEL: @arith_real +// CHECK-LABEL: func @arith_real( +// CHECK-SAME: [[VAL_169:%.*]]: !fir.real<16>, [[VAL_170:%.*]]: !fir.real<16>) -> !fir.real<16> { func @arith_real(%a : !fir.real<16>, %b : !fir.real<16>) -> !fir.real<16> { + +// CHECK: [[VAL_171:%.*]] = constant 1.0 +// CHECK: [[VAL_172:%.*]] = fir.convert [[VAL_171]] : (f32) -> !fir.real<16> +// CHECK: [[VAL_173:%.*]] = fir.negf [[VAL_169]] : !fir.real<16> +// CHECK: [[VAL_174:%.*]] = fir.addf [[VAL_172]], [[VAL_173]] : !fir.real<16> +// CHECK: [[VAL_175:%.*]] = fir.subf [[VAL_174]], [[VAL_170]] : !fir.real<16> +// CHECK: [[VAL_176:%.*]] = fir.mulf [[VAL_173]], [[VAL_175]] : !fir.real<16> +// CHECK: [[VAL_177:%.*]] = fir.divf [[VAL_176]], [[VAL_169]] : !fir.real<16> %c1 = constant 1.0 : f32 %0 = fir.convert %c1 : (f32) -> !fir.real<16> - // CHECK: %[[R1:.*]] = fir.negf %{{.*}} : !fir.real<16> - // CHECK: fir.addf %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: %[[R3:.*]] = fir.subf %{{.*}}, %{{.*}} : !fir.real<16> - // CHECK: fir.mulf %[[R1]], %[[R3]] : !fir.real<16> - // CHECK: fir.divf %{{.*}}, %{{.*}} : !fir.real<16> %1 = fir.negf %a : !fir.real<16> %2 = fir.addf %0, %1 : !fir.real<16> %3 = fir.subf %2, %b : !fir.real<16> %4 = fir.mulf %1, %3 : !fir.real<16> %5 = fir.divf %4, %a : !fir.real<16> +// CHECK: return [[VAL_177]] : !fir.real<16> +// CHECK: } return %5 : !fir.real<16> } -// CHECK-LABEL: @arith_complex +// CHECK-LABEL: func @arith_complex( +// CHECK-SAME: [[VAL_178:%.*]]: !fir.complex<16>, [[VAL_179:%.*]]: !fir.complex<16>) -> !fir.complex<16> { func @arith_complex(%a : !fir.complex<16>, %b : !fir.complex<16>) -> !fir.complex<16> { - // CHECK: fir.negc %{{.*}} : !fir.complex<16> - // CHECK: fir.addc %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.subc %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.mulc %{{.*}}, %{{.*}} : !fir.complex<16> - // CHECK: fir.divc %{{.*}}, %{{.*}} : !fir.complex<16> +// CHECK: [[VAL_180:%.*]] = fir.negc [[VAL_178]] : !fir.complex<16> +// CHECK: [[VAL_181:%.*]] = fir.addc [[VAL_179]], [[VAL_180]] : !fir.complex<16> +// CHECK: [[VAL_182:%.*]] = fir.subc [[VAL_181]], [[VAL_179]] : !fir.complex<16> +// CHECK: [[VAL_183:%.*]] = fir.mulc [[VAL_180]], [[VAL_182]] : !fir.complex<16> +// CHECK: [[VAL_184:%.*]] = fir.divc [[VAL_183]], [[VAL_178]] : !fir.complex<16> %1 = fir.negc %a : !fir.complex<16> %2 = fir.addc %b, %1 : !fir.complex<16> %3 = fir.subc %2, %b : !fir.complex<16> %4 = fir.mulc %1, %3 : !fir.complex<16> %5 = fir.divc %4, %a : !fir.complex<16> +// CHECK: return [[VAL_184]] : !fir.complex<16> +// CHECK: } return %5 : !fir.complex<16> } -// CHECK-LABEL: @character_literal +// CHECK-LABEL: func @character_literal() -> !fir.array<13x!fir.char<1>> { func @character_literal() -> !fir.array<13 x !fir.char<1>> { - // CHECK: fir.string_lit "Hello, World!"(13) : !fir.char<1> +// CHECK: [[VAL_185:%.*]] = fir.string_lit "Hello, World!"(13) : !fir.char<1> %0 = fir.string_lit "Hello, World!"(13) : !fir.char<1> +// 
CHECK: return [[VAL_185]] : !fir.array<13x!fir.char<1>> return %0 : !fir.array<13 x !fir.char<1>> +// CHECK: } +} + +// CHECK-LABEL: func @earlyexit2(i32) -> i1 +func @earlyexit2(%a : i32) -> i1 + +// CHECK-LABEL: func @early_exit( +// CHECK-SAME: [[VAL_186:%.*]]: i1, [[VAL_187:%.*]]: i32) -> i1 { +func @early_exit(%ok : i1, %k : i32) -> i1 { +// CHECK: [[VAL_188:%.*]] = constant 1 : index +// CHECK: [[VAL_189:%.*]] = constant 100 : index + %c1 = constant 1 : index + %c100 = constant 100 : index + +// CHECK: [[VAL_190:%.*]], [[VAL_191:%.*]] = fir.iterate_while ([[VAL_192:%.*]] = [[VAL_188]] to [[VAL_189]] step [[VAL_188]]) and ([[VAL_193:%.*]] = [[VAL_186]]) iter_args([[VAL_194:%.*]] = [[VAL_187]]) -> (i32) { +// CHECK: [[VAL_195:%.*]] = call @earlyexit2([[VAL_194]]) : (i32) -> i1 +// CHECK: fir.result [[VAL_195]], [[VAL_194]] : i1, i32 +// CHECK: } + %newOk:2 = fir.iterate_while (%i = %c1 to %c100 step %c1) and (%ok_ = %ok) iter_args(%v = %k) -> (i32) { + %stop = call @earlyexit2(%v) : (i32) -> i1 + fir.result %stop, %v : i1, i32 + } +// CHECK: return [[VAL_190]] : i1 +// CHECK: } + return %newOk#0 : i1 } diff --git a/flang/test/Semantics/allocate11.f90 b/flang/test/Semantics/allocate11.f90 index 594bd1ded385f..01b9944019ae3 100644 --- a/flang/test/Semantics/allocate11.f90 +++ b/flang/test/Semantics/allocate11.f90 @@ -38,6 +38,7 @@ subroutine C937(var) type B type(A) y + !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'y%x') type(B), pointer :: forward real :: u end type @@ -47,6 +48,7 @@ subroutine C937(var) end type type D + !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'x') type(A), pointer :: potential end type diff --git a/flang/test/Semantics/call12.f90 b/flang/test/Semantics/call12.f90 index e25a2608c4411..65da46b067d6c 100644 --- a/flang/test/Semantics/call12.f90 +++ b/flang/test/Semantics/call12.f90 @@ -15,7 +15,7 @@ module m real, pointer :: p end type type :: hasCoarray - real :: co[*] + real, allocatable :: co[:] end type contains pure function test(ptr, in, hpd) diff --git a/flang/test/Semantics/call14.f90 b/flang/test/Semantics/call14.f90 index b874e6b009125..ee5086511de3b 100644 --- a/flang/test/Semantics/call14.f90 +++ b/flang/test/Semantics/call14.f90 @@ -3,7 +3,7 @@ module m type :: hasCoarray - real :: coarray[*] + real, allocatable :: coarray[:] end type contains !ERROR: VALUE attribute may apply only to a dummy data object diff --git a/flang/test/Semantics/misc-declarations.f90 b/flang/test/Semantics/misc-declarations.f90 index 7680eed793bce..f627836b3732c 100644 --- a/flang/test/Semantics/misc-declarations.f90 +++ b/flang/test/Semantics/misc-declarations.f90 @@ -4,12 +4,12 @@ ! - 8.5.19 constraints on the VOLATILE attribute module m - !ERROR: ALLOCATABLE coarray must have a deferred coshape + !ERROR: 'mustbedeferred' is an ALLOCATABLE coarray and must have a deferred coshape real, allocatable :: mustBeDeferred[*] ! C827 - !ERROR: Non-ALLOCATABLE coarray must have an explicit coshape + !ERROR: Component 'mustbeexplicit' is a non-ALLOCATABLE coarray and must have an explicit coshape real :: mustBeExplicit[:] ! 
C828 type :: hasCoarray - real :: coarray[*] + real, allocatable :: coarray[:] end type real :: coarray[*] type(hasCoarray) :: coarrayComponent diff --git a/flang/test/Semantics/modfile24.f90 b/flang/test/Semantics/modfile24.f90 index ec446f9e8d3c3..45f6c0545627f 100644 --- a/flang/test/Semantics/modfile24.f90 +++ b/flang/test/Semantics/modfile24.f90 @@ -36,8 +36,8 @@ module m2 ! coarray-spec in components and with non-constants bounds module m3 type t - real :: c[1:10,1:*] - complex, codimension[5,*] :: d + real, allocatable :: c[:,:] + complex, allocatable, codimension[:,:] :: d end type real, allocatable :: e[:,:,:] contains @@ -50,8 +50,8 @@ subroutine s(a, b, n) !Expect: m3.mod !module m3 ! type::t -! real(4)::c[1_8:10_8,1_8:*] -! complex(4)::d[1_8:5_8,1_8:*] +! real(4),allocatable::c[:,:] +! complex(4),allocatable::d[:,:] ! end type ! real(4),allocatable::e[:,:,:] !contains diff --git a/flang/test/Semantics/resolve33.f90 b/flang/test/Semantics/resolve33.f90 index 176404b9db63e..7df5ba935ab0c 100644 --- a/flang/test/Semantics/resolve33.f90 +++ b/flang/test/Semantics/resolve33.f90 @@ -2,6 +2,12 @@ ! Derived type parameters ! C731 The same type-param-name shall not appear more than once in a given ! derived-type-stmt. +! C741 A type-param-name in a type-param-def-stmt in a derived-type-def shall +! be one of the type-paramnames in the derived-type-stmt of that +! derived-type-def. +! C742 Each type-param-name in the derived-type-stmt in a derived-type-def +! shall appear exactly once as a type-param-name in a type-param-def-stmt +! in that derived-type-def. module m !ERROR: Duplicate type parameter name: 'a' @@ -30,4 +36,11 @@ module m !ERROR: Type parameter, component, or procedure binding 'a' already defined in this type integer, len :: a end type + !ERROR: No definition found for type parameter 'k' + !ERROR: No definition found for type parameter 'l' + type :: t6(k, l) + !ERROR: Must be a constant value + character(kind=k, len=l) :: d3 + end type + type(t6(2, 10)) :: x3 end module diff --git a/flang/test/Semantics/resolve44.f90 b/flang/test/Semantics/resolve44.f90 index 2d8b701787537..41ab06ffb6c6a 100644 --- a/flang/test/Semantics/resolve44.f90 +++ b/flang/test/Semantics/resolve44.f90 @@ -1,5 +1,8 @@ ! RUN: %B/test/Semantics/test_errors.sh %s %flang %t ! Error tests for recursive use of derived types. +! C744 If neither the POINTER nor the ALLOCATABLE attribute is specified, the +! declaration-type-spec in the component-def-stmt shall specify an intrinsic +! type or a previously defined derived type. program main type :: recursive1 diff --git a/flang/test/Semantics/resolve88.f90 b/flang/test/Semantics/resolve88.f90 new file mode 100644 index 0000000000000..50135297241c4 --- /dev/null +++ b/flang/test/Semantics/resolve88.f90 @@ -0,0 +1,75 @@ +! RUN: %B/test/Semantics/test_errors.sh %s %flang %t +! C746, C747, and C748 +module m + use ISO_FORTRAN_ENV + use ISO_C_BINDING + + ! C746 If a coarray-spec appears, it shall be a deferred-coshape-spec-list and + ! the component shall have the ALLOCATABLE attribute. 
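! To make the two sides of C746 concrete before the test type: a component
!   real, allocatable :: good[:]   ! deferred coshape + ALLOCATABLE: accepted
!   real              :: bad[*]    ! explicit coshape, not ALLOCATABLE: rejected
! is the pattern exercised below, together with the rejected POINTER spelling.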
+ + type testCoArrayType + real, allocatable, codimension[:] :: allocatableField + !ERROR: Component 'deferredfield' is a coarray and must have the ALLOCATABLE attribute + real, codimension[:] :: deferredField + !ERROR: 'pointerfield' may not have the POINTER attribute because it is a coarray + !ERROR: Component 'pointerfield' is a coarray and must have the ALLOCATABLE attribute + real, pointer, codimension[:] :: pointerField + !ERROR: Component 'realfield' is a coarray and must have the ALLOCATABLE attribute and have a deferred coshape + real, codimension[*] :: realField + !ERROR: 'realfield2' is an ALLOCATABLE coarray and must have a deferred coshape + real, allocatable, codimension[*] :: realField2 + end type testCoArrayType + + ! C747 If a coarray-spec appears, the component shall not be of type C_PTR or + ! C_FUNPTR from the intrinsic module ISO_C_BINDING (18.2), or of type + ! TEAM_TYPE from the intrinsic module ISO_FORTRAN_ENV (16.10.2). + + type goodCoarrayType + real, allocatable, codimension[:] :: field + end type goodCoarrayType + + type goodTeam_typeCoarrayType + type(team_type), allocatable :: field + end type goodTeam_typeCoarrayType + + type goodC_ptrCoarrayType + type(c_ptr), allocatable :: field + end type goodC_ptrCoarrayType + + type goodC_funptrCoarrayType + type(c_funptr), allocatable :: field + end type goodC_funptrCoarrayType + + type team_typeCoarrayType + !ERROR: A coarray component may not be of type TEAM_TYPE from ISO_FORTRAN_ENV + type(team_type), allocatable, codimension[:] :: field + end type team_typeCoarrayType + + type c_ptrCoarrayType + !ERROR: A coarray component may not be of type C_PTR or C_FUNPTR from ISO_C_BINDING + type(c_ptr), allocatable, codimension[:] :: field + end type c_ptrCoarrayType + + type c_funptrCoarrayType + !ERROR: A coarray component may not be of type C_PTR or C_FUNPTR from ISO_C_BINDING + type(c_funptr), allocatable, codimension[:] :: field + end type c_funptrCoarrayType + +! C748 A data component whose type has a coarray ultimate component shall be a +! nonpointer nonallocatable scalar and shall not be a coarray. + + type coarrayType + real, allocatable, codimension[:] :: goodCoarrayField + end type coarrayType + + type testType + type(coarrayType) :: goodField + !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + type(coarrayType), pointer :: pointerField + !ERROR: A component with a POINTER or ALLOCATABLE attribute may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + type(coarrayType), allocatable :: allocatableField + !ERROR: An array or coarray component may not be of a type with a coarray ultimate component (named 'goodcoarrayfield') + type(coarrayType), dimension(3) :: arrayField + end type testType + +end module m diff --git a/flang/test/Semantics/resolve89.f90 b/flang/test/Semantics/resolve89.f90 new file mode 100644 index 0000000000000..883970f30edf8 --- /dev/null +++ b/flang/test/Semantics/resolve89.f90 @@ -0,0 +1,110 @@ +! RUN: %B/test/Semantics/test_errors.sh %s %flang %t +! C750 Each bound in the explicit-shape-spec shall be a specification +! expression in which there are no references to specification functions or +! the intrinsic functions ALLOCATED, ASSOCIATED, EXTENDS_- TYPE_OF, PRESENT, +! or SAME_TYPE_AS, every specification inquiry reference is a constant +! expression, and the value does not depend on the value of a variable. 
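! The helpers below give C750 something to trip over: an impure function, a
! pure function, statement functions, and dummy arguments whose values would
! make an array bound depend on run-time state. Note the asymmetry the test
! checks: dimension(ioArg) is a legal specification expression for a local
! variable, but the same bound is rejected for a derived-type component.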
+impure function impureFunc() + integer :: impureFunc + + impureFunc = 3 +end function impureFunc + +pure function pureFunc() + integer :: pureFunc + + pureFunc = 3 +end function pureFunc + +module m + real, allocatable :: mVar +end module m + +subroutine s(iArg, allocArg, pointerArg, arrayArg, ioArg, optionalArg) + use m + implicit logical(l) + integer, intent(in) :: iArg + real, allocatable, intent(in) :: allocArg + real, pointer, intent(in) :: pointerArg + integer, dimension(:), intent(in) :: arrayArg + integer, intent(inout) :: ioArg + real, optional, intent(in) :: optionalArg + + ! These declarations are OK since they're not in a derived type + real :: realVar + real, volatile :: volatileVar + real, dimension(merge(1, 2, allocated(allocArg))) :: realVar1 + real, dimension(merge(1, 2, associated(pointerArg))) :: realVar2 + real, dimension(merge(1, 2, is_contiguous(arrayArg))) :: realVar3 + real, dimension(ioArg) :: realVar4 + real, dimension(merge(1, 2, present(optionalArg))) :: realVar5 + + ! statement functions referenced below + iVolatileStmtFunc() = 3 * volatileVar + iImpureStmtFunc() = 3 * impureFunc() + iPureStmtFunc() = 3 * pureFunc() + + ! This is OK + real, dimension(merge(1, 2, allocated(mVar))) :: rVar + + + integer :: var = 3 + !ERROR: Invalid specification expression: reference to impure function 'ivolatilestmtfunc' + real, dimension(iVolatileStmtFunc()) :: arrayVarWithVolatile + !ERROR: Invalid specification expression: reference to impure function 'iimpurestmtfunc' + real, dimension(iImpureStmtFunc()) :: arrayVarWithImpureFunction + !ERROR: Invalid specification expression: reference to statement function 'ipurestmtfunc' + real, dimension(iPureStmtFunc()) :: arrayVarWithPureFunction + real, dimension(iabs(iArg)) :: arrayVarWithIntrinsic + + type arrayType + !ERROR: Invalid specification expression: reference to variable 'var' not allowed for derived type components + real, dimension(var) :: varField + !ERROR: Invalid specification expression: reference to impure function 'ivolatilestmtfunc' + real, dimension(iVolatileStmtFunc()) :: arrayFieldWithVolatile + !ERROR: Invalid specification expression: reference to impure function 'iimpurestmtfunc' + real, dimension(iImpureStmtFunc()) :: arrayFieldWithImpureFunction + !ERROR: Invalid specification expression: reference to statement function 'ipurestmtfunc' + real, dimension(iPureStmtFunc()) :: arrayFieldWithPureFunction + !ERROR: Invalid specification expression: reference to variable 'iarg' not allowed for derived type components + real, dimension(iabs(iArg)) :: arrayFieldWithIntrinsic + !ERROR: Invalid specification expression: reference to intrinsic 'allocated' not allowed for derived type components + real, dimension(merge(1, 2, allocated(allocArg))) :: realField1 + !ERROR: Invalid specification expression: reference to intrinsic 'associated' not allowed for derived type components + real, dimension(merge(1, 2, associated(pointerArg))) :: realField2 + !ERROR: Invalid specification expression: non-constant reference to inquiry intrinsic 'is_contiguous' not allowed for derived type components + real, dimension(merge(1, 2, is_contiguous(arrayArg))) :: realField3 + !ERROR: Invalid specification expression: reference to variable 'ioarg' not allowed for derived type components + real, dimension(ioArg) :: realField4 + !ERROR: Invalid specification expression: reference to intrinsic 'present' not allowed for derived type components + real, dimension(merge(1, 2, present(optionalArg))) :: realField5 + end type arrayType + +end 
subroutine s + +subroutine s1() + ! C750, check for a constant specification inquiry that's a type parameter + ! inquiry which are defined in 9.4.5 + type derived(kindParam, lenParam) + integer, kind :: kindParam = 3 + integer, len :: lenParam = 3 + end type + + contains + subroutine inner (derivedArg) + type(derived), intent(in), dimension(3) :: derivedArg + integer :: localInt + + type(derived), parameter :: localderived = derived() + + type localDerivedType + ! OK because the specification inquiry is a constant + integer, dimension(localDerived%kindParam) :: goodField + !ERROR: Invalid specification expression: non-constant reference to a type parameter inquiry not allowed for derived type components + integer, dimension(derivedArg%lenParam) :: badField + end type localDerivedType + + ! OK because we're not defining a component + integer, dimension(derivedArg%kindParam) :: localVar + end subroutine inner +end subroutine s1 diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 439f9710ef660..c1aa851097b7c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -57,18 +57,14 @@ config.substitutions.append(('%B', config.flang_obj_root)) # For each occurrence of a flang tool name, replace it with the full path to -# the build directory holding that tool. We explicitly specify the directories -# to search to ensure that we get the tools just built and not some random -# tools that might happen to be in the user's PATH. -tool_dirs = [config.llvm_tools_dir, config.flang_tools_dir] -flang_includes = "-I" + config.flang_intrinsic_modules_dir - -tools = [ToolSubst('%flang', command=FindTool('flang'), unresolved='fatal'), - ToolSubst('%f18', command=FindTool('f18'), unresolved='fatal'), - ToolSubst('%f18_with_includes', command=FindTool('f18'), - extra_args=[flang_includes], unresolved='fatal')] - -llvm_config.add_tool_substitutions(tools, tool_dirs) +# the build directory holding that tool. +tools = [ + ToolSubst('%flang', command=FindTool('flang'), unresolved='fatal'), + ToolSubst('%f18', command=FindTool('f18'), unresolved='fatal'), + ToolSubst('%f18_with_includes', command=FindTool('f18'), + extra_args=["-I" + config.flang_intrinsic_modules_dir], unresolved='fatal') +] +llvm_config.add_tool_substitutions(tools, [config.flang_llvm_tools_dir]) # Enable libpgmath testing result = lit_config.params.get("LIBPGMATH") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index 92bd926ab5cac..e8e2945a2cbf0 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -7,7 +7,7 @@ config.flang_obj_root = "@FLANG_BINARY_DIR@" config.flang_src_dir = "@FLANG_SOURCE_DIR@" config.flang_tools_dir = "@FLANG_TOOLS_DIR@" config.flang_intrinsic_modules_dir = "@FLANG_INTRINSIC_MODULES_DIR@" -config.flang_llvm_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@" +config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin" config.python_executable = "@PYTHON_EXECUTABLE@" # Support substitution of the tools_dir with user parameters. 
This is diff --git a/flang/tools/f18-parse-demo/CMakeLists.txt b/flang/tools/f18-parse-demo/CMakeLists.txt index ab13d602542f2..d60d7d6d5cbde 100644 --- a/flang/tools/f18-parse-demo/CMakeLists.txt +++ b/flang/tools/f18-parse-demo/CMakeLists.txt @@ -5,5 +5,6 @@ add_flang_tool(f18-parse-demo target_link_libraries(f18-parse-demo PRIVATE + FortranCommon FortranParser ) diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 70e09ef55144e..86434b253befd 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -1,15 +1,18 @@ +set(LLVM_LINK_COMPONENTS + Support + ) add_flang_tool(f18 dump.cpp f18.cpp -) + ) target_link_libraries(f18 PRIVATE + FortranCommon FortranParser FortranEvaluate FortranSemantics FortranLower - LLVMSupport ) set(MODULES diff --git a/flang/unittests/Evaluate/CMakeLists.txt b/flang/unittests/Evaluate/CMakeLists.txt index 54c816ef6c55b..21aa2edfccb74 100644 --- a/flang/unittests/Evaluate/CMakeLists.txt +++ b/flang/unittests/Evaluate/CMakeLists.txt @@ -41,6 +41,7 @@ add_executable(expression-test ) target_link_libraries(expression-test + FortranCommon FortranEvaluateTesting FortranEvaluate FortranSemantics @@ -64,6 +65,7 @@ add_executable(intrinsics-test ) target_link_libraries(intrinsics-test + FortranCommon FortranEvaluateTesting FortranEvaluate FortranDecimal @@ -130,6 +132,7 @@ add_executable(folding-test ) target_link_libraries(folding-test + FortranCommon FortranEvaluateTesting FortranEvaluate FortranSemantics diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake index ca79ad123ce4a..4c9f9a6ae982f 100644 --- a/libc/cmake/modules/LLVMLibCLibraryRules.cmake +++ b/libc/cmake/modules/LLVMLibCLibraryRules.cmake @@ -32,6 +32,40 @@ function(get_object_files_for_entrypoint_library result) set(${result} ${object_files} PARENT_SCOPE) endfunction() +# This is a helper function and not a build rule. Given an entrypoint object +# target, it returns the object file produced by this target in |result|. +# If the given entrypoint target is an alias, then it traverses up to the +# aliasee to get the object file. +function(get_entrypoint_object_file entrypoint_target result) + get_target_property(target_type ${entrypoint_target} "TARGET_TYPE") + if(NOT (${target_type} STREQUAL ${ENTRYPOINT_OBJ_TARGET_TYPE})) + message(FATAL_ERROR + "Expected a target added using the `add_entrypoint_object` rule.") + endif() + + get_target_property(objfile ${entrypoint_target} "OBJECT_FILE") + if(objfile) + set(${result} ${objfile} PARENT_SCOPE) + return() + endif() + + # If the entrypoint is an alias, fetch the object file from the aliasee. + get_target_property(is_alias ${entrypoint_target} "IS_ALIAS") + if(is_alias) + get_target_property(aliasee ${entrypoint_target} "DEPS") + if(NOT aliasee) + message(FATAL_ERROR + "Entrypoint alias ${entrypoint_target} does not have an aliasee.") + endif() + get_entrypoint_object_file(${aliasee} objfile) + set(${result} ${objfile} PARENT_SCOPE) + return() + endif() + + message(FATAL_ERROR + "Entrypoint ${entrypoint_target} does not produce an object file.") +endfunction(get_entrypoint_object_file) + # A rule to build a library from a collection of entrypoint objects.
# Usage: # add_entrypoint_library( @@ -62,7 +96,7 @@ function(add_entrypoint_library target_name) message(FATAL_ERROR "Dependency '${dep}' of 'add_entrypoint_collection' is " "not an 'add_entrypoint_object' target.") endif() - get_target_property(objfile ${dep} "OBJECT_FILE") + get_entrypoint_object_file(${dep} objfile) list(APPEND obj_list ${objfile}) endforeach(dep) list(REMOVE_DUPLICATES obj_list) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index 947154ce392e1..e4bf15723129d 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -110,6 +110,7 @@ function(add_entrypoint_object target_name) ${fq_target_name} PROPERTIES "TARGET_TYPE" ${ENTRYPOINT_OBJ_TARGET_TYPE} + "IS_ALIAS" "YES" "OBJECT_FILE" "" "OBJECT_FILE_RAW" "" "DEPS" "${fq_dep_name}" diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 16bb0d79fe930..7d81c0d7e95be 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -117,6 +117,20 @@ def IsNanMacro : MacroDef<"isnan"> { }]; } +def FloatT : TypeDecl<"float_t"> { + let Decl = [{ + #define __need_float_t + #include <__llvm-libc-stdc-types.h> + }]; +} + +def DoubleT : TypeDecl<"double_t"> { + let Decl = [{ + #define __need_double_t + #include <__llvm-libc-stdc-types.h> + }]; +} + def MathAPI : PublicAPI<"math.h"> { let Macros = [ SimpleMacroDef<"MATH_ERRNO", "1">, @@ -130,6 +144,10 @@ def MathAPI : PublicAPI<"math.h"> { IsInfMacro, IsNanMacro, ]; + let TypeDeclarations = [ + DoubleT, + FloatT, + ]; let Functions = [ "cosf", "round", diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index f9564b323494e..4dcd69a62bfbf 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -11,6 +11,12 @@ add_header( __posix-types.h ) +add_header( + stdc_types + HDR + __llvm-libc-stdc-types.h +) + add_header( ctype HDR @@ -25,6 +31,7 @@ add_gen_header( GEN_HDR math.h DEPENDS .llvm_libc_common_h + .stdc_types ) add_gen_header( diff --git a/libc/include/__llvm-libc-stdc-types.h b/libc/include/__llvm-libc-stdc-types.h new file mode 100644 index 0000000000000..8e3bad652df02 --- /dev/null +++ b/libc/include/__llvm-libc-stdc-types.h @@ -0,0 +1,37 @@ +//===-- Definitions of common types from the C standard. ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This header file does not have a header guard. It is internal to LLVM libc +// and intended to be used to pick specific definitions without polluting the +// public headers with unnecessary definitions. + +#undef __LLVM_LIBC_FLOAT_T +#undef __LLVM_LIBC_DOUBLE_T + +#if !defined(__FLT_EVAL_METHOD__) || __FLT_EVAL_METHOD__ == 0 +#define __LLVM_LIBC_FLOAT_T float +#define __LLVM_LIBC_DOUBLE_T double +#elif __FLT_EVAL_METHOD__ == 1 +#define __LLVM_LIBC_FLOAT_T double +#define __LLVM_LIBC_DOUBLE_T double +#elif __FLT_EVAL_METHOD__ == 2 +#define __LLVM_LIBC_FLOAT_T long double +#define __LLVM_LIBC_DOUBLE_T long double +#else +#error "Unsupported __FLT_EVAL_METHOD__ value." 
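/* For reference (C11 5.2.4.2.2): __FLT_EVAL_METHOD__ == 0 means expressions
 * are evaluated in the range and precision of their own type, so float_t is
 * float and double_t is double; 1 promotes float arithmetic to double; and 2
 * evaluates everything in long double. A public header then requests one
 * typedef at a time, e.g.
 *     #define __need_float_t
 *     #include <__llvm-libc-stdc-types.h>
 * exactly as the MathAPI definition earlier in this patch does. */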
+#endif + +#if defined(__need_float_t) && !defined(__llvm_libc_float_t_defined) +typedef __LLVM_LIBC_FLOAT_T float_t; +#define __llvm_libc_float_t_defined +#endif // __need_float_t + +#if defined(__need_double_t) && !defined(__llvm_libc_double_t_defined) +typedef __LLVM_LIBC_DOUBLE_T double_t; +#define __llvm_libc_double_t_defined +#endif // __need_double_t diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index 5b31ad231fc0b..561ab38bc94d0 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -26,6 +26,7 @@ add_entrypoint_library( libc.src.string.memcpy libc.src.string.strcpy libc.src.string.strcat + libc.src.string.strlen # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/test/config/linux/x86_64/syscall_test.cpp b/libc/test/config/linux/x86_64/syscall_test.cpp index b2487332947ea..160255c511f03 100644 --- a/libc/test/config/linux/x86_64/syscall_test.cpp +++ b/libc/test/config/linux/x86_64/syscall_test.cpp @@ -15,30 +15,30 @@ TEST(X86_64_SyscallTest, APITest) { // We only do a signature test here. Actual functionality tests are // done by the unit tests of the syscall wrappers like mmap. - using __llvm_libc::cpp::function; + using __llvm_libc::cpp::Function; - function f([](long n) { return __llvm_libc::syscall(n); }); - function f1( + Function f([](long n) { return __llvm_libc::syscall(n); }); + Function f1( + [](long n, long a1) { return __llvm_libc::syscall(n, a1); }); - function f2( + Function f2( [](long n, long a1, long a2) { return __llvm_libc::syscall(n, a1, a2); }); - function f3( + Function f3( [](long n, long a1, long a2, long a3) { return __llvm_libc::syscall(n, a1, a2, a3); }); - function f4( + Function f4( [](long n, long a1, long a2, long a3, long a4) { return __llvm_libc::syscall(n, a1, a2, a3, a4); }); - function f5( + Function f5( [](long n, long a1, long a2, long a3, long a4, long a5) { return __llvm_libc::syscall(n, a1, a2, a3, a4, a5); }); - function f6( + Function f6( [](long n, long a1, long a2, long a3, long a4, long a5, long a6) { return __llvm_libc::syscall(n, a1, a2, a3, a4, a5, a6); }); - function notLongType( + Function notLongType( [](long n, void *a1) { return __llvm_libc::syscall(n, a1); }); } diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp index 94c66cda1b0fe..54bba168cadf4 100644 --- a/libc/test/src/math/cosf_test.cpp +++ b/libc/test/src/math/cosf_test.cpp @@ -76,7 +76,7 @@ TEST(CosfTest, InFloatRange) { float x = as_float(v); if (isnan(x) || isinf(x)) continue; - EXPECT_TRUE(mpfr::equalsCos(x, __llvm_libc::cosf(x), tolerance)); + ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, __llvm_libc::cosf(x), tolerance); } } @@ -84,12 +84,12 @@ TEST(CosfTest, InFloatRange) { TEST(CosfTest, SmallValues) { float x = as_float(0x17800000); float result = __llvm_libc::cosf(x); - EXPECT_TRUE(mpfr::equalsCos(x, result, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result, tolerance); EXPECT_EQ(FloatBits::One, as_uint32_bits(result)); - x = as_float(0x00400000); + x = as_float(0x00400000); result = __llvm_libc::cosf(x); - EXPECT_TRUE(mpfr::equalsCos(x, result, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result, tolerance); EXPECT_EQ(FloatBits::One, as_uint32_bits(result)); } @@ -98,6 +98,6 @@ TEST(CosfTest, SmallValues) { TEST(CosfTest, SDCOMP_26094) { for (uint32_t v : sdcomp26094Values) { float x = as_float(v); - EXPECT_TRUE(mpfr::equalsCos(x, __llvm_libc::cosf(x), tolerance)); + ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, __llvm_libc::cosf(x), tolerance); } } diff --git a/libc/test/src/math/sincosf_test.cpp
b/libc/test/src/math/sincosf_test.cpp index 36e6b4a129a7c..93b827a2ad374 100644 --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -87,8 +87,8 @@ TEST(SinCosfTest, InFloatRange) { float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); - EXPECT_TRUE(mpfr::equalsCos(x, cos, tolerance)); - EXPECT_TRUE(mpfr::equalsSin(x, sin, tolerance)); + ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, cos, tolerance); + ASSERT_MPFR_MATCH(mpfr::OP_Sin, x, sin, tolerance); } } @@ -98,16 +98,16 @@ TEST(SinCosfTest, SmallValues) { float x = as_float(bits); float result_cos, result_sin; __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_TRUE(mpfr::equalsCos(x, result_cos, tolerance)); - EXPECT_TRUE(mpfr::equalsSin(x, result_sin, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result_cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result_sin, tolerance); EXPECT_EQ(FloatBits::One, as_uint32_bits(result_cos)); EXPECT_EQ(bits, as_uint32_bits(result_sin)); bits = 0x00400000; x = as_float(bits); __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_TRUE(mpfr::equalsCos(x, result_cos, tolerance)); - EXPECT_TRUE(mpfr::equalsSin(x, result_sin, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, result_cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result_sin, tolerance); EXPECT_EQ(FloatBits::One, as_uint32_bits(result_cos)); EXPECT_EQ(bits, as_uint32_bits(result_sin)); } @@ -119,7 +119,7 @@ TEST(SinCosfTest, SDCOMP_26094) { float x = as_float(v); float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); - EXPECT_TRUE(mpfr::equalsCos(x, cos, tolerance)); - EXPECT_TRUE(mpfr::equalsSin(x, sin, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Cos, x, cos, tolerance); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, sin, tolerance); } } diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp index e4c6e818b57a3..c0ce0755964c9 100644 --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -76,13 +76,13 @@ TEST(SinfTest, InFloatRange) { float x = as_float(v); if (isnan(x) || isinf(x)) continue; - EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); + ASSERT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); } } TEST(SinfTest, SpecificBitPatterns) { float x = as_float(0xc70d39a1); - EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); } // For small values, sin(x) is x. 
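// Why x is its own sine here: sin(x) = x - x^3/6 + ..., and once x^3/6 drops
// below half an ulp of x the correctly rounded float result is x itself,
// which holds for |x| below roughly 2^-11. Both inputs used by this test,
// 0x17800000 (about 8.3e-25) and the subnormal 0x00400000, are far under
// that bound.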
@@ -90,13 +90,13 @@ TEST(SinfTest, SmallValues) { uint32_t bits = 0x17800000; float x = as_float(bits); float result = __llvm_libc::sinf(x); - EXPECT_TRUE(mpfr::equalsSin(x, result, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result, tolerance); EXPECT_EQ(bits, as_uint32_bits(result)); bits = 0x00400000; x = as_float(bits); result = __llvm_libc::sinf(x); - EXPECT_TRUE(mpfr::equalsSin(x, result, tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, result, tolerance); EXPECT_EQ(bits, as_uint32_bits(result)); } @@ -105,6 +105,6 @@ TEST(SinfTest, SmallValues) { TEST(SinfTest, SDCOMP_26094) { for (uint32_t v : sdcomp26094Values) { float x = as_float(v); - EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); + EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance); } } diff --git a/libc/utils/CPP/Functional.h b/libc/utils/CPP/Functional.h index 70d3fe9867a84..da73e4cb19802 100644 --- a/libc/utils/CPP/Functional.h +++ b/libc/utils/CPP/Functional.h @@ -12,14 +12,14 @@ namespace __llvm_libc { namespace cpp { -template class function; +template class Function; -template class function { +template class Function { Ret (*func)(Params...) = nullptr; public: - constexpr function() = default; - template constexpr function(Func &&f) : func(f) {} + constexpr Function() = default; + template constexpr Function(Func &&f) : func(f) {} constexpr Ret operator()(Params... params) { return func(params...); } }; diff --git a/libc/utils/CPP/TypeTraits.h b/libc/utils/CPP/TypeTraits.h index 81e8e68f09d69..dfc16b00ab745 100644 --- a/libc/utils/CPP/TypeTraits.h +++ b/libc/utils/CPP/TypeTraits.h @@ -46,6 +46,22 @@ template struct IsPointerType : public TrueValue {}; template struct IsSame : public FalseValue {}; template struct IsSame : public TrueValue {}; +template struct TypeIdentity { typedef T Type; }; + +template struct RemoveCV : public TypeIdentity {}; +template struct RemoveCV : public TypeIdentity {}; +template struct RemoveCV : public TypeIdentity {}; +template +struct RemoveCV : public TypeIdentity {}; + +template using RemoveCVType = typename RemoveCV::Type; + +template struct IsFloatingPointType { + static constexpr bool Value = IsSame>::Value || + IsSame>::Value || + IsSame>::Value; +}; + } // namespace cpp } // namespace __llvm_libc diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt index 8de737485681e..218d5af9fc281 100644 --- a/libc/utils/MPFRWrapper/CMakeLists.txt +++ b/libc/utils/MPFRWrapper/CMakeLists.txt @@ -12,7 +12,8 @@ if(LIBC_TESTS_CAN_USE_MPFR) MPFRUtils.cpp MPFRUtils.h ) - target_link_libraries(libcMPFRWrapper -lmpfr -lgmp) + add_dependencies(libcMPFRWrapper libc.utils.CPP.standalone_cpp LibcUnitTest LLVMSupport) + target_link_libraries(libcMPFRWrapper -lmpfr -lgmp LibcUnitTest LLVMSupport) else() message(WARNING "Math tests using MPFR will be skipped.") endif() diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 7bd849934fc77..75ee2adaff5ae 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -8,8 +8,10 @@ #include "MPFRUtils.h" -#include +#include "llvm/ADT/StringRef.h" + #include +#include namespace __llvm_libc { namespace testing { @@ -25,11 +27,38 @@ class MPFRNumber { public: MPFRNumber() { mpfr_init2(value, mpfrPrecision); } - explicit MPFRNumber(float x) { + // We use explicit EnableIf specializations to disallow implicit + // conversions. Implicit conversions can potentially lead to loss of + // precision. 
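// The guards below are the usual enable_if/SFINAE idiom: each constructor is
// a template over XType that participates in overload resolution only when
// XType is exactly the type that constructor knows how to hand to MPFR, so a
// double argument can never silently narrow into the float path.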
+ template ::Value, int> = 0> + explicit MPFRNumber(XType x) { mpfr_init2(value, mpfrPrecision); mpfr_set_flt(value, x, MPFR_RNDN); } + template ::Value, int> = 0> + explicit MPFRNumber(XType x) { + mpfr_init2(value, mpfrPrecision); + mpfr_set_d(value, x, MPFR_RNDN); + } + + template ::Value, int> = 0> + MPFRNumber(Operation op, XType rawValue) { + mpfr_init2(value, mpfrPrecision); + MPFRNumber mpfrInput(rawValue); + switch (op) { + case OP_Cos: + mpfr_cos(value, mpfrInput.value, MPFR_RNDN); + break; + case OP_Sin: + mpfr_sin(value, mpfrInput.value, MPFR_RNDN); + break; + } + } + MPFRNumber(const MPFRNumber &other) { mpfr_set(value, other.value, MPFR_RNDN); } @@ -59,38 +88,51 @@ class MPFRNumber { return mpfr_lessequal_p(difference.value, tolerance.value); } + std::string str() const { + // 200 bytes should be more than sufficient to hold a 100-digit number + // plus additional bytes for the decimal point, '-' sign etc. + constexpr size_t printBufSize = 200; + char buffer[printBufSize]; + mpfr_snprintf(buffer, printBufSize, "%100.50Rf", value); + llvm::StringRef ref(buffer); + ref = ref.trim(); + return ref.str(); + } + // These functions are useful for debugging. float asFloat() const { return mpfr_get_flt(value, MPFR_RNDN); } double asDouble() const { return mpfr_get_d(value, MPFR_RNDN); } void dump(const char *msg) const { mpfr_printf("%s%.128Rf\n", msg, value); } +}; -public: - static MPFRNumber cos(float x) { - MPFRNumber result; - MPFRNumber mpfrX(x); - mpfr_cos(result.value, mpfrX.value, MPFR_RNDN); - return result; - } +namespace internal { + +template +void MPFRMatcher::explainError(testutils::StreamWrapper &OS) { + MPFRNumber mpfrResult(operation, input); + MPFRNumber mpfrInput(input); + MPFRNumber mpfrMatchValue(matchValue); + OS << "Match value not within tolerance value of MPFR result:\n" + << "Operation input: " << mpfrInput.str() << '\n' + << " Match value: " << mpfrMatchValue.str() << '\n' + << " MPFR result: " << mpfrResult.str() << '\n'; +} - static MPFRNumber sin(float x) { - MPFRNumber result; - MPFRNumber mpfrX(x); - mpfr_sin(result.value, mpfrX.value, MPFR_RNDN); - return result; - } +template void MPFRMatcher::explainError(testutils::StreamWrapper &); +template void MPFRMatcher::explainError(testutils::StreamWrapper &); + +template +bool compare(Operation op, T input, T libcResult, const Tolerance &t) { + MPFRNumber mpfrResult(op, input); + MPFRNumber mpfrInput(input); + MPFRNumber mpfrLibcResult(libcResult); + return mpfrResult.isEqual(mpfrLibcResult, t); }; -bool equalsCos(float input, float libcOutput, const Tolerance &t) { - MPFRNumber mpfrResult = MPFRNumber::cos(input); - MPFRNumber libcResult(libcOutput); - return mpfrResult.isEqual(libcResult, t); -} +template bool compare(Operation, float, float, const Tolerance &); +template bool compare(Operation, double, double, const Tolerance &); -bool equalsSin(float input, float libcOutput, const Tolerance &t) { - MPFRNumber mpfrResult = MPFRNumber::sin(input); - MPFRNumber libcResult(libcOutput); - return mpfrResult.isEqual(libcResult, t); -} +} // namespace internal } // namespace mpfr } // namespace testing diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 9f56ccc61fe63..31afd39b28957 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -9,6 +9,9 @@ #ifndef LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H #define LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H +#include "utils/CPP/TypeTraits.h" +#include "utils/UnitTest/Test.h" + #include namespace 
__llvm_libc { @@ -36,16 +39,57 @@ struct Tolerance { uint32_t bits; }; -// Return true if |libcOutput| is within the tolerance |t| of the cos(x) -// value as evaluated by MPFR. -bool equalsCos(float x, float libcOutput, const Tolerance &t); +enum Operation { + OP_Cos, + OP_Sin, +}; + +namespace internal { + +template +bool compare(Operation op, T input, T libcOutput, const Tolerance &t); + +template class MPFRMatcher : public testing::Matcher { + static_assert(__llvm_libc::cpp::IsFloatingPointType::Value, + "MPFRMatcher can only be used with floating point values."); + + Operation operation; + T input; + Tolerance tolerance; + T matchValue; + +public: + MPFRMatcher(Operation op, T testInput, Tolerance &t) + : operation(op), input(testInput), tolerance(t) {} -// Return true if |libcOutput| is within the tolerance |t| of the sin(x) -// value as evaluated by MPFR. -bool equalsSin(float x, float libcOutput, const Tolerance &t); + bool match(T libcResult) { + matchValue = libcResult; + return internal::compare(operation, input, libcResult, tolerance); + } + + void explainError(testutils::StreamWrapper &OS) override; +}; + +} // namespace internal + +template +internal::MPFRMatcher getMPFRMatcher(Operation op, T input, Tolerance t) { + static_assert( + __llvm_libc::cpp::IsFloatingPointType::Value, + "getMPFRMatcher can only be used to match floating point results."); + return internal::MPFRMatcher(op, input, t); +} } // namespace mpfr } // namespace testing } // namespace __llvm_libc +#define EXPECT_MPFR_MATCH(op, input, matchValue, tolerance) \ + EXPECT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ + op, input, tolerance)) + +#define ASSERT_MPFR_MATCH(op, input, matchValue, tolerance) \ + ASSERT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ + op, input, tolerance)) + #endif // LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H diff --git a/libc/utils/benchmarks/CMakeLists.txt b/libc/utils/benchmarks/CMakeLists.txt index d5926d67ab687..49eba9bccbea2 100644 --- a/libc/utils/benchmarks/CMakeLists.txt +++ b/libc/utils/benchmarks/CMakeLists.txt @@ -24,6 +24,7 @@ ExternalProject_Add(google-benchmark -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER} -DCMAKE_CXX_FLAGS:STRING=${GOOGLE_BENCHMARK_TARGET_FLAGS} + -DCMAKE_CXX_STANDARD:STRING=14 -DCMAKE_BUILD_TYPE:STRING=RELEASE -DCMAKE_INSTALL_PREFIX:PATH= -DBENCHMARK_ENABLE_TESTING:BOOL=OFF) diff --git a/libc/utils/benchmarks/JSON.cpp b/libc/utils/benchmarks/JSON.cpp index 175aa2209ef20..2f37041449837 100644 --- a/libc/utils/benchmarks/JSON.cpp +++ b/libc/utils/benchmarks/JSON.cpp @@ -17,9 +17,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MathExtras.h" + #include #include #include +#include #include namespace llvm { @@ -48,7 +50,7 @@ static Error fromJson(const json::Value &V, double &Out) { static Error fromJson(const json::Value &V, std::string &Out) { if (auto S = V.getAsString()) { - Out = *S; + Out = std::string(*S); return Error::success(); } return createStringError(errc::io_error, "Can't parse String"); diff --git a/libc/utils/benchmarks/LibcBenchmark.cpp b/libc/utils/benchmarks/LibcBenchmark.cpp index b1ab79ec1e3c9..cef595d75e0d9 100644 --- a/libc/utils/benchmarks/LibcBenchmark.cpp +++ b/libc/utils/benchmarks/LibcBenchmark.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "LibcBenchmark.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Host.h" namespace llvm { 
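To tie the MPFR pieces above together: ASSERT_MPFR_MATCH and EXPECT_MPFR_MATCH expand to the unit-test framework's ASSERT_THAT/EXPECT_THAT over getMPFRMatcher, which is what lets a failing comparison print the operation input, the libc value, and the MPFR result through explainError. A minimal sketch of a test body using them, mirroring the updated sinf/cosf tests (the test name is hypothetical, and the `tolerance` value is assumed to be configured the way those tests do it):

```cpp
// Sketch only: assumes the LLVM-libc unit-test framework and MPFRUtils.h,
// plus a previously configured mpfr::Tolerance named `tolerance`.
namespace mpfr = __llvm_libc::testing::mpfr;

TEST(ExampleTest, SinAndCosMatchMPFR) {
  float x = 0.5f;
  // EXPECT_* records a mismatch and lets the test keep running...
  EXPECT_MPFR_MATCH(mpfr::OP_Sin, x, __llvm_libc::sinf(x), tolerance);
  // ...while ASSERT_* aborts on the first mismatch, which is why the loops
  // over whole input ranges in the tests above were moved to the ASSERT form.
  ASSERT_MPFR_MATCH(mpfr::OP_Cos, x, __llvm_libc::cosf(x), tolerance);
}
```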
diff --git a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp index e30597187c52c..79774c3526a04 100644 --- a/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp +++ b/libc/utils/benchmarks/LibcMemoryBenchmarkMain.cpp @@ -17,6 +17,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include + namespace llvm { namespace libc_benchmarks { @@ -61,7 +63,7 @@ void Main() { size_t Steps = 0; for (auto FunctionName : Runner->getFunctionNames()) { FunctionMeasurements FM; - FM.Name = FunctionName; + FM.Name = std::string(FunctionName); for (size_t Run = 0; Run < Runs; ++Run) { for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) { const auto Result = Runner->benchmark(S.Options, FunctionName, Size); diff --git a/libc/utils/testutils/StreamWrapper.cpp b/libc/utils/testutils/StreamWrapper.cpp index b8a693d767ce7..f6318a9934018 100644 --- a/libc/utils/testutils/StreamWrapper.cpp +++ b/libc/utils/testutils/StreamWrapper.cpp @@ -10,6 +10,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include namespace __llvm_libc { namespace testutils { @@ -41,6 +42,7 @@ template StreamWrapper & template StreamWrapper & StreamWrapper::operator<<(unsigned long long t); template StreamWrapper &StreamWrapper::operator<<(bool t); +template StreamWrapper &StreamWrapper::operator<<(std::string t); } // namespace testutils } // namespace __llvm_libc diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index b05cad79d76ac..bdb2d56da8539 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -41,11 +41,33 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL endif() if (LIBCXX_STANDALONE_BUILD) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(WARNING "Failed to find python interpreter. " - "The libc++ test suite will be disabled.") - set(LLVM_INCLUDE_TESTS OFF) + if(CMAKE_VERSION VERSION_LESS 3.12) + include(FindPythonInterp) + if( NOT PYTHONINTERP_FOUND ) + message(WARNING "Failed to find python interpreter. " + "The libc++ test suite will be disabled.") + set(LLVM_INCLUDE_TESTS OFF) + else() + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + endif() + else() + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") + endif() + + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) + endif() endif() endif() @@ -190,7 +212,7 @@ set(ENABLE_LINKER_SCRIPT_DEFAULT_VALUE OFF) if (LLVM_HAVE_LINK_VERSION_SCRIPT AND NOT LIBCXX_STATICALLY_LINK_ABI_IN_SHARED_LIBRARY AND NOT LIBCXX_CXX_ABI_LIBNAME STREQUAL "none" AND NOT LIBCXX_CXX_ABI_LIBNAME STREQUAL "default" - AND PYTHONINTERP_FOUND + AND Python3_EXECUTABLE AND LIBCXX_ENABLE_SHARED) set(ENABLE_LINKER_SCRIPT_DEFAULT_VALUE ON) endif() @@ -349,7 +371,7 @@ endif() # Warn users that LIBCXX_ENABLE_STATIC_ABI_LIBRARY is an experimental option. 
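# (Context for the Python block above: with CMake >= 3.12 libc++ now uses
# FindPython3 directly, and on older CMake, or when only Python 2.7+ exists,
# it wraps whatever interpreter it finds in an imported Python3::Interpreter
# target, so everything downstream can rely on Python3_EXECUTABLE alone.)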
if (LIBCXX_ENABLE_STATIC_ABI_LIBRARY) message(WARNING "LIBCXX_ENABLE_STATIC_ABI_LIBRARY is an experimental option") - if (LIBCXX_ENABLE_STATIC AND NOT PYTHONINTERP_FOUND) + if (LIBCXX_ENABLE_STATIC AND NOT Python3_EXECUTABLE) message(FATAL_ERROR "LIBCXX_ENABLE_STATIC_ABI_LIBRARY requires python but it was not found.") endif() endif() @@ -735,6 +757,8 @@ function(cxx_link_system_libraries target) if (LIBCXX_BUILTINS_LIBRARY) target_link_libraries(${target} PRIVATE "${LIBCXX_BUILTINS_LIBRARY}") endif() + elseif (LIBCXX_HAS_GCC_LIB) + target_link_libraries(${target} PRIVATE gcc) elseif (LIBCXX_HAS_GCC_S_LIB) target_link_libraries(${target} PRIVATE gcc_s) endif() diff --git a/libcxx/cmake/Modules/HandleLibCXXABI.cmake b/libcxx/cmake/Modules/HandleLibCXXABI.cmake index a62efc0afa952..973d87584e4dc 100644 --- a/libcxx/cmake/Modules/HandleLibCXXABI.cmake +++ b/libcxx/cmake/Modules/HandleLibCXXABI.cmake @@ -122,11 +122,11 @@ elseif ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "libcxxrt") "cxxrt" "cxxrt" "cxxabi.h;unwind.h;unwind-arm.h;unwind-itanium.h" "" ) elseif ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "vcruntime") - # Nothing TODO + # Nothing to do elseif ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "none") list(APPEND LIBCXX_COMPILE_FLAGS "-D_LIBCPP_BUILDING_HAS_NO_ABI_LIBRARY") elseif ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "default") - # Nothing TODO + # Nothing to do else() message(FATAL_ERROR "Unsupported c++ abi: '${LIBCXX_CXX_ABI_LIBNAME}'. \ diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index 89e5d17c60f25..cbb4dfd16daca 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -16,7 +16,11 @@ if (NOT LIBCXX_USE_COMPILER_RT) if(WIN32 AND NOT MINGW) set(LIBCXX_HAS_GCC_S_LIB NO) else() - check_library_exists(gcc_s __gcc_personality_v0 "" LIBCXX_HAS_GCC_S_LIB) + if(ANDROID) + check_library_exists(gcc __gcc_personality_v0 "" LIBCXX_HAS_GCC_LIB) + else() + check_library_exists(gcc_s __gcc_personality_v0 "" LIBCXX_HAS_GCC_S_LIB) + endif() endif() endif() @@ -37,6 +41,8 @@ if (LIBCXX_SUPPORTS_NODEFAULTLIBS_FLAG) list(APPEND CMAKE_REQUIRED_FLAGS -rtlib=compiler-rt) find_compiler_rt_library(builtins LIBCXX_BUILTINS_LIBRARY) list(APPEND CMAKE_REQUIRED_LIBRARIES "${LIBCXX_BUILTINS_LIBRARY}") + elseif (LIBCXX_HAS_GCC_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES gcc) elseif (LIBCXX_HAS_GCC_S_LIB) list(APPEND CMAKE_REQUIRED_LIBRARIES gcc_s) endif () diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 739e5a4c549f4..937683d9cc123 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -186,7 +186,7 @@ Status ------------------------------------------------- ----------------- ``__cpp_lib_endian`` ``201907L`` ------------------------------------------------- ----------------- - ``__cpp_lib_erase_if`` ``201811L`` + ``__cpp_lib_erase_if`` ``202002L`` ------------------------------------------------- ----------------- ``__cpp_lib_generic_unordered_lookup`` *unimplemented* ------------------------------------------------- ----------------- diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index 26dfefccdec8a..78134bca8f84a 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -188,12 +188,6 @@ default. Run the tests using the given sanitizer. If LLVM_USE_SANITIZER was given when building libc++ then that sanitizer will be used by default. -.. option:: color_diagnostics - - Enable the use of colorized compile diagnostics. 
If the color_diagnostics - option is specified or the environment variable LIBCXX_COLOR_DIAGNOSTICS is - present then color diagnostics will be enabled. - .. option:: llvm_unwinder Enable the use of LLVM unwinder instead of libgcc. @@ -211,12 +205,6 @@ Environment Variables Specify the site configuration to use when running the tests. Also see `libcxx_site_config`. -.. envvar:: LIBCXX_COLOR_DIAGNOSTICS - - If ``LIBCXX_COLOR_DIAGNOSTICS`` is defined then the test suite will attempt - to use color diagnostic outputs from the compiler. - Also see `color_diagnostics`. - Writing Tests ------------- diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 433bf67fbcb91..250d804f800e6 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -187,7 +187,7 @@ if (LIBCXX_NEEDS_SITE_CONFIG) # Generate a custom __config header. The new header is created # by prepending __config_site to the current __config header. add_custom_command(OUTPUT ${LIBCXX_BINARY_DIR}/__generated_config - COMMAND ${PYTHON_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/cat_files.py + COMMAND ${Python3_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/cat_files.py ${LIBCXX_BINARY_DIR}/__config_site ${LIBCXX_SOURCE_DIR}/include/__config -o ${LIBCXX_BINARY_DIR}/__generated_config diff --git a/libcxx/include/deque b/libcxx/include/deque index 115b1b6427016..c2ea5f2dbe6de 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -150,9 +150,11 @@ template noexcept(noexcept(x.swap(y))); template - void erase(deque& c, const U& value); // C++20 + typename deque::size_type + erase(deque& c, const U& value); // C++20 template - void erase_if(deque& c, Predicate pred); // C++20 + typename deque::size_type + erase_if(deque& c, Predicate pred); // C++20 } // std @@ -3021,14 +3023,20 @@ swap(deque<_Tp, _Allocator>& __x, deque<_Tp, _Allocator>& __y) #if _LIBCPP_STD_VER > 17 template -inline _LIBCPP_INLINE_VISIBILITY -void erase(deque<_Tp, _Allocator>& __c, const _Up& __v) -{ __c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end()); } +inline _LIBCPP_INLINE_VISIBILITY typename deque<_Tp, _Allocator>::size_type +erase(deque<_Tp, _Allocator>& __c, const _Up& __v) { + auto __old_size = __c.size(); + __c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end()); + return __old_size - __c.size(); +} template -inline _LIBCPP_INLINE_VISIBILITY -void erase_if(deque<_Tp, _Allocator>& __c, _Predicate __pred) -{ __c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end()); } +inline _LIBCPP_INLINE_VISIBILITY typename deque<_Tp, _Allocator>::size_type +erase_if(deque<_Tp, _Allocator>& __c, _Predicate __pred) { + auto __old_size = __c.size(); + __c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end()); + return __old_size - __c.size(); +} #endif diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 3905745931fde..3bd8db8b7d4d3 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -169,9 +169,11 @@ template noexcept(noexcept(x.swap(y))); template - void erase(forward_list& c, const U& value); // C++20 + typename forward_list::size_type + erase(forward_list& c, const U& value); // C++20 template - void erase_if(forward_list& c, Predicate pred); // C++20 + typename forward_list::size_type + erase_if(forward_list& c, Predicate pred); // C++20 } // std @@ -1765,13 +1767,17 @@ swap(forward_list<_Tp, _Alloc>& __x, forward_list<_Tp, _Alloc>& __y) #if _LIBCPP_STD_VER > 17 template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(forward_list<_Tp, 
_Allocator>& __c, _Predicate __pred) -{ __c.remove_if(__pred); } + typename forward_list<_Tp, _Allocator>::size_type + erase_if(forward_list<_Tp, _Allocator>& __c, _Predicate __pred) { + return __c.remove_if(__pred); +} template inline _LIBCPP_INLINE_VISIBILITY -void erase(forward_list<_Tp, _Allocator>& __c, const _Up& __v) -{ _VSTD::erase_if(__c, [&](auto& __elem) { return __elem == __v; }); } + typename forward_list<_Tp, _Allocator>::size_type + erase(forward_list<_Tp, _Allocator>& __c, const _Up& __v) { + return _VSTD::erase_if(__c, [&](auto& __elem) { return __elem == __v; }); +} #endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/functional b/libcxx/include/functional index 62b7d097be63d..67be9ee86a64e 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -3163,15 +3163,19 @@ using unwrap_ref_decay_t = typename unwrap_ref_decay<_Tp>::type; #endif // > C++17 template -inline void __libcpp_erase_if_container( _Container& __c, _Predicate __pred) -{ - for (typename _Container::iterator __iter = __c.begin(), __last = __c.end(); __iter != __last;) - { - if (__pred(*__iter)) - __iter = __c.erase(__iter); - else - ++__iter; - } +inline typename _Container::size_type +__libcpp_erase_if_container(_Container& __c, _Predicate __pred) { + typename _Container::size_type __old_size = __c.size(); + + const typename _Container::iterator __last = __c.end(); + for (typename _Container::iterator __iter = __c.begin(); __iter != __last;) { + if (__pred(*__iter)) + __iter = __c.erase(__iter); + else + ++__iter; + } + + return __old_size - __c.size(); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/list b/libcxx/include/list index ae318ead31dab..55b45f1a67d4f 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -170,9 +170,11 @@ template noexcept(noexcept(x.swap(y))); template - void erase(list& c, const U& value); // C++20 + typename list::size_type + erase(list& c, const U& value); // C++20 template - void erase_if(list& c, Predicate pred); // C++20 + typename list::size_type + erase_if(list& c, Predicate pred); // C++20 } // std @@ -2471,14 +2473,16 @@ swap(list<_Tp, _Alloc>& __x, list<_Tp, _Alloc>& __y) #if _LIBCPP_STD_VER > 17 template -inline _LIBCPP_INLINE_VISIBILITY -void erase_if(list<_Tp, _Allocator>& __c, _Predicate __pred) -{ __c.remove_if(__pred); } +inline _LIBCPP_INLINE_VISIBILITY typename list<_Tp, _Allocator>::size_type +erase_if(list<_Tp, _Allocator>& __c, _Predicate __pred) { + return __c.remove_if(__pred); +} template -inline _LIBCPP_INLINE_VISIBILITY -void erase(list<_Tp, _Allocator>& __c, const _Up& __v) -{ _VSTD::erase_if(__c, [&](auto& __elem) { return __elem == __v; }); } +inline _LIBCPP_INLINE_VISIBILITY typename list<_Tp, _Allocator>::size_type +erase(list<_Tp, _Allocator>& __c, const _Up& __v) { + return _VSTD::erase_if(__c, [&](auto& __elem) { return __elem == __v; }); +} #endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/map b/libcxx/include/map index b6f89bf5ee54d..d2b82591368ba 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -254,7 +254,8 @@ swap(map& x, map& y) noexcept(noexcept(x.swap(y))); template - void erase_if(map& c, Predicate pred); // C++20 +typename map::size_type +erase_if(map& c, Predicate pred); // C++20 template , @@ -469,7 +470,8 @@ swap(multimap& x, noexcept(noexcept(x.swap(y))); template - void erase_if(multimap& c, Predicate pred); // C++20 +typename multimap::size_type +erase_if(multimap& c, Predicate pred); // C++20 } // std @@ -1653,10 +1655,13 @@ swap(map<_Key, _Tp, _Compare, 
_Allocator>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(map<_Key, _Tp, _Compare, _Allocator>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename map<_Key, _Tp, _Compare, _Allocator>::size_type + erase_if(map<_Key, _Tp, _Compare, _Allocator>& __c, _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif @@ -2235,10 +2240,14 @@ swap(multimap<_Key, _Tp, _Compare, _Allocator>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(multimap<_Key, _Tp, _Compare, _Allocator>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename multimap<_Key, _Tp, _Compare, _Allocator>::size_type + erase_if(multimap<_Key, _Tp, _Compare, _Allocator>& __c, + _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/regex b/libcxx/include/regex index c917c401293d0..433ba0ffe1fa1 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -5940,6 +5940,9 @@ basic_regex<_CharT, _Traits>::__search( match_results& __m, regex_constants::match_flag_type __flags) const { + if (__flags & regex_constants::match_prev_avail) + __flags &= ~(regex_constants::match_not_bol | regex_constants::match_not_bow); + __m.__init(1 + mark_count(), __first, __last, __flags & regex_constants::__no_update_pos); if (__match_at_start(__first, __last, __m, __flags, diff --git a/libcxx/include/set b/libcxx/include/set index ac3fbbe02fc37..d58455bfe219d 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -216,7 +216,8 @@ swap(set& x, set& y) noexcept(noexcept(x.swap(y))); template - void erase_if(set& c, Predicate pred); // C++20 +typename set::size_type +erase_if(set& c, Predicate pred); // C++20 template , class Allocator = allocator> @@ -417,7 +418,8 @@ swap(multiset& x, multiset& y) noexcept(noexcept(x.swap(y))); template - void erase_if(multiset& c, Predicate pred); // C++20 +typename multiset::size_type +erase_if(multiset& c, Predicate pred); // C++20 } // std @@ -960,8 +962,10 @@ swap(set<_Key, _Compare, _Allocator>& __x, #if _LIBCPP_STD_VER > 17 template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(set<_Key, _Compare, _Allocator>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename set<_Key, _Compare, _Allocator>::size_type + erase_if(set<_Key, _Compare, _Allocator>& __c, _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif template , @@ -1484,8 +1488,10 @@ swap(multiset<_Key, _Compare, _Allocator>& __x, #if _LIBCPP_STD_VER > 17 template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(multiset<_Key, _Compare, _Allocator>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename multiset<_Key, _Compare, _Allocator>::size_type + erase_if(multiset<_Key, _Compare, _Allocator>& __c, _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/string b/libcxx/include/string index 6938ee48b0f82..179080dbb5798 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -437,9 +437,11 @@ basic_istream& getline(basic_istream& is, basic_string& str); template -void erase(basic_string& c, const U& value); // C++20 +typename basic_string::size_type +erase(basic_string& c, const U& value); // C++20 template -void erase_if(basic_string& c, Predicate pred); // C++20 +typename basic_string::size_type 
+erase_if(basic_string& c, Predicate pred); // C++20 typedef basic_string string; typedef basic_string wstring; @@ -4379,15 +4381,25 @@ getline(basic_istream<_CharT, _Traits>&& __is, #endif // _LIBCPP_CXX03_LANG #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase(basic_string<_CharT, _Traits, _Allocator>& __str, const _Up& __v) -{ __str.erase(_VSTD::remove(__str.begin(), __str.end(), __v), __str.end()); } + typename basic_string<_CharT, _Traits, _Allocator>::size_type + erase(basic_string<_CharT, _Traits, _Allocator>& __str, const _Up& __v) { + auto __old_size = __str.size(); + __str.erase(_VSTD::remove(__str.begin(), __str.end(), __v), __str.end()); + return __old_size - __str.size(); +} -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(basic_string<_CharT, _Traits, _Allocator>& __str, _Predicate __pred) -{ __str.erase(_VSTD::remove_if(__str.begin(), __str.end(), __pred), __str.end()); } + typename basic_string<_CharT, _Traits, _Allocator>::size_type + erase_if(basic_string<_CharT, _Traits, _Allocator>& __str, + _Predicate __pred) { + auto __old_size = __str.size(); + __str.erase(_VSTD::remove_if(__str.begin(), __str.end(), __pred), + __str.end()); + return __old_size - __str.size(); +} #endif #if _LIBCPP_DEBUG_LEVEL >= 2 diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index ad17f776c9388..6156cfddd7bd5 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -386,10 +386,12 @@ template noexcept(noexcept(x.swap(y))); template - void erase_if(unordered_set& c, Predicate pred); // C++20 + typename unordered_map::size_type + erase_if(unordered_map& c, Predicate pred); // C++20 template - void erase_if(unordered_multiset& c, Predicate pred); // C++20 + typename unordered_multimap::size_type + erase_if(unordered_multimap& c, Predicate pred); // C++20 template bool @@ -1704,10 +1706,14 @@ swap(unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::size_type + erase_if(unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>& __c, + _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif template @@ -2402,10 +2408,14 @@ swap(unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::size_type + erase_if(unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __c, + _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif template diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 68f777a4ea3eb..6c4ad938006f9 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -341,10 +341,12 @@ template noexcept(noexcept(x.swap(y))); template - void erase_if(unordered_set& c, Predicate pred); // C++20 + typename unordered_set::size_type + erase_if(unordered_set& c, Predicate pred); // C++20 template - void erase_if(unordered_multiset& c, Predicate pred); // C++20 + typename unordered_multiset::size_type + erase_if(unordered_multiset& c, Predicate pred); // C++20 template @@ 
-1006,10 +1008,14 @@ swap(unordered_set<_Value, _Hash, _Pred, _Alloc>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(unordered_set<_Value, _Hash, _Pred, _Alloc>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename unordered_set<_Value, _Hash, _Pred, _Alloc>::size_type + erase_if(unordered_set<_Value, _Hash, _Pred, _Alloc>& __c, + _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif template @@ -1637,10 +1643,14 @@ swap(unordered_multiset<_Value, _Hash, _Pred, _Alloc>& __x, } #if _LIBCPP_STD_VER > 17 -template +template inline _LIBCPP_INLINE_VISIBILITY -void erase_if(unordered_multiset<_Value, _Hash, _Pred, _Alloc>& __c, _Predicate __pred) -{ __libcpp_erase_if_container(__c, __pred); } + typename unordered_multiset<_Value, _Hash, _Pred, _Alloc>::size_type + erase_if(unordered_multiset<_Value, _Hash, _Pred, _Alloc>& __c, + _Predicate __pred) { + return __libcpp_erase_if_container(__c, __pred); +} #endif template diff --git a/libcxx/include/vector b/libcxx/include/vector index af96bffb069cf..7d0fec87c4fa5 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -261,9 +261,11 @@ void swap(vector& x, vector& y) noexcept(noexcept(x.swap(y))); template - void erase(vector& c, const U& value); // C++20 +typename vector::size_type +erase(vector& c, const U& value); // C++20 template - void erase_if(vector& c, Predicate pred); // C++20 +typename vector::size_type +erase_if(vector& c, Predicate pred); // C++20 } // std @@ -3389,14 +3391,20 @@ swap(vector<_Tp, _Allocator>& __x, vector<_Tp, _Allocator>& __y) #if _LIBCPP_STD_VER > 17 template -inline _LIBCPP_INLINE_VISIBILITY -void erase(vector<_Tp, _Allocator>& __c, const _Up& __v) -{ __c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end()); } +inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type +erase(vector<_Tp, _Allocator>& __c, const _Up& __v) { + auto __old_size = __c.size(); + __c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end()); + return __old_size - __c.size(); +} template -inline _LIBCPP_INLINE_VISIBILITY -void erase_if(vector<_Tp, _Allocator>& __c, _Predicate __pred) -{ __c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end()); } +inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type +erase_if(vector<_Tp, _Allocator>& __c, _Predicate __pred) { + auto __old_size = __c.size(); + __c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end()); + return __old_size - __c.size(); +} #endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/version b/libcxx/include/version index ab62a853ceb5a..c2e99ccd9a900 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -44,7 +44,7 @@ __cpp_lib_constexpr_swap_algorithms 201806L __cpp_lib_destroying_delete 201806L __cpp_lib_enable_shared_from_this 201603L __cpp_lib_endian 201907L -__cpp_lib_erase_if 201811L +__cpp_lib_erase_if 202002L __cpp_lib_exchange_function 201304L @@ -226,7 +226,7 @@ __cpp_lib_void_t 201411L # define __cpp_lib_destroying_delete 201806L # endif # define __cpp_lib_endian 201907L -# define __cpp_lib_erase_if 201811L +# define __cpp_lib_erase_if 202002L // # define __cpp_lib_generic_unordered_lookup 201811L # define __cpp_lib_interpolate 201902L # if !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index b55946f6cac38..898ec027f3265 100644 --- a/libcxx/src/CMakeLists.txt +++ 
b/libcxx/src/CMakeLists.txt
@@ -283,7 +283,7 @@ if (LIBCXX_ENABLE_STATIC)
   endif()
   add_custom_command(TARGET cxx_static POST_BUILD
     COMMAND
-      ${PYTHON_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/merge_archives.py
+      ${Python3_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/merge_archives.py
     ARGS
       -o $<TARGET_LINKER_FILE:cxx_static>
       --ar "${CMAKE_AR}"
diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp
index 2d5f172aff7ff..9e8a1b32b41d3 100644
--- a/libcxx/src/chrono.cpp
+++ b/libcxx/src/chrono.cpp
@@ -25,8 +25,8 @@
 #endif
 #else
 #if !defined(CLOCK_REALTIME) || !defined(_LIBCPP_USE_CLOCK_GETTIME)
-#include <sys/time.h>        // for gettimeofday and timeval
-#endif // !defined(CLOCK_REALTIME)
+#include <sys/time.h> // for gettimeofday and timeval
+#endif
 #endif // defined(_LIBCPP_WIN32API)
 
 #if !defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK)
diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp
index 2dc80131c6057..012234d97eac6 100644
--- a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp
+++ b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp
@@ -19,7 +19,6 @@
 // UNSUPPORTED: clang-4.0
 // UNSUPPORTED: c++98, c++03, c++11
-// REQUIRES: verify-support
 
 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
diff --git a/libcxx/test/libcxx/atomics/atomics.align/align.pass.sh.cpp b/libcxx/test/libcxx/atomics/atomics.align/align.pass.sh.cpp
index 1b2eebcb7ea84..835475e10a83b 100644
--- a/libcxx/test/libcxx/atomics/atomics.align/align.pass.sh.cpp
+++ b/libcxx/test/libcxx/atomics/atomics.align/align.pass.sh.cpp
@@ -28,11 +28,14 @@
 #include <atomic>
 #include <cassert>
 
-template <class T> struct atomic_test : public std::__atomic_base<T> {
+template <class T>
+struct atomic_test : public std::__atomic_base<T> {
   atomic_test() {
-    if (this->is_lock_free())
-      assert(alignof(this->__a_) >= sizeof(this->__a_) &&
+    if (this->is_lock_free()) {
+      using AtomicImpl = decltype(this->__a_);
+      assert(alignof(AtomicImpl) >= sizeof(AtomicImpl) &&
              "expected natural alignment for lock-free type");
+    }
   }
 };
 
diff --git a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
index 7bc21851b712c..9017471fede7d 100644
--- a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
+++ b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp
@@ -10,7 +10,7 @@
 // when -fdelayed-template-parsing is enabled, like it is on Windows.
 // XFAIL: LIBCXX-WINDOWS-FIXME
 
-// REQUIRES: verify-support, diagnose-if-support
+// REQUIRES: diagnose-if-support
 // UNSUPPORTED: libcpp-has-no-threads
 
 // <atomic>
diff --git a/libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp b/libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp
index 8e3ec7c635898..e8c02dc8dfeed 100644
--- a/libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp
+++ b/libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++98, c++03
-// REQUIRES: verify-support
 
 // Test that libc++ does not generate a warning diagnostic about the comparator
 // too early for containers of incomplete types.
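Many of the test hunks in this patch simply delete `REQUIRES: verify-support` lines, because the new test format detects clang-verify support automatically (see the fail.cpp lit.local.cfg added later in this patch). For readers unfamiliar with the mechanism, here is a generic sketch of a clang-verify style test; it is illustrative only and does not correspond to any file touched by the patch:

// Compiled with -Xclang -verify, the test passes only when the emitted
// diagnostics match the expected-* comments exactly.
[[deprecated("use replacement()")]] void legacy();

void user() {
  legacy(); // expected-warning {{'legacy' is deprecated: use replacement()}}
}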
diff --git a/libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp b/libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp
index 9a9a1c6d6d12c..b4db1949cbdf3 100644
--- a/libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp
+++ b/libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++98, c++03
-// REQUIRES: diagnose-if-support, verify-support
+// REQUIRES: diagnose-if-support
 
 // Test that libc++ generates a warning diagnostic when the container is
 // provided a non-const callable comparator.
diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp
index b2525fbf0f039..ff83c01e47df9 100644
--- a/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp
+++ b/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++98, c++03
-// REQUIRES: verify-support
 
 // Test that libc++ does not generate a warning diagnostic about the comparator
 // or the hasher too early for containers of incomplete types.
diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp
index c1027adbc8e45..a1440562cbd74 100644
--- a/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp
+++ b/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 // UNSUPPORTED: c++98, c++03
-// REQUIRES: diagnose-if-support, verify-support
+// REQUIRES: diagnose-if-support
 
 // Test that libc++ generates a warning diagnostic when the container is
 // provided a non-const callable comparator or a non-const hasher.
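To make the intent of the comparator tests above concrete, the pattern they guard against looks roughly like the following. This is an illustrative sketch, not part of the patch: the functor names are invented, and the diagnostic wording is paraphrased rather than quoted from libc++:

#include <cstddef>
#include <unordered_map>

// Call operators that are not const-qualified: the code compiles, but with
// a libc++ built with diagnose-if support, instantiating a container that
// must invoke them through a const object emits a warning.
struct NonConstHash {
  std::size_t operator()(int v) /* missing const */ { return std::size_t(v); }
};
struct NonConstEqual {
  bool operator()(int a, int b) /* missing const */ { return a == b; }
};

int main() {
  // libc++ attaches its "non-const callable" warning (wording paraphrased)
  // to instantiations like this one.
  std::unordered_map<int, int, NonConstHash, NonConstEqual> m;
  return 0;
}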
diff --git a/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp b/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp index 31019f3e9fc16..65e995c1a5737 100644 --- a/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp @@ -20,7 +20,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR diff --git a/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp b/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp index eedcb3e8c95bc..8f848653401a8 100644 --- a/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support // UNSUPPORTED: c++98 || c++03 #include diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp index 8b919e4f3b54d..2cef6ab3a7b9f 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp @@ -15,7 +15,6 @@ // Deprecated in C++17 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp index 8c132e5f06c4b..d3f88d21ba0e1 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp index 844bb553e968e..7318876d8e7b1 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp @@ -14,7 +14,6 @@ // Deprecated in C++17 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp index 73e22feffaa90..4c6eaa85324de 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp @@ -28,7 +28,6 @@ // Deprecated in C++17 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support // Clang 6 does not 
handle the deprecated attribute on template members properly, // so the rebind check below fails. diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp index 8133fff3b7ce4..6f692b5cdd393 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp @@ -22,7 +22,6 @@ // Deprecated in C++17 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS diff --git a/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp b/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp index c5f155269a1a7..ee789e86ce863 100644 --- a/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp +++ b/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp @@ -10,7 +10,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS diff --git a/libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp index 0888684ec354b..687f170f5b972 100644 --- a/libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp @@ -16,8 +16,6 @@ // GCC 7 is the first version to introduce [[nodiscard]] // UNSUPPORTED: gcc-5, gcc-6 -// REQUIRES: verify-support - // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_NODISCARD #include <__config> diff --git a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp index 1a21b48c009b5..99b917badfe93 100644 --- a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // GCC 7 is the first version to introduce [[nodiscard]] // UNSUPPORTED: gcc-5, gcc-6 diff --git a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp index 888785e31d94e..f1ae16d20a72e 100644 --- a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // GCC 7 is the first version to introduce [[nodiscard]] // UNSUPPORTED: gcc-5, gcc-6 diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp index 39861eddd85ca..ec6711d03a8d9 100644 --- a/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp @@ -11,7 +11,6 @@ // #define _LIBCPP_NODISCARD_AFTER_CXX17 [[nodiscard]] // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include <__config> diff --git 
a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp index 6dbeb7f71024a..cad21fc8e8776 100644 --- a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // AppleClang9 doesn't yet support C++17's implicitly synthesized deduction // guides from existing ctors, needed by default_searcher() below. diff --git a/libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp b/libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp index 4497bc2faf6a3..7ba2e5c7516bf 100644 --- a/libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp +++ b/libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support // UNSUPPORTED: c++98, c++03 // diff --git a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py new file mode 100644 index 0000000000000..c8f99846c3738 --- /dev/null +++ b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py @@ -0,0 +1,308 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +# RUN: %{python} %s %S %T %{escaped_exec} \ +# RUN: %{escaped_cxx} \ +# RUN: %{escaped_flags} \ +# RUN: %{escaped_compile_flags} \ +# RUN: %{escaped_link_flags} +# END. + +import base64 +import copy +import os +import platform +import subprocess +import sys +import unittest +from os.path import dirname + +# Allow importing 'lit' and the 'libcxx' module. Make sure we put the lit +# path first so we don't find any system-installed version. +monorepoRoot = dirname(dirname(dirname(dirname(dirname(dirname(__file__)))))) +sys.path = [os.path.join(monorepoRoot, 'libcxx', 'utils'), + os.path.join(monorepoRoot, 'llvm', 'utils', 'lit')] + sys.path +import libcxx.test.dsl as dsl +import lit.LitConfig +import lit.util + +# Steal some parameters from the config running this test so that we can +# bootstrap our own TestingConfig. +SOURCE_ROOT, EXEC_PATH, EXEC, CXX, FLAGS, COMPILE_FLAGS, LINK_FLAGS = sys.argv[1:8] +sys.argv[1:8] = [] + +class SetupConfigs(unittest.TestCase): + """ + Base class for the tests below -- it creates a fake TestingConfig. + """ + def setUp(self): + """ + Create a fake TestingConfig that can be populated however we wish for + the purpose of running unit tests below. We pre-populate it with the + minimum required substitutions. 
+ """ + self.litConfig = lit.LitConfig.LitConfig( + progname='lit', + path=[], + quiet=False, + useValgrind=False, + valgrindLeakCheck=False, + valgrindArgs=[], + noExecute=False, + debug=False, + isWindows=platform.system() == 'Windows', + params={}) + + self.config = lit.TestingConfig.TestingConfig.fromdefaults(self.litConfig) + self.config.test_source_root = SOURCE_ROOT + self.config.test_exec_root = EXEC_PATH + base64Decode = lambda s: lit.util.to_string(base64.b64decode(s)) + self.config.substitutions = [ + ('%{cxx}', base64Decode(CXX)), + ('%{flags}', base64Decode(FLAGS)), + ('%{compile_flags}', base64Decode(COMPILE_FLAGS)), + ('%{link_flags}', base64Decode(LINK_FLAGS)), + ('%{exec}', base64Decode(EXEC)) + ] + + def getSubstitution(self, substitution): + """ + Return a given substitution from the TestingConfig. It is an error if + there is no such substitution. + """ + found = [x for (s, x) in self.config.substitutions if s == substitution] + assert len(found) == 1 + return found[0] + + +class TestHasCompileFlag(SetupConfigs): + """ + Tests for libcxx.test.dsl.hasCompileFlag + """ + def test_no_flag_should_work(self): + self.assertTrue(dsl.hasCompileFlag(self.config, '')) + + def test_flag_exists(self): + self.assertTrue(dsl.hasCompileFlag(self.config, '-O1')) + + def test_nonexistent_flag(self): + self.assertFalse(dsl.hasCompileFlag(self.config, '-this_is_not_a_flag_any_compiler_has')) + + def test_multiple_flags(self): + self.assertTrue(dsl.hasCompileFlag(self.config, '-O1 -Dhello')) + + +class TestHasLocale(SetupConfigs): + """ + Tests for libcxx.test.dsl.hasLocale + """ + def test_doesnt_explode(self): + # It's really hard to test that a system has a given locale, so at least + # make sure we don't explode when we try to check it. + try: + dsl.hasLocale(self.config, 'en_US.UTF-8') + except subprocess.CalledProcessError: + self.fail("checking for hasLocale should not explode") + + def test_nonexistent_locale(self): + self.assertFalse(dsl.hasLocale(self.config, 'for_sure_this_is_not_an_existing_locale')) + + +class TestCompilerMacros(SetupConfigs): + """ + Tests for libcxx.test.dsl.compilerMacros + """ + def test_basic(self): + macros = dsl.compilerMacros(self.config) + self.assertIsInstance(macros, dict) + self.assertGreater(len(macros), 0) + for (k, v) in macros.items(): + self.assertIsInstance(k, str) + self.assertIsInstance(v, str) + + def test_no_flag(self): + macros = dsl.compilerMacros(self.config) + self.assertIn('__cplusplus', macros.keys()) + + def test_empty_flag(self): + macros = dsl.compilerMacros(self.config, '') + self.assertIn('__cplusplus', macros.keys()) + + def test_with_flag(self): + macros = dsl.compilerMacros(self.config, '-DFOO=3') + self.assertIn('__cplusplus', macros.keys()) + self.assertEqual(macros['FOO'], '3') + + def test_with_flags(self): + macros = dsl.compilerMacros(self.config, '-DFOO=3 -DBAR=hello') + self.assertIn('__cplusplus', macros.keys()) + self.assertEqual(macros['FOO'], '3') + self.assertEqual(macros['BAR'], 'hello') + + +class TestFeatureTestMacros(SetupConfigs): + """ + Tests for libcxx.test.dsl.featureTestMacros + """ + def test_basic(self): + macros = dsl.featureTestMacros(self.config) + self.assertIsInstance(macros, dict) + self.assertGreater(len(macros), 0) + for (k, v) in macros.items(): + self.assertIsInstance(k, str) + self.assertIsInstance(v, int) + + +class TestFeature(SetupConfigs): + """ + Tests for libcxx.test.dsl.Feature + """ + def test_trivial(self): + feature = dsl.Feature(name='name') + origSubstitutions = 
copy.deepcopy(self.config.substitutions) + self.assertTrue(feature.isSupported(self.config)) + feature.enableIn(self.config) + self.assertEqual(origSubstitutions, self.config.substitutions) + self.assertIn('name', self.config.available_features) + + def test_name_can_be_a_callable(self): + feature = dsl.Feature(name=lambda cfg: (self.assertIs(self.config, cfg), 'name')[1]) + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('name', self.config.available_features) + + def test_adding_compile_flag(self): + feature = dsl.Feature(name='name', compileFlag='-foo') + origLinkFlags = copy.deepcopy(self.getSubstitution('%{link_flags}')) + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('name', self.config.available_features) + self.assertIn('-foo', self.getSubstitution('%{compile_flags}')) + self.assertEqual(origLinkFlags, self.getSubstitution('%{link_flags}')) + + def test_compile_flag_can_be_a_callable(self): + feature = dsl.Feature(name='name', + compileFlag=lambda cfg: (self.assertIs(self.config, cfg), '-foo')[1]) + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('-foo', self.getSubstitution('%{compile_flags}')) + + def test_adding_link_flag(self): + feature = dsl.Feature(name='name', linkFlag='-foo') + origCompileFlags = copy.deepcopy(self.getSubstitution('%{compile_flags}')) + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('name', self.config.available_features) + self.assertIn('-foo', self.getSubstitution('%{link_flags}')) + self.assertEqual(origCompileFlags, self.getSubstitution('%{compile_flags}')) + + def test_link_flag_can_be_a_callable(self): + feature = dsl.Feature(name='name', + linkFlag=lambda cfg: (self.assertIs(self.config, cfg), '-foo')[1]) + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('-foo', self.getSubstitution('%{link_flags}')) + + def test_adding_both_flags(self): + feature = dsl.Feature(name='name', compileFlag='-hello', linkFlag='-world') + assert feature.isSupported(self.config) + feature.enableIn(self.config) + self.assertIn('name', self.config.available_features) + + self.assertIn('-hello', self.getSubstitution('%{compile_flags}')) + self.assertNotIn('-world', self.getSubstitution('%{compile_flags}')) + + self.assertIn('-world', self.getSubstitution('%{link_flags}')) + self.assertNotIn('-hello', self.getSubstitution('%{link_flags}')) + + def test_unsupported_feature(self): + feature = dsl.Feature(name='name', when=lambda _: False) + self.assertFalse(feature.isSupported(self.config)) + # Also make sure we assert if we ever try to add it to a config + self.assertRaises(AssertionError, lambda: feature.enableIn(self.config)) + + def test_is_supported_gets_passed_the_config(self): + feature = dsl.Feature(name='name', when=lambda cfg: (self.assertIs(self.config, cfg), True)[1]) + self.assertTrue(feature.isSupported(self.config)) + + +class TestParameter(SetupConfigs): + """ + Tests for libcxx.test.dsl.Parameter + """ + def test_empty_name_should_blow_up(self): + self.assertRaises(ValueError, lambda: dsl.Parameter(name='', choices=['c++03'], type=str, help='', feature=lambda _: None)) + + def test_empty_choices_should_blow_up(self): + self.assertRaises(ValueError, lambda: dsl.Parameter(name='std', choices=[], type=str, help='', feature=lambda _: None)) + + def test_name_is_set_correctly(self): + param = dsl.Parameter(name='std', choices=['c++03'], type=str, help='', 
feature=lambda _: None) + self.assertEqual(param.name, 'std') + + def test_no_value_provided_on_command_line_and_no_default_value(self): + param = dsl.Parameter(name='std', choices=['c++03'], type=str, help='', feature=lambda _: None) + self.assertRaises(ValueError, lambda: param.getFeature(self.config, self.litConfig.params)) + + def test_no_value_provided_on_command_line_and_default_value(self): + param = dsl.Parameter(name='std', choices=['c++03'], type=str, help='', default='c++03', + feature=lambda std: dsl.Feature(name=std)) + param.getFeature(self.config, self.litConfig.params).enableIn(self.config) + self.assertIn('c++03', self.config.available_features) + + def test_value_provided_on_command_line_and_no_default_value(self): + self.litConfig.params['std'] = 'c++03' + param = dsl.Parameter(name='std', choices=['c++03'], type=str, help='', + feature=lambda std: dsl.Feature(name=std)) + param.getFeature(self.config, self.litConfig.params).enableIn(self.config) + self.assertIn('c++03', self.config.available_features) + + def test_value_provided_on_command_line_and_default_value(self): + self.litConfig.params['std'] = 'c++11' + param = dsl.Parameter(name='std', choices=['c++03', 'c++11'], type=str, default='c++03', help='', + feature=lambda std: dsl.Feature(name=std)) + param.getFeature(self.config, self.litConfig.params).enableIn(self.config) + self.assertIn('c++11', self.config.available_features) + self.assertNotIn('c++03', self.config.available_features) + + def test_feature_is_None(self): + self.litConfig.params['std'] = 'c++03' + param = dsl.Parameter(name='std', choices=['c++03'], type=str, help='', + feature=lambda _: None) + feature = param.getFeature(self.config, self.litConfig.params) + self.assertIsNone(feature) + + def test_boolean_value_parsed_from_trueish_string_parameter(self): + self.litConfig.params['enable_exceptions'] = "True" + param = dsl.Parameter(name='enable_exceptions', choices=[True, False], type=bool, help='', + feature=lambda exceptions: None if exceptions else ValueError()) + self.assertIsNone(param.getFeature(self.config, self.litConfig.params)) + + def test_boolean_value_from_true_boolean_parameter(self): + self.litConfig.params['enable_exceptions'] = True + param = dsl.Parameter(name='enable_exceptions', choices=[True, False], type=bool, help='', + feature=lambda exceptions: None if exceptions else ValueError()) + self.assertIsNone(param.getFeature(self.config, self.litConfig.params)) + + def test_boolean_value_parsed_from_falseish_string_parameter(self): + self.litConfig.params['enable_exceptions'] = "False" + param = dsl.Parameter(name='enable_exceptions', choices=[True, False], type=bool, help='', + feature=lambda exceptions: None if exceptions else dsl.Feature(name="-fno-exceptions")) + param.getFeature(self.config, self.litConfig.params).enableIn(self.config) + self.assertIn('-fno-exceptions', self.config.available_features) + + def test_boolean_value_from_false_boolean_parameter(self): + self.litConfig.params['enable_exceptions'] = False + param = dsl.Parameter(name='enable_exceptions', choices=[True, False], type=bool, help='', + feature=lambda exceptions: None if exceptions else dsl.Feature(name="-fno-exceptions")) + param.getFeature(self.config, self.litConfig.params).enableIn(self.config) + self.assertIn('-fno-exceptions', self.config.available_features) + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/libcxx/test/libcxx/selftest/dsl/lit.local.cfg b/libcxx/test/libcxx/selftest/dsl/lit.local.cfg new file mode 100644 
index 0000000000000..8cf3543690f48
--- /dev/null
+++ b/libcxx/test/libcxx/selftest/dsl/lit.local.cfg
@@ -0,0 +1,21 @@
+# Since we try to pass substitutions as-is to some tests, we must "escape"
+# them in case they contain other substitutions. Otherwise, the substitutions
+# will be fully expanded when passed to the tests. For example, we want an
+# %{exec} substitution that contains `--dependencies %{file_dependencies}`
+# to be passed as-is, without substituting the file dependencies. This way,
+# the test itself can populate %{file_dependencies} as it sees fit, and
+# %{exec} will respect it.
+#
+# To solve this problem, we add base64 encoded versions of substitutions just
+# in this directory. We then base64-decode them from the tests when we need to.
+# Another option would be to have a way to prevent expansion in Lit itself.
+import base64
+import lit.util
+base64Encode = lambda s: lit.util.to_string(base64.b64encode(lit.util.to_bytes(s)))
+escaped = [(k.replace('%{', '%{escaped_'), base64Encode(v)) for (k, v) in config.substitutions]
+config.substitutions.extend(escaped)
+
+# The tests in this directory need to run Python
+import pipes
+import sys
+config.substitutions.append(('%{python}', pipes.quote(sys.executable)))
diff --git a/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp b/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp
deleted file mode 100644
index 664d25826a8dc..0000000000000
--- a/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// Make sure that we provide the %{verify} substitution. We can only test
-// this when the verify-support feature is enabled, and it's difficult to
-// check that it's enabled when it should be, so we just trust that it is.
-
-// REQUIRES: verify-support
-// RUN: test -n "%{verify}"
-
-// RUN: %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}
-
-// expected-no-diagnostics
diff --git a/libcxx/test/libcxx/selftest/newformat/fail.cpp/lit.local.cfg b/libcxx/test/libcxx/selftest/newformat/fail.cpp/lit.local.cfg
new file mode 100644
index 0000000000000..2cd7606f4c4e3
--- /dev/null
+++ b/libcxx/test/libcxx/selftest/newformat/fail.cpp/lit.local.cfg
@@ -0,0 +1,6 @@
+import libcxx.test.newformat
+
+# The tests in this directory need to know whether Clang-verify is supported
+# to work properly.
+if libcxx.test.newformat._supportsVerify(config):
+    config.available_features.add('verify-support')
diff --git a/libcxx/test/libcxx/selftest/newformat/pass.cpp/werror.pass.cpp b/libcxx/test/libcxx/selftest/newformat/pass.cpp/werror.pass.cpp
new file mode 100644
index 0000000000000..31b7453164ea9
--- /dev/null
+++ b/libcxx/test/libcxx/selftest/newformat/pass.cpp/werror.pass.cpp
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// XFAIL: * + +// Make sure that even a simple unused variable warning is treated as an +// error in the test suite. This is to make sure the test suite always runs +// with -Werror. + +// ADDITIONAL_COMPILE_FLAGS: -Wunused-variable + +// TODO: We don't enable -Werror on GCC right now, because too many tests fail. +// UNSUPPORTED: gcc + +int main() { + int foo; +} diff --git a/libcxx/test/libcxx/selftest/newformat/sh.cpp/werror.sh.cpp b/libcxx/test/libcxx/selftest/newformat/sh.cpp/werror.sh.cpp new file mode 100644 index 0000000000000..5b84275bf6871 --- /dev/null +++ b/libcxx/test/libcxx/selftest/newformat/sh.cpp/werror.sh.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// XFAIL: * + +// Make sure that even a simple unused variable warning is treated as an +// error in the test suite, including in .sh.cpp tests. + +// TODO: We don't enable -Werror on GCC right now, because too many tests fail. +// UNSUPPORTED: gcc + +// FILE_DEPENDENCIES: %t.exe +// RUN: %{build} -Wunused-variable +// RUN: %{run} + +int main() { + int foo; +} diff --git a/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics-unmarked.verify.cpp b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics-unmarked.verify.cpp index e0ca921f34389..18ff670653822 100644 --- a/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics-unmarked.verify.cpp +++ b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics-unmarked.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support - // XFAIL: * // Make sure the test DOES NOT pass if there are no diagnostics, but we didn't diff --git a/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics.verify.cpp b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics.verify.cpp index 1f6124cf8dd95..d622c786ee371 100644 --- a/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics.verify.cpp +++ b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-diagnostics.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support - // Make sure the test passes if we expected no diagnostics // expected-no-diagnostics diff --git a/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-werror.verify.cpp b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-werror.verify.cpp new file mode 100644 index 0000000000000..4b89be558525e --- /dev/null +++ b/libcxx/test/libcxx/selftest/newformat/verify.cpp/no-werror.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure clang-verify tests distinguish warnings from errors. 
+ +// ADDITIONAL_COMPILE_FLAGS: -Wunused-variable + +int main() { + int foo; // expected-warning {{unused variable}} +} diff --git a/libcxx/test/libcxx/selftest/newformat/verify.cpp/right-diagnostic.verify.cpp b/libcxx/test/libcxx/selftest/newformat/verify.cpp/right-diagnostic.verify.cpp index d3afd9745761c..8bca568707dc4 100644 --- a/libcxx/test/libcxx/selftest/newformat/verify.cpp/right-diagnostic.verify.cpp +++ b/libcxx/test/libcxx/selftest/newformat/verify.cpp/right-diagnostic.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support - // Make sure the test passes if the expected diagnostic is correct. struct Foo { }; diff --git a/libcxx/test/libcxx/selftest/newformat/verify.cpp/wrong-diagnostic.verify.cpp b/libcxx/test/libcxx/selftest/newformat/verify.cpp/wrong-diagnostic.verify.cpp index 02f9353cf18ac..3bc2ba841ad9a 100644 --- a/libcxx/test/libcxx/selftest/newformat/verify.cpp/wrong-diagnostic.verify.cpp +++ b/libcxx/test/libcxx/selftest/newformat/verify.cpp/wrong-diagnostic.verify.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: verify-support - // XFAIL: * // Make sure the test DOES NOT pass if the expected diagnostic is wrong. diff --git a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp index ad99a870a048a..3595fa0f5fe67 100644 --- a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11 -// REQUIRES: verify-support // REQUIRES: with_system_cxx_lib=macosx // REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15 diff --git a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp index e796e109a113c..3b07fa81b00a7 100644 --- a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11 -// REQUIRES: verify-support // REQUIRES: with_system_cxx_lib=macosx // REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15 diff --git a/libcxx/test/libcxx/thread/latch.availability.verify.cpp b/libcxx/test/libcxx/thread/latch.availability.verify.cpp index 12eb7b3dbfb48..b1b49ebe6fd39 100644 --- a/libcxx/test/libcxx/thread/latch.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/latch.availability.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11 -// REQUIRES: verify-support // REQUIRES: with_system_cxx_lib=macosx // REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15 diff --git a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp 
index 534569f0d5978..9325f505b7ab8 100644 --- a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11 -// REQUIRES: verify-support // REQUIRES: with_system_cxx_lib=macosx // REQUIRES: availability=macosx10.9 || availability=macosx10.10 || availability=macosx10.11 || availability=macosx10.12 || availability=macosx10.13 || availability=macosx10.14 || availability=macosx10.15 diff --git a/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp b/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp index f848d5ed6223a..7028d561e4832 100644 --- a/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: libcpp-has-no-threads -// REQUIRES: verify-support // [[nodiscard]] on constructors isn't supported by all compilers // UNSUPPORTED: clang-6, clang-7, clang-8, clang-9 diff --git a/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp b/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp index f8a162ce1beef..544a43e656b1c 100644 --- a/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp +++ b/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp @@ -12,7 +12,6 @@ // Check that libc++'s emulation of std::function is deprecated in C++03 // REQUIRES: c++98 || c++03 -// REQUIRES: verify-support #include #include "test_macros.h" diff --git a/libcxx/test/std/containers/associative/map/PR28469_undefined_behavior_segfault.sh.cpp b/libcxx/test/std/containers/associative/map/PR28469_undefined_behavior_segfault.sh.cpp index 83aa36dff3ddb..b63fdf455f227 100644 --- a/libcxx/test/std/containers/associative/map/PR28469_undefined_behavior_segfault.sh.cpp +++ b/libcxx/test/std/containers/associative/map/PR28469_undefined_behavior_segfault.sh.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// UNSUPPORTED: c++98, c++03 + // FILE_DEPENDENCIES: %t.exe // RUN: %{build} -O2 // RUN: %{run} diff --git a/libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp b/libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp index 2ef8cdf17d0d5..07d5654b446e2 100644 --- a/libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp +++ b/libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp index 610ca109ef5ac..8b95aefc55149 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp @@ -21,12 +21,11 @@ struct X std::map m; std::map::iterator i; std::map::const_iterator ci; +#if TEST_STD_VER <= 17 + // These reverse_iterator specializations require X to be complete in C++20. 
std::map::reverse_iterator ri; std::map::const_reverse_iterator cri; +#endif // TEST_STD_VER <= 17 }; -int main(int, char**) -{ - - return 0; -} +int main(int, char**) { return 0; } diff --git a/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp index af2d35c66ec0d..c890936ebd708 100644 --- a/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(map& c, Predicate pred); +// typename map::size_type +// erase_if(map& c, Predicate pred); #include @@ -29,13 +30,11 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == make(expected)); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == make(expected)); } template @@ -48,22 +47,22 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); + test0({}, is1, {}, 0); - test0({1}, is1, {}); - test0({1}, is2, {1}); + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/associative/multimap/empty.verify.cpp b/libcxx/test/std/containers/associative/multimap/empty.verify.cpp index 644d1ab97a538..5970528f7e2b5 100644 --- a/libcxx/test/std/containers/associative/multimap/empty.verify.cpp +++ b/libcxx/test/std/containers/associative/multimap/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp index e468ebf023b1a..94003f350eba6 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp @@ -21,12 +21,11 @@ struct X std::multimap m; std::multimap::iterator i; std::multimap::const_iterator ci; +#if TEST_STD_VER <= 17 + // These reverse_iterator specializations require X to be complete in C++20. 
std::multimap::reverse_iterator ri; std::multimap::const_reverse_iterator cri; +#endif // TEST_STD_VER <= 17 }; -int main(int, char**) -{ - - return 0; -} +int main(int, char**) { return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp index 6f2d56a9730dc..d573c2ef3f51f 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp @@ -9,8 +9,9 @@ // -// template -// void erase_if(multimap& c, Predicate pred); +// template +// typename multimap::size_type +// erase_if(multimap& c, Predicate pred); #include @@ -29,13 +30,11 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == make(expected)); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == make(expected)); } template @@ -48,33 +47,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); - - test0({1}, is1, {}); - test0({1}, is2, {1}); - - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); - test0({1,1}, is1, {}); - test0({1,1}, is3, {1,1}); - - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); - - test0({1,1,1}, is1, {}); - test0({1,1,1}, is2, {1,1,1}); - test0({1,1,2}, is1, {2}); - test0({1,1,2}, is2, {1,1}); - test0({1,1,2}, is3, {1,1,2}); - test0({1,2,2}, is1, {2,2}); - test0({1,2,2}, is2, {1}); - test0({1,2,2}, is3, {1,2,2}); - - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({}, is1, {}, 0); + + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); + + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); + test0({1, 1}, is1, {}, 2); + test0({1, 1}, is3, {1, 1}, 0); + + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); + + test0({1, 1, 1}, is1, {}, 3); + test0({1, 1, 1}, is2, {1, 1, 1}, 0); + test0({1, 1, 2}, is1, {2}, 2); + test0({1, 1, 2}, is2, {1, 1}, 1); + test0({1, 1, 2}, is3, {1, 1, 2}, 0); + test0({1, 2, 2}, is1, {2, 2}, 1); + test0({1, 2, 2}, is2, {1}, 2); + test0({1, 2, 2}, is3, {1, 2, 2}, 0); + + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/associative/multiset/empty.verify.cpp b/libcxx/test/std/containers/associative/multiset/empty.verify.cpp index 5b1fc9ba6d4b2..86c8b7c82a1e7 100644 --- a/libcxx/test/std/containers/associative/multiset/empty.verify.cpp +++ b/libcxx/test/std/containers/associative/multiset/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp index 1591fa1f2f813..73b451a1314f9 100644 --- 
a/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(multiset& c, Predicate pred); +// typename multiset::size_type +// erase_if(multiset& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,33 +36,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); - test0(S({1,1}), is1, S()); - test0(S({1,1}), is3, S({1,1})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); + test0(S({1, 1}), is1, S(), 2); + test0(S({1, 1}), is3, S({1, 1}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,1,1}), is1, S()); - test0(S({1,1,1}), is2, S({1,1,1})); - test0(S({1,1,2}), is1, S({2})); - test0(S({1,1,2}), is2, S({1,1})); - test0(S({1,1,2}), is3, S({1,1,2})); - test0(S({1,2,2}), is1, S({2,2})); - test0(S({1,2,2}), is2, S({1})); - test0(S({1,2,2}), is3, S({1,2,2})); + test0(S({1, 1, 1}), is1, S(), 3); + test0(S({1, 1, 1}), is2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), is1, S({2}), 2); + test0(S({1, 1, 2}), is2, S({1, 1}), 1); + test0(S({1, 1, 2}), is3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), is1, S({2, 2}), 1); + test0(S({1, 2, 2}), is2, S({1}), 2); + test0(S({1, 2, 2}), is3, S({1, 2, 2}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/associative/set/empty.verify.cpp b/libcxx/test/std/containers/associative/set/empty.verify.cpp index 33bf12cdae5ff..d1a1daeba4904 100644 --- a/libcxx/test/std/containers/associative/set/empty.verify.cpp +++ b/libcxx/test/std/containers/associative/set/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp index 519db6d6f4a1c..933fc4348c0f9 100644 --- a/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(set& c, Predicate pred); +// typename set::size_type +// erase_if(set& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, 
Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,22 +36,22 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp index 1c00755cbc2d5..85df85ccd7a25 100644 --- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp +++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp index e094421f7c97a..d9119a5c3519e 100644 --- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp +++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp index e419b0d7dee6c..4f43b5eee87e3 100644 --- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp +++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/array/empty.verify.cpp b/libcxx/test/std/containers/sequences/array/empty.verify.cpp index 96f8a2be5b2db..cf1fe5f1c0d2a 100644 --- a/libcxx/test/std/containers/sequences/array/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/array/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp 
b/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp index 793baf9b78c05..699fd93aa43b2 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp index 2394c26dcbe03..e10d06174fe20 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp @@ -10,8 +10,8 @@ // // template -// void erase(deque& c, const U& value); - +// typename deque::size_type +// erase(deque& c, const U& value); #include #include @@ -21,49 +21,46 @@ #include "min_allocator.h" template -void -test0(S s, U val, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase(s, val))); - std::erase(s, val); - assert(s == expected); +void test0(S s, U val, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); + assert(expected_erased_count == std::erase(s, val)); + assert(s == expected); } template void test() { - - test0(S(), 1, S()); - - test0(S({1}), 1, S()); - test0(S({1}), 2, S({1})); - - test0(S({1,2}), 1, S({2})); - test0(S({1,2}), 2, S({1})); - test0(S({1,2}), 3, S({1,2})); - test0(S({1,1}), 1, S()); - test0(S({1,1}), 3, S({1,1})); - - test0(S({1,2,3}), 1, S({2,3})); - test0(S({1,2,3}), 2, S({1,3})); - test0(S({1,2,3}), 3, S({1,2})); - test0(S({1,2,3}), 4, S({1,2,3})); - - test0(S({1,1,1}), 1, S()); - test0(S({1,1,1}), 2, S({1,1,1})); - test0(S({1,1,2}), 1, S({2})); - test0(S({1,1,2}), 2, S({1,1})); - test0(S({1,1,2}), 3, S({1,1,2})); - test0(S({1,2,2}), 1, S({2,2})); - test0(S({1,2,2}), 2, S({1})); - test0(S({1,2,2}), 3, S({1,2,2})); - -// Test cross-type erasure - using opt = std::optional; - test0(S({1,2,1}), opt(), S({1,2,1})); - test0(S({1,2,1}), opt(1), S({2})); - test0(S({1,2,1}), opt(2), S({1,1})); - test0(S({1,2,1}), opt(3), S({1,2,1})); + test0(S(), 1, S(), 0); + + test0(S({1}), 1, S(), 1); + test0(S({1}), 2, S({1}), 0); + + test0(S({1, 2}), 1, S({2}), 1); + test0(S({1, 2}), 2, S({1}), 1); + test0(S({1, 2}), 3, S({1, 2}), 0); + test0(S({1, 1}), 1, S(), 2); + test0(S({1, 1}), 3, S({1, 1}), 0); + + test0(S({1, 2, 3}), 1, S({2, 3}), 1); + test0(S({1, 2, 3}), 2, S({1, 3}), 1); + test0(S({1, 2, 3}), 3, S({1, 2}), 1); + test0(S({1, 2, 3}), 4, S({1, 2, 3}), 0); + + test0(S({1, 1, 1}), 1, S(), 3); + test0(S({1, 1, 1}), 2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), 1, S({2}), 2); + test0(S({1, 1, 2}), 2, S({1, 1}), 1); + test0(S({1, 1, 2}), 3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), 1, S({2, 2}), 1); + test0(S({1, 2, 2}), 2, S({1}), 2); + test0(S({1, 2, 2}), 3, S({1, 2, 2}), 0); + + // Test cross-type erasure + using opt = std::optional; + test0(S({1, 2, 1}), opt(), S({1, 2, 1}), 0); + test0(S({1, 2, 1}), opt(1), S({2}), 2); + test0(S({1, 2, 1}), opt(2), S({1, 1}), 1); + test0(S({1, 2, 1}), opt(3), S({1, 2, 1}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp index cb54e4e443f55..86e4c68dcfb9c 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(deque& c, Predicate pred); +// typename deque::size_type +// erase_if(deque& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,33 +36,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); - test0(S({1,1}), is1, S()); - test0(S({1,1}), is3, S({1,1})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); + test0(S({1, 1}), is1, S(), 2); + test0(S({1, 1}), is3, S({1, 1}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,1,1}), is1, S()); - test0(S({1,1,1}), is2, S({1,1,1})); - test0(S({1,1,2}), is1, S({2})); - test0(S({1,1,2}), is2, S({1,1})); - test0(S({1,1,2}), is3, S({1,1,2})); - test0(S({1,2,2}), is1, S({2,2})); - test0(S({1,2,2}), is2, S({1})); - test0(S({1,2,2}), is3, S({1,2,2})); + test0(S({1, 1, 1}), is1, S(), 3); + test0(S({1, 1, 1}), is2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), is1, S({2}), 2); + test0(S({1, 1, 2}), is2, S({1, 1}), 1); + test0(S({1, 1, 2}), is3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), is1, S({2, 2}), 1); + test0(S({1, 2, 2}), is2, S({1}), 2); + test0(S({1, 2, 2}), is3, S({1, 2, 2}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp b/libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp index a8c507535162c..ba76f1f88ee75 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp index 791553548020e..26438c60294ef 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp @@ -10,8 +10,8 @@ // // template -// void erase(forward_list& c, const U& value); - +// typename forward_list::size_type +// erase(forward_list& c, const U& value); #include #include @@ -21,49 +21,46 @@ #include "min_allocator.h" template -void -test0(S s, U val, S 
expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase(s, val))); - std::erase(s, val); - assert(s == expected); +void test0(S s, U val, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); + assert(expected_erased_count == std::erase(s, val)); + assert(s == expected); } template void test() { - - test0(S(), 1, S()); - - test0(S({1}), 1, S()); - test0(S({1}), 2, S({1})); - - test0(S({1,2}), 1, S({2})); - test0(S({1,2}), 2, S({1})); - test0(S({1,2}), 3, S({1,2})); - test0(S({1,1}), 1, S()); - test0(S({1,1}), 3, S({1,1})); - - test0(S({1,2,3}), 1, S({2,3})); - test0(S({1,2,3}), 2, S({1,3})); - test0(S({1,2,3}), 3, S({1,2})); - test0(S({1,2,3}), 4, S({1,2,3})); - - test0(S({1,1,1}), 1, S()); - test0(S({1,1,1}), 2, S({1,1,1})); - test0(S({1,1,2}), 1, S({2})); - test0(S({1,1,2}), 2, S({1,1})); - test0(S({1,1,2}), 3, S({1,1,2})); - test0(S({1,2,2}), 1, S({2,2})); - test0(S({1,2,2}), 2, S({1})); - test0(S({1,2,2}), 3, S({1,2,2})); - -// Test cross-type erasure - using opt = std::optional; - test0(S({1,2,1}), opt(), S({1,2,1})); - test0(S({1,2,1}), opt(1), S({2})); - test0(S({1,2,1}), opt(2), S({1,1})); - test0(S({1,2,1}), opt(3), S({1,2,1})); + test0(S(), 1, S(), 0); + + test0(S({1}), 1, S(), 1); + test0(S({1}), 2, S({1}), 0); + + test0(S({1, 2}), 1, S({2}), 1); + test0(S({1, 2}), 2, S({1}), 1); + test0(S({1, 2}), 3, S({1, 2}), 0); + test0(S({1, 1}), 1, S(), 2); + test0(S({1, 1}), 3, S({1, 1}), 0); + + test0(S({1, 2, 3}), 1, S({2, 3}), 1); + test0(S({1, 2, 3}), 2, S({1, 3}), 1); + test0(S({1, 2, 3}), 3, S({1, 2}), 1); + test0(S({1, 2, 3}), 4, S({1, 2, 3}), 0); + + test0(S({1, 1, 1}), 1, S(), 3); + test0(S({1, 1, 1}), 2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), 1, S({2}), 2); + test0(S({1, 1, 2}), 2, S({1, 1}), 1); + test0(S({1, 1, 2}), 3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), 1, S({2, 2}), 1); + test0(S({1, 2, 2}), 2, S({1}), 2); + test0(S({1, 2, 2}), 3, S({1, 2, 2}), 0); + + // Test cross-type erasure + using opt = std::optional; + test0(S({1, 2, 1}), opt(), S({1, 2, 1}), 0); + test0(S({1, 2, 1}), opt(1), S({2}), 2); + test0(S({1, 2, 1}), opt(2), S({1, 1}), 1); + test0(S({1, 2, 1}), opt(3), S({1, 2, 1}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp index 103645d4b6ec8..e1a4a64b29ad8 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(forward_list& c, Predicate pred); +// typename forward_list::size_type +// erase_if(forward_list& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,33 +36,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - 
test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); - test0(S({1,1}), is1, S()); - test0(S({1,1}), is3, S({1,1})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); + test0(S({1, 1}), is1, S(), 2); + test0(S({1, 1}), is3, S({1, 1}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,1,1}), is1, S()); - test0(S({1,1,1}), is2, S({1,1,1})); - test0(S({1,1,2}), is1, S({2})); - test0(S({1,1,2}), is2, S({1,1})); - test0(S({1,1,2}), is3, S({1,1,2})); - test0(S({1,2,2}), is1, S({2,2})); - test0(S({1,2,2}), is2, S({1})); - test0(S({1,2,2}), is3, S({1,2,2})); + test0(S({1, 1, 1}), is1, S(), 3); + test0(S({1, 1, 1}), is2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), is1, S({2}), 2); + test0(S({1, 1, 2}), is2, S({1, 1}), 1); + test0(S({1, 1, 2}), is3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), is1, S({2, 2}), 1); + test0(S({1, 2, 2}), is2, S({1}), 2); + test0(S({1, 2, 2}), is3, S({1, 2, 2}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp index 2b0b33e1f47be..d0e804628ff00 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp index 298785ca52c8a..a8cbbb37671cc 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp @@ -10,8 +10,8 @@ // // template -// void erase(list& c, const U& value); - +// typename list::size_type +// erase(list& c, const U& value); #include #include @@ -21,49 +21,46 @@ #include "min_allocator.h" template -void -test0(S s, U val, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase(s, val))); - std::erase(s, val); - assert(s == expected); +void test0(S s, U val, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); + assert(expected_erased_count == std::erase(s, val)); + assert(s == expected); } template void test() { - - test0(S(), 1, S()); - - test0(S({1}), 1, S()); - test0(S({1}), 2, S({1})); - - test0(S({1,2}), 1, S({2})); - test0(S({1,2}), 2, S({1})); - test0(S({1,2}), 3, S({1,2})); - test0(S({1,1}), 1, S()); - test0(S({1,1}), 3, S({1,1})); - - test0(S({1,2,3}), 1, S({2,3})); - test0(S({1,2,3}), 2, S({1,3})); - test0(S({1,2,3}), 3, S({1,2})); - test0(S({1,2,3}), 4, S({1,2,3})); - - test0(S({1,1,1}), 1, S()); - test0(S({1,1,1}), 2, S({1,1,1})); - test0(S({1,1,2}), 1, S({2})); - test0(S({1,1,2}), 2, S({1,1})); - test0(S({1,1,2}), 3, S({1,1,2})); - test0(S({1,2,2}), 1, S({2,2})); - test0(S({1,2,2}), 2, S({1})); - test0(S({1,2,2}), 3, S({1,2,2})); - 
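The cross-type erasure blocks in these tests exercise std::erase's heterogeneous comparison: the value argument may have a different type U, matched against elements via operator==. std::optional<int> works as U because optional provides mixed comparisons with int. A self-contained sketch under C++20 (values mirror the tests; this is not the test itself):

#include <cassert>
#include <list>
#include <optional>

int main() {
    std::list<int> l = {1, 2, 1};
    std::optional<int> opt(1);
    auto n = std::erase(l, opt); // int == optional<int> does the matching
    assert(n == 2);              // erase also reports the count since P1115
    assert(l == std::list<int>({2}));
    return 0;
}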
-// Test cross-type erasure - using opt = std::optional; - test0(S({1,2,1}), opt(), S({1,2,1})); - test0(S({1,2,1}), opt(1), S({2})); - test0(S({1,2,1}), opt(2), S({1,1})); - test0(S({1,2,1}), opt(3), S({1,2,1})); + test0(S(), 1, S(), 0); + + test0(S({1}), 1, S(), 1); + test0(S({1}), 2, S({1}), 0); + + test0(S({1, 2}), 1, S({2}), 1); + test0(S({1, 2}), 2, S({1}), 1); + test0(S({1, 2}), 3, S({1, 2}), 0); + test0(S({1, 1}), 1, S(), 2); + test0(S({1, 1}), 3, S({1, 1}), 0); + + test0(S({1, 2, 3}), 1, S({2, 3}), 1); + test0(S({1, 2, 3}), 2, S({1, 3}), 1); + test0(S({1, 2, 3}), 3, S({1, 2}), 1); + test0(S({1, 2, 3}), 4, S({1, 2, 3}), 0); + + test0(S({1, 1, 1}), 1, S(), 3); + test0(S({1, 1, 1}), 2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), 1, S({2}), 2); + test0(S({1, 1, 2}), 2, S({1, 1}), 1); + test0(S({1, 1, 2}), 3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), 1, S({2, 2}), 1); + test0(S({1, 2, 2}), 2, S({1}), 2); + test0(S({1, 2, 2}), 3, S({1, 2, 2}), 0); + + // Test cross-type erasure + using opt = std::optional; + test0(S({1, 2, 1}), opt(), S({1, 2, 1}), 0); + test0(S({1, 2, 1}), opt(1), S({2}), 2); + test0(S({1, 2, 1}), opt(2), S({1, 1}), 1); + test0(S({1, 2, 1}), opt(3), S({1, 2, 1}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp index b2889eb67c443..9e6dfb19d624f 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(list& c, Predicate pred); +// typename list::size_type +// erase_if(list& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,33 +36,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); - test0(S({1,1}), is1, S()); - test0(S({1,1}), is3, S({1,1})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); + test0(S({1, 1}), is1, S(), 2); + test0(S({1, 1}), is3, S({1, 1}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,1,1}), is1, S()); - test0(S({1,1,1}), is2, S({1,1,1})); - test0(S({1,1,2}), is1, S({2})); - test0(S({1,1,2}), is2, S({1,1})); - test0(S({1,1,2}), is3, S({1,1,2})); - test0(S({1,2,2}), is1, S({2,2})); - test0(S({1,2,2}), is2, S({1})); - test0(S({1,2,2}), is3, S({1,2,2})); + test0(S({1, 1, 1}), is1, S(), 3); + test0(S({1, 1, 1}), is2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), is1, S({2}), 2); + test0(S({1, 1, 2}), is2, 
S({1, 1}), 1); + test0(S({1, 1, 2}), is3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), is1, S({2, 2}), 1); + test0(S({1, 2, 2}), is2, S({1}), 2); + test0(S({1, 2, 2}), is3, S({1, 2, 2}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp b/libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp index 1b6429b8a5730..754f11d22337a 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp index 26a4266a2ce8e..626177ee689d4 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp index b38f24c3be254..67c91ae5a870f 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp @@ -9,7 +9,6 @@ // Make sure that a std::vector containing move-only types can't be copied. 
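copy.move_only.verify.cpp above is a compile-time check that copying a vector of move-only elements is rejected. A sketch of the ill-formed code it targets (the diagnostic wording varies by compiler and is omitted here):

#include <memory>
#include <vector>

void f() {
    std::vector<std::unique_ptr<int>> v;
    auto v2 = v; // ill-formed: std::unique_ptr's copy constructor is deleted
    (void)v2;
}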
// UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp index 687a467e4f7dc..d353a88448e06 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp @@ -10,8 +10,8 @@ // // template -// void erase(vector& c, const U& value); - +// typename vector::size_type +// erase(vector& c, const U& value); #include #include @@ -21,49 +21,47 @@ #include "min_allocator.h" template -void -test0(S s, U val, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase(s, val))); - std::erase(s, val); - assert(s == expected); +void test0(S s, U val, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); + assert(expected_erased_count == std::erase(s, val)); + assert(s == expected); } template void test() { - test0(S(), 1, S()); - - test0(S({1}), 1, S()); - test0(S({1}), 2, S({1})); - - test0(S({1,2}), 1, S({2})); - test0(S({1,2}), 2, S({1})); - test0(S({1,2}), 3, S({1,2})); - test0(S({1,1}), 1, S()); - test0(S({1,1}), 3, S({1,1})); - - test0(S({1,2,3}), 1, S({2,3})); - test0(S({1,2,3}), 2, S({1,3})); - test0(S({1,2,3}), 3, S({1,2})); - test0(S({1,2,3}), 4, S({1,2,3})); - - test0(S({1,1,1}), 1, S()); - test0(S({1,1,1}), 2, S({1,1,1})); - test0(S({1,1,2}), 1, S({2})); - test0(S({1,1,2}), 2, S({1,1})); - test0(S({1,1,2}), 3, S({1,1,2})); - test0(S({1,2,2}), 1, S({2,2})); - test0(S({1,2,2}), 2, S({1})); - test0(S({1,2,2}), 3, S({1,2,2})); - -// Test cross-type erasure - using opt = std::optional; - test0(S({1,2,1}), opt(), S({1,2,1})); - test0(S({1,2,1}), opt(1), S({2})); - test0(S({1,2,1}), opt(2), S({1,1})); - test0(S({1,2,1}), opt(3), S({1,2,1})); + test0(S(), 1, S(), 0); + + test0(S({1}), 1, S(), 1); + test0(S({1}), 2, S({1}), 0); + + test0(S({1, 2}), 1, S({2}), 1); + test0(S({1, 2}), 2, S({1}), 1); + test0(S({1, 2}), 3, S({1, 2}), 0); + test0(S({1, 1}), 1, S(), 2); + test0(S({1, 1}), 3, S({1, 1}), 0); + + test0(S({1, 2, 3}), 1, S({2, 3}), 1); + test0(S({1, 2, 3}), 2, S({1, 3}), 1); + test0(S({1, 2, 3}), 3, S({1, 2}), 1); + test0(S({1, 2, 3}), 4, S({1, 2, 3}), 0); + + test0(S({1, 1, 1}), 1, S(), 3); + test0(S({1, 1, 1}), 2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), 1, S({2}), 2); + test0(S({1, 1, 2}), 2, S({1, 1}), 1); + test0(S({1, 1, 2}), 3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), 1, S({2, 2}), 1); + test0(S({1, 2, 2}), 2, S({1}), 2); + test0(S({1, 2, 2}), 3, S({1, 2, 2}), 0); + + // Test cross-type erasure + using opt = std::optional; + test0(S({1, 2, 1}), opt(), S({1, 2, 1}), 0); + test0(S({1, 2, 1}), opt(1), S({2}), 2); + test0(S({1, 2, 1}), opt(2), S({1, 1}), 1); + test0(S({1, 2, 1}), opt(3), S({1, 2, 1}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp index 49fece4b76502..f72b3c9eab4f4 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(vector& c, Predicate pred); +// typename vector::size_type +// erase_if(vector& c, Predicate pred); #include @@ -19,12 +20,10 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - 
ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == expected); } template @@ -37,33 +36,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(), is1, S()); + test0(S(), is1, S(), 0); - test0(S({1}), is1, S()); - test0(S({1}), is2, S({1})); + test0(S({1}), is1, S(), 1); + test0(S({1}), is2, S({1}), 0); - test0(S({1,2}), is1, S({2})); - test0(S({1,2}), is2, S({1})); - test0(S({1,2}), is3, S({1,2})); - test0(S({1,1}), is1, S()); - test0(S({1,1}), is3, S({1,1})); + test0(S({1, 2}), is1, S({2}), 1); + test0(S({1, 2}), is2, S({1}), 1); + test0(S({1, 2}), is3, S({1, 2}), 0); + test0(S({1, 1}), is1, S(), 2); + test0(S({1, 1}), is3, S({1, 1}), 0); - test0(S({1,2,3}), is1, S({2,3})); - test0(S({1,2,3}), is2, S({1,3})); - test0(S({1,2,3}), is3, S({1,2})); - test0(S({1,2,3}), is4, S({1,2,3})); + test0(S({1, 2, 3}), is1, S({2, 3}), 1); + test0(S({1, 2, 3}), is2, S({1, 3}), 1); + test0(S({1, 2, 3}), is3, S({1, 2}), 1); + test0(S({1, 2, 3}), is4, S({1, 2, 3}), 0); - test0(S({1,1,1}), is1, S()); - test0(S({1,1,1}), is2, S({1,1,1})); - test0(S({1,1,2}), is1, S({2})); - test0(S({1,1,2}), is2, S({1,1})); - test0(S({1,1,2}), is3, S({1,1,2})); - test0(S({1,2,2}), is1, S({2,2})); - test0(S({1,2,2}), is2, S({1})); - test0(S({1,2,2}), is3, S({1,2,2})); + test0(S({1, 1, 1}), is1, S(), 3); + test0(S({1, 1, 1}), is2, S({1, 1, 1}), 0); + test0(S({1, 1, 2}), is1, S({2}), 2); + test0(S({1, 1, 2}), is2, S({1, 1}), 1); + test0(S({1, 1, 2}), is3, S({1, 1, 2}), 0); + test0(S({1, 2, 2}), is1, S({2, 2}), 1); + test0(S({1, 2, 2}), is2, S({1}), 2); + test0(S({1, 2, 2}), is3, S({1, 2, 2}), 0); - test0(S({1,2,3}), True, S()); - test0(S({1,2,3}), False, S({1,2,3})); + test0(S({1, 2, 3}), True, S(), 3); + test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/unord/unord.map/empty.verify.cpp b/libcxx/test/std/containers/unord/unord.map/empty.verify.cpp index 5f666cb5b65b6..1f5f2fd4cf0b8 100644 --- a/libcxx/test/std/containers/unord/unord.map/empty.verify.cpp +++ b/libcxx/test/std/containers/unord/unord.map/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp index 2f188655ca81e..782fb6bd6f313 100644 --- a/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(unordered_map& c, Predicate pred); +// typename unordered_map::size_type +// erase_if(unordered_map& c, Predicate pred); #include @@ -29,14 +30,12 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - M e = make(expected); - assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + M e = 
make(expected); + assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); } template @@ -49,22 +48,22 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); + test0({}, is1, {}, 0); - test0({1}, is1, {}); - test0({1}, is2, {1}); + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp b/libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp index 5bbc294519c75..b0638edeaff76 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp index 23d18872d1428..0a79dbc5ae7a7 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(unordered_multimap& c, Predicate pred); +// typename unordered_multimap::size_type +// erase_if(unordered_multimap& c, Predicate pred); #include @@ -29,14 +30,12 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - M e = make(expected); - assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + M e = make(expected); + assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); } template @@ -49,33 +48,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); - - test0({1}, is1, {}); - test0({1}, is2, {1}); - - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); - test0({1,1}, is1, {}); - test0({1,1}, is3, {1,1}); - - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); - - test0({1,1,1}, is1, {}); - test0({1,1,1}, is2, {1,1,1}); - test0({1,1,2}, is1, {2}); - test0({1,1,2}, is2, {1,1}); - test0({1,1,2}, is3, {1,1,2}); - test0({1,2,2}, is1, {2,2}); - test0({1,2,2}, is2, {1}); - test0({1,2,2}, is3, {1,2,2}); - - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({}, is1, {}, 0); + + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); + + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); + test0({1, 1}, is1, {}, 2); + test0({1, 
1}, is3, {1, 1}, 0); + + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); + + test0({1, 1, 1}, is1, {}, 3); + test0({1, 1, 1}, is2, {1, 1, 1}, 0); + test0({1, 1, 2}, is1, {2}, 2); + test0({1, 1, 2}, is2, {1, 1}, 1); + test0({1, 1, 2}, is3, {1, 1, 2}, 0); + test0({1, 2, 2}, is1, {2, 2}, 1); + test0({1, 2, 2}, is2, {1}, 2); + test0({1, 2, 2}, is3, {1, 2, 2}, 0); + + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp b/libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp index 222909ab2f42b..efe778b6f9f4f 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp index 72e553a734cd7..5f7316923c35e 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(unordered_multiset& c, Predicate pred); +// typename unordered_multiset::size_type +// erase_if(unordered_multiset& c, Predicate pred); #include @@ -30,14 +31,12 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - M e = make(expected); - assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + M e = make(expected); + assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); } template @@ -50,33 +49,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); - - test0({1}, is1, {}); - test0({1}, is2, {1}); - - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); - test0({1,1}, is1, {}); - test0({1,1}, is3, {1,1}); - - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); - - test0({1,1,1}, is1, {}); - test0({1,1,1}, is2, {1,1,1}); - test0({1,1,2}, is1, {2}); - test0({1,1,2}, is2, {1,1}); - test0({1,1,2}, is3, {1,1,2}); - test0({1,2,2}, is1, {2,2}); - test0({1,2,2}, is2, {1}); - test0({1,2,2}, is3, {1,2,2}); - - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({}, is1, {}, 0); + + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); + + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); + test0({1, 1}, is1, {}, 2); + test0({1, 1}, is3, {1, 1}, 0); + + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); + + test0({1, 1, 1}, is1, {}, 3); + test0({1, 1, 1}, is2, {1, 1, 1}, 0); + test0({1, 1, 2}, is1, {2}, 2); + test0({1, 1, 2}, is2, {1, 1}, 1); + test0({1, 1, 2}, is3, {1, 1, 2}, 0); + test0({1, 2, 2}, is1, {2, 2}, 1); + test0({1, 2, 
2}, is2, {1}, 2); + test0({1, 2, 2}, is3, {1, 2, 2}, 0); + + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/unord/unord.set/empty.verify.cpp b/libcxx/test/std/containers/unord/unord.set/empty.verify.cpp index d39afb29f6e1c..8bf9b0003276d 100644 --- a/libcxx/test/std/containers/unord/unord.set/empty.verify.cpp +++ b/libcxx/test/std/containers/unord/unord.set/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp index 5bc692e8d9cf3..cec1041722d64 100644 --- a/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(unorderd_set& c, Predicate pred); +// typename unordered_set::size_type +// erase_if(unordered_set& c, Predicate pred); #include @@ -30,17 +31,14 @@ M make (Init vals) } template -void -test0(Init vals, Pred p, Init expected) -{ - M s = make (vals); - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - M e = make(expected); - assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); +void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + M e = make(expected); + assert((std::is_permutation(s.begin(), s.end(), e.begin(), e.end()))); } - template void test() { @@ -51,22 +49,22 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0({}, is1, {}); + test0({}, is1, {}, 0); - test0({1}, is1, {}); - test0({1}, is2, {1}); + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); - test0({1,2}, is1, {2}); - test0({1,2}, is2, {1}); - test0({1,2}, is3, {1,2}); + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); - test0({1,2,3}, is1, {2,3}); - test0({1,2,3}, is2, {1,3}); - test0({1,2,3}, is3, {1,2}); - test0({1,2,3}, is4, {1,2,3}); + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); - test0({1,2,3}, True, {}); - test0({1,2,3}, False, {1,2,3}); + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); } int main(int, char**) diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp index 0a58370b70efe..f957b467b92f5 100644 --- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp +++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp @@ -12,7 +12,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS #include diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp index b399dda30bdaa..91b6558fcec03 100644 --- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp +++ 
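The depr.lib.binders tests in this stretch verify that std::bind1st, std::bind2nd, std::binder1st, and std::binder2nd produce deprecation diagnostics from C++11 on; -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS re-enables them at all, since C++17 removed them outright. A sketch of the deprecated usage in pre-C++17 modes (illustrative only; the real tests match specific expected-warning lines):

#include <functional>

int use_binders() {
    // Deprecated in C++11, removed in C++17:
    return std::bind2nd(std::plus<int>(), 1)(2); // 2 + 1, plus a deprecation warning
}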
b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp @@ -12,7 +12,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS #include diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp index e19ac6890361c..733071add7712 100644 --- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp +++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp @@ -12,7 +12,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS #include diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp index cc4a128ab7e9b..8914d75b518a9 100644 --- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp +++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp @@ -12,7 +12,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03 -// REQUIRES: verify-support // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS #include diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp index ee4da1caceab5..7322875495452 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include "filesystem_include.h" diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp similarity index 85% rename from libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp rename to libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp index 5b01f33bf4b0e..650537dcb20ce 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp @@ -10,6 +10,11 @@ // istream cerr; +// FILE_DEPENDENCIES: %t.exe +// RUN: %{build} +// RUN: %{exec} %t.exe 2> %t.err +// RUN: grep -e 'Hello World!' 
%t.err
+
 #include <iostream>
 #include <cassert>

@@ -17,16 +22,15 @@

 int main(int, char**)
 {
-#if 0
+    std::cerr << "Hello World!\n";
-#else
+
 #ifdef _LIBCPP_HAS_NO_STDOUT
     assert(std::cerr.tie() == NULL);
 #else
     assert(std::cerr.tie() == &std::cout);
 #endif
     assert(std::cerr.flags() & std::ios_base::unitbuf);
-#endif // 0

     return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
similarity index 78%
rename from libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
index 0b3672a4585cd..386dbbd4721d3 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
@@ -12,6 +12,11 @@

 // istream cin;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} echo "123" | %t.exe > %t.out
+// RUN: grep -e 'The number is 123!' %t.out
+
 #include <iostream>
 #include <cassert>

@@ -19,18 +24,14 @@

 int main(int, char**)
 {
-#if 0
-    std::cout << "Hello World!\n";
     int i;
-    std::cout << "Enter a number: ";
     std::cin >> i;
-    std::cout << "The number is : " << i << '\n';
-#else // 0
+    std::cout << "The number is " << i << "!";
+
 #ifdef _LIBCPP_HAS_NO_STDOUT
     assert(std::cin.tie() == NULL);
 #else
     assert(std::cin.tie() == &std::cout);
-#endif
 #endif

     return 0;
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp
similarity index 79%
rename from libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp
index 68e3729475016..32e23bf61c34f 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp
@@ -10,17 +10,18 @@

 // istream clog;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} %t.exe 2> %t.err
+// RUN: grep -e 'Hello World!' %t.err
+
 #include <iostream>

 #include "test_macros.h"

 int main(int, char**)
 {
-#if 0
     std::clog << "Hello World!\n";
-#else
-    (void)std::clog;
-#endif
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp
similarity index 75%
rename from libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp
index f1d53b773ac12..f4a066b5c50d8 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp
@@ -12,21 +12,18 @@

 // istream cout;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} %t.exe > %t.out
+// RUN: grep -e 'Hello World!' %t.out
+
 #include <iostream>

 #include "test_macros.h"

 int main(int, char**)
 {
-#if 0
     std::cout << "Hello World!\n";
-    int i;
-    std::cout << "Enter a number: ";
-    std::cin >> i;
-    std::cout << "The number is : " << i << '\n';
-#else // 0
-    (void)std::cout;
-#endif
-
-  return 0;
+
+    return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp
similarity index 84%
rename from libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp
index 1683c49fbf6d8..30974df3951f6 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp
@@ -10,6 +10,11 @@

 // istream wcerr;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} %t.exe 2> %t.err
+// RUN: grep -e 'Hello World!' %t.err
+
 #include <iostream>
 #include <cassert>

@@ -17,16 +22,14 @@

 int main(int, char**)
 {
-#if 0
     std::wcerr << L"Hello World!\n";
-#else
+
 #ifdef _LIBCPP_HAS_NO_STDOUT
     assert(std::wcerr.tie() == NULL);
 #else
     assert(std::wcerr.tie() == &std::wcout);
 #endif
     assert(std::wcerr.flags() & std::ios_base::unitbuf);
-#endif // 0
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
similarity index 77%
rename from libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
index c653b2f60678b..9d24a37233a8c 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
@@ -12,6 +12,11 @@

 // istream wcin;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} echo "123" | %t.exe > %t.out
+// RUN: grep -e 'The number is 123!' %t.out
+
 #include <iostream>
 #include <cassert>

@@ -19,19 +24,15 @@

 int main(int, char**)
 {
-#if 0
-    std::wcout << L"Hello World!\n";
     int i;
-    std::wcout << L"Enter a number: ";
     std::wcin >> i;
-    std::wcout << L"The number is : " << i << L'\n';
-#else // 0
+    std::wcout << L"The number is " << i << L"!";
+
 #ifdef _LIBCPP_HAS_NO_STDOUT
     assert(std::wcin.tie() == NULL);
 #else
     assert(std::wcin.tie() == &std::wcout);
-#endif
 #endif
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp
similarity index 79%
rename from libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp
index f396500890d88..d1b126067155c 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp
@@ -10,17 +10,18 @@

 // istream wclog;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} %t.exe 2> %t.err
+// RUN: grep -e 'Hello World!' %t.err
+
 #include <iostream>

 #include "test_macros.h"

 int main(int, char**)
 {
-#if 0
     std::wclog << L"Hello World!\n";
-#else
-    (void)std::wclog;
-#endif
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp
similarity index 80%
rename from libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp
rename to libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp
index b6bd1ef4ea18e..0a14f898baa21 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp
@@ -12,17 +12,18 @@

 // istream wcout;

+// FILE_DEPENDENCIES: %t.exe
+// RUN: %{build}
+// RUN: %{exec} %t.exe > %t.out
+// RUN: grep -e 'Hello World!' %t.out
+
 #include <iostream>

 #include "test_macros.h"

 int main(int, char**)
 {
-#if 0
     std::wcout << L"Hello World!\n";
-#else
-    (void)std::wcout;
-#endif
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp b/libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp
index 48c4a6e928926..4a26125d83f21 100644
--- a/libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp
+++ b/libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp
@@ -13,7 +13,6 @@

 // template <class T, size_t N> constexpr bool empty(const T (&array)[N]) noexcept;

 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
-// REQUIRES: verify-support

 #include
 #include
diff --git a/libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp b/libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp
index 0e9870962158a..d87f986f68869 100644
--- a/libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp
+++ b/libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp
@@ -13,7 +13,6 @@

 // template <class C> constexpr auto empty(const C& c) -> decltype(c.empty());

 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
-// REQUIRES: verify-support

 #include
 #include
diff --git a/libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp b/libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp
index 2b00c7ea1c111..54d0ba013e62e 100644
--- a/libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp
+++ b/libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp
@@ -13,7 +13,6 @@

 // template <class E> constexpr bool empty(initializer_list<E> il) noexcept;

 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
-// REQUIRES: verify-support

 #include
 #include
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size.verify.cpp
index 6b1d62d992f55..3651e2e67d844 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size.verify.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size.verify.cpp
@@ -13,7 +13,6 @@

 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17

-// REQUIRES: verify-support
 // REQUIRES: -faligned-allocation
 // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation
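All of the iostream-object conversions above follow the same lit ShTest recipe: FILE_DEPENDENCIES marks the built binary as a file the executor must make available, %{build} compiles the test, %{exec} runs it (locally or on a remote target), and a grep over the captured stream verifies the output. A minimal sketch of the resulting test shape (the RUN lines and body mirror cout.sh.cpp above; treat it as illustrative rather than an exact file from the patch):

// FILE_DEPENDENCIES: %t.exe
// RUN: %{build}
// RUN: %{exec} %t.exe > %t.out
// RUN: grep -e 'Hello World!' %t.out

#include <iostream>

int main(int, char**) {
    std::cout << "Hello World!\n";
    return 0;
}

diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align.verify.cpp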
b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align.verify.cpp index 3b1494eef99f9..a0fac812ae3b6 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align_nothrow.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align_nothrow.verify.cpp index 14488f6a9db3f..0d884f8bd59fc 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align_nothrow.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_align_nothrow.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_nothrow.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_nothrow.verify.cpp index 2af50eea8f42e..2d2801380f05b 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_nothrow.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_size_nothrow.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp index 093eaef858bb1..c1ab9043afb4a 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp @@ -12,7 +12,6 @@ // void* operator new[](std::size_t, void *); // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp index df77c9031f312..1a6b5b24560ae 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp @@ -12,7 +12,6 @@ // void* operator new(std::size_t, void *); // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp index 4ab5aa703cf40..8c9132659618e 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp +++ 
b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp @@ -12,7 +12,6 @@ // void* operator new(std::size_t); // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align.verify.cpp index 64706d38fb8c4..b6ffb80962cd6 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align_nothrow.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align_nothrow.verify.cpp index 5d6d168f5a33a..f02dd5e0ae311 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align_nothrow.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_align_nothrow.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp index b5c6d90d05486..10f7c60ef8556 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp @@ -12,7 +12,6 @@ // void* operator new(std::size_t, std::nothrow_t &); // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp b/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp index 8845aaf587131..e20b160f537fe 100644 --- a/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp +++ b/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp @@ -12,7 +12,6 @@ // template constexpr T* launder(T* p) noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include #include diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.pass.cpp index 9d07dcdd24661..ac6f94ff70f9b 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_nonmember_container_access 201411L [C++17] */ @@ -82,8 +82,8 @@ # ifndef 
__cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_nonmember_container_access diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.pass.cpp index 4004e6b70a2ac..837abe395404e 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_incomplete_container_elements 201505L [C++17] __cpp_lib_list_remove_return_type 201806L [C++2a] __cpp_lib_nonmember_container_access 201411L [C++17] @@ -111,8 +111,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_incomplete_container_elements diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.pass.cpp index 01570b69a5bb6..218510fb65eff 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_incomplete_container_elements 201505L [C++17] __cpp_lib_list_remove_return_type 201806L [C++2a] __cpp_lib_nonmember_container_access 201411L [C++17] @@ -111,8 +111,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_incomplete_container_elements diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp index a41dd1b8e9862..c6ef179249b10 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/map.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_generic_associative_lookup 201304L [C++14] __cpp_lib_map_try_emplace 201411L [C++17] __cpp_lib_node_extract 201606L [C++17] @@ -133,8 +133,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 
201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_generic_associative_lookup diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp index 80cf9c0af3caa..bc26397d6431b 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/set.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_generic_associative_lookup 201304L [C++14] __cpp_lib_node_extract 201606L [C++17] __cpp_lib_nonmember_container_access 201411L [C++17] @@ -117,8 +117,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_generic_associative_lookup diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.pass.cpp index bdd517da0de6d..eacd5ff1ecbd0 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.pass.cpp @@ -16,7 +16,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] __cpp_lib_char8_t 201811L [C++2a] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_nonmember_container_access 201411L [C++17] __cpp_lib_string_udls 201304L [C++14] __cpp_lib_string_view 201606L [C++17] @@ -143,8 +143,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_nonmember_container_access diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp index 07eb1a9bc351a..62c974add1baa 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_map.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_generic_unordered_lookup 201811L [C++2a] __cpp_lib_node_extract 201606L [C++17] __cpp_lib_nonmember_container_access 201411L [C++17] @@ -127,8 +127,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 
202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # if !defined(_LIBCPP_VERSION) diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp index 845318a79a5e9..6fe59790857b9 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/unordered_set.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_generic_unordered_lookup 201811L [C++2a] __cpp_lib_node_extract 201606L [C++17] __cpp_lib_nonmember_container_access 201411L [C++17] @@ -111,8 +111,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # if !defined(_LIBCPP_VERSION) diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.pass.cpp index 3ea2a0cf00b18..60bba44446c6f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.pass.cpp @@ -15,7 +15,7 @@ /* Constant Value __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_incomplete_container_elements 201505L [C++17] __cpp_lib_nonmember_container_access 201411L [C++17] */ @@ -98,8 +98,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_incomplete_container_elements diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index c55c577aa0030..081f0fede2341 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -38,7 +38,7 @@ __cpp_lib_destroying_delete 201806L [C++2a] __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_endian 201907L [C++2a] - __cpp_lib_erase_if 201811L [C++2a] + __cpp_lib_erase_if 202002L [C++2a] __cpp_lib_exchange_function 201304L [C++14] __cpp_lib_execution 201603L [C++17] __cpp_lib_filesystem 201703L [C++17] @@ -1718,8 +1718,8 @@ # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++2a" # endif -# if __cpp_lib_erase_if != 201811L -# error "__cpp_lib_erase_if should have the value 201811L in c++2a" +# if __cpp_lib_erase_if != 202002L +# error "__cpp_lib_erase_if should have the value 202002L in c++2a" # endif # ifndef __cpp_lib_exchange_function diff --git 
a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index 74bc7ba1ef37a..2f734284a9051 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -5,10 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This test is passing in an uncontrolled manner in some Apple environment. -// UNSUPPORTED: darwin -// + // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -36,6 +33,19 @@ #include "platform_support.h" // locale name macros +// TODO: +// Some of the assertions in this test are failing on Apple platforms. +// Until we figure out the problem and fix it, disable these tests on +// Apple platforms. Note that we're not using XFAIL or UNSUPPORTED markup +// here, because this test would otherwise be disabled on all platforms +// we test. To avoid this test becoming entirely stale, we just disable +// the parts that fail. +// +// See https://llvm.org/PR45739 for the bug tracking this. +#if defined(__APPLE__) +# define APPLE_FIXME +#endif + typedef std::money_get > Fn; class my_facet @@ -304,6 +314,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == 0); } +#if !defined(APPLE_FIXME) { // zero, showbase std::string v = "0,00 RUB "; showbase(ios); @@ -317,6 +328,7 @@ int main(int, char**) assert(ex == 0); noshowbase(ios); } +#endif { // negative one, showbase std::string v = "-0,01 RUB "; typedef input_iterator I; @@ -328,6 +340,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == -1); } +#if !defined(APPLE_FIXME) { // negative one, showbase std::string v = "-0,01 RUB "; showbase(ios); @@ -341,6 +354,7 @@ int main(int, char**) assert(ex == -1); noshowbase(ios); } +#endif { // positive, showbase std::string v = "1 234 567,89 RUB "; typedef input_iterator I; @@ -352,6 +366,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == 123456789); } +#if !defined(APPLE_FIXME) { // positive, showbase std::string v = "1 234 567,89 RUB "; showbase(ios); @@ -365,6 +380,8 @@ int main(int, char**) assert(ex == 123456789); noshowbase(ios); } +#endif +#if !defined(APPLE_FIXME) { // negative, showbase std::string v = "-1 234 567,89 RUB "; showbase(ios); @@ -378,6 +395,7 @@ int main(int, char**) assert(ex == -123456789); noshowbase(ios); } +#endif { // negative, showbase std::string v = "-1 234 567,89 \xD1\x80\xD1\x83\xD0\xB1""."; showbase(ios); @@ -638,6 +656,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == 0); } +#if !defined(APPLE_FIXME) { // zero, showbase std::wstring v = L"0,00 RUB "; showbase(ios); @@ -651,6 +670,7 @@ int main(int, char**) assert(ex == 0); noshowbase(ios); } +#endif { // negative one, showbase std::wstring v = L"-0,01 RUB "; typedef input_iterator I; @@ -662,6 +682,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == -1); } +#if !defined(APPLE_FIXME) { // negative one, showbase std::wstring v = L"-0,01 RUB "; showbase(ios); @@ -675,6 +696,7 @@ int main(int, char**) assert(ex == -1); 
noshowbase(ios); } +#endif { // positive, showbase std::wstring v = L"1 234 567,89 RUB "; typedef input_iterator I; @@ -686,6 +708,7 @@ int main(int, char**) assert(err == std::ios_base::goodbit); assert(ex == 123456789); } +#if !defined(APPLE_FIXME) { // positive, showbase std::wstring v = L"1 234 567,89 RUB "; showbase(ios); @@ -699,6 +722,8 @@ int main(int, char**) assert(ex == 123456789); noshowbase(ios); } +#endif +#if !defined(APPLE_FIXME) { // negative, showbase std::wstring v = L"-1 234 567,89 RUB "; showbase(ios); @@ -712,6 +737,7 @@ int main(int, char**) assert(ex == -123456789); noshowbase(ios); } +#endif { // negative, showbase std::wstring v = L"-1 234 567,89 \x440\x443\x431""."; showbase(ios); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index 3bf9c4ec0cd90..2b82d61139c80 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -5,10 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This test is passing in an uncontrolled manner in some Apple environment. -// UNSUPPORTED: darwin -// + // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -36,6 +33,19 @@ #include "platform_support.h" // locale name macros +// TODO: +// Some of the assertions in this test are failing on Apple platforms. +// Until we figure out the problem and fix it, disable these tests on +// Apple platforms. Note that we're not using XFAIL or UNSUPPORTED markup +// here, because this test would otherwise be disabled on all platforms +// we test. To avoid this test becoming entirely stale, we just disable +// the parts that fail. +// +// See https://llvm.org/PR45739 for the bug tracking this. 
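// A minimal sketch of the guard pattern this comment describes (illustrative
// shape only; the real guarded blocks appear throughout the test below):
//
//     #if defined(__APPLE__)
//     #  define APPLE_FIXME
//     #endif
//
//     #if !defined(APPLE_FIXME)
//     { // a sub-check that currently fails on Apple, e.g. a showbase case
//       ...
//     }
//     #endif
//
// Everything outside the APPLE_FIXME fences keeps running on every platform.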
+#if defined(__APPLE__) +# define APPLE_FIXME +#endif + typedef std::money_put > Fn; class my_facet @@ -71,6 +81,7 @@ int main(int, char**) { const my_facet f(1); // char, national +#if !defined(APPLE_FIXME) { // zero long double v = 0; char str[100]; @@ -103,6 +114,7 @@ int main(int, char**) std::string ex(str, iter.base()); assert(ex == "-1 234 567,89 "); } +#endif { // zero, showbase long double v = 0; showbase(ios); @@ -179,6 +191,7 @@ int main(int, char**) // char, international noshowbase(ios); ios.unsetf(std::ios_base::adjustfield); +#if !defined(APPLE_FIXME) { // zero long double v = 0; char str[100]; @@ -247,6 +260,7 @@ int main(int, char**) std::string ex(str, iter.base()); assert(ex == "-1 234 567,89 RUB "); } +#endif { // negative, showbase, left long double v = -123456789; showbase(ios); @@ -259,6 +273,7 @@ int main(int, char**) assert(ex == "-1 234 567,89 RUB "); assert(ios.width() == 0); } +#if !defined(APPLE_FIXME) { // negative, showbase, internal long double v = -123456789; showbase(ios); @@ -283,12 +298,14 @@ int main(int, char**) assert(ex == " -1 234 567,89 RUB "); assert(ios.width() == 0); } +#endif } { const my_facetw f(1); // wchar_t, national noshowbase(ios); ios.unsetf(std::ios_base::adjustfield); +#if !defined(APPLE_FIXME) { // zero long double v = 0; wchar_t str[100]; @@ -321,6 +338,7 @@ int main(int, char**) std::wstring ex(str, iter.base()); assert(ex == L"-1 234 567,89 "); } +#endif { // zero, showbase long double v = 0; showbase(ios); @@ -397,6 +415,7 @@ int main(int, char**) // wchar_t, international noshowbase(ios); ios.unsetf(std::ios_base::adjustfield); +#if !defined(APPLE_FIXME) { // zero long double v = 0; wchar_t str[100]; @@ -465,6 +484,7 @@ int main(int, char**) std::wstring ex(str, iter.base()); assert(ex == L"-1 234 567,89 RUB "); } +#endif { // negative, showbase, left long double v = -123456789; showbase(ios); @@ -477,6 +497,7 @@ int main(int, char**) assert(ex == L"-1 234 567,89 RUB "); assert(ios.width() == 0); } +#if !defined(APPLE_FIXME) { // negative, showbase, internal long double v = -123456789; showbase(ios); @@ -501,6 +522,7 @@ int main(int, char**) assert(ex == L" -1 234 567,89 RUB "); assert(ios.width() == 0); } +#endif } return 0; diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp index e8084f8f76abc..474f9ce571877 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp @@ -12,9 +12,6 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, double v) const; -// TODO(EricWF): This test takes 40+ minutes to build with Clang 3.8 under ASAN or MSAN. 
-// UNSUPPORTED: asan, msan - #include #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index 16c7d61cc9364..0740271292742 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -12,9 +12,6 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long double v) const; -// TODO(EricWF): This test takes 40+ minutes to build with Clang 3.8 under ASAN or MSAN. -// UNSUPPORTED: asan, msan - // TODO GLIBC uses a different string for positive and negative NAN numbers. // XFAIL: linux-gnu diff --git a/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp new file mode 100644 index 0000000000000..5decf933d9652 --- /dev/null +++ b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// match_prev_avail: +// --first is a valid iterator position. When this flag is set the flags +// match_not_bol and match_not_bow shall be ignored by the regular +// expression algorithms (30.11) and iterators (30.12) + +#include +#include +using namespace std; + +int main() { + char str1[] = "\na"; + auto str1_scnd = str1 + 1; + // Assert that match_prev_avail disables match_not_bol and this matches + assert(regex_match(str1 + 1, str1 + 2, regex("^a"), + regex_constants::match_not_bol | + regex_constants::match_prev_avail)); + // Manually passing match_prev_avail defines that --str1 is a valid position + assert(regex_match(str1_scnd, regex("a"), + regex_constants::match_not_bol | + regex_constants::match_prev_avail)); + + //Assert that match_prev_avail disables match_not_bow and this matches + assert(regex_search(str1, regex("\\ba"))); + assert(regex_match(str1 + 1, str1 + 2, regex("\\ba\\b"), + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + assert(regex_search(str1_scnd, regex("\\ba"), + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + + //Assert that match_prev_avail disables both match_not_bow and match_not_bol + assert(regex_match(str1 + 1, str1 + 2, regex("^a"), + regex_constants::match_not_bol | + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + assert(regex_match(str1_scnd, regex("\\ba"), + regex_constants::match_not_bol | + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + + // pr 42199 + string S = " cd"; + string::iterator Start = S.begin() + 1; + string::iterator End = S.end(); + assert(regex_search(Start, End, regex("^cd"))); + + assert( + !regex_search(Start, End, regex("^cd"), regex_constants::match_not_bol)); + assert(!regex_search(Start, End, regex(".*\\bcd\\b"), + regex_constants::match_not_bow)); + assert(!regex_search(Start, End, regex("^cd"), + 
regex_constants::match_not_bol | + regex_constants::match_not_bow)); + assert(!regex_search(Start, End, regex(".*\\bcd\\b"), + regex_constants::match_not_bol | + regex_constants::match_not_bow)); + + assert(regex_search(Start, End, regex("^cd"), + regex_constants::match_prev_avail)); + + assert(regex_search(Start, End, regex("^cd"), + regex_constants::match_not_bol | + regex_constants::match_prev_avail)); + assert(regex_search(Start, End, regex("^cd"), + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + assert(regex_match(Start, End, regex("\\bcd\\b"), + regex_constants::match_not_bol | + regex_constants::match_not_bow | + regex_constants::match_prev_avail)); + return 0; +} diff --git a/libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp b/libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp index c2449c0b92153..5405aabb5d442 100644 --- a/libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp +++ b/libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp @@ -13,7 +13,6 @@ // bool empty() const; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/re/re.traits/isctype.pass.cpp b/libcxx/test/std/re/re.traits/isctype.pass.cpp index 1eed193ae00f4..8c0b07988d6e7 100644 --- a/libcxx/test/std/re/re.traits/isctype.pass.cpp +++ b/libcxx/test/std/re/re.traits/isctype.pass.cpp @@ -12,9 +12,6 @@ // bool isctype(charT c, char_class_type f) const; -// TODO(EricWF): This test takes 40+ minutes to build with Clang 3.8 under ASAN or MSAN. -// UNSUPPORTED: asan, msan - #include #include diff --git a/libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp b/libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp index 864bb48b666f8..2e9f552006131 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp index 82bd71b1d5764..3eda844b7b6d5 100644 --- a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp @@ -50,7 +50,7 @@ int main(int, char**) { using TestSizeT = test_allocator::size_type; { // Testing (1) - // Nothing TODO. Cannot deduce without any arguments. + // Nothing to do. Cannot deduce without any arguments. 
} { // Testing (2) // This overload isn't compatible with implicit deduction guides as diff --git a/libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp b/libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp index 05e2db6739bd5..63de07cafc1cf 100644 --- a/libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp +++ b/libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp @@ -14,7 +14,6 @@ // bool empty() const noexcept; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp index 9cea8c6c69427..db6d0d00648a4 100644 --- a/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.cons/implicit_deduction_guides.pass.cpp @@ -29,7 +29,7 @@ int main(int, char**) { { // Testing (1) - // Nothing TODO. Cannot deduce without any arguments. + // Nothing to do. Cannot deduce without any arguments. } { // Testing (2) const std::string_view sin("abc"); diff --git a/libcxx/test/std/strings/strings.erasure/erase.pass.cpp b/libcxx/test/std/strings/strings.erasure/erase.pass.cpp index 26d6b8674e87d..6614d65b9527e 100644 --- a/libcxx/test/std/strings/strings.erasure/erase.pass.cpp +++ b/libcxx/test/std/strings/strings.erasure/erase.pass.cpp @@ -10,8 +10,8 @@ // // template -// void erase(basic_string& c, const U& value); - +// typename basic_string::size_type +// erase(basic_string& c, const U& value); #include #include @@ -21,50 +21,48 @@ #include "min_allocator.h" template -void -test0(S s, U val, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase(s, val))); - std::erase(s, val); - LIBCPP_ASSERT(s.__invariants()); - assert(s == expected); +void test0(S s, U val, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); + assert(expected_erased_count == std::erase(s, val)); + LIBCPP_ASSERT(s.__invariants()); + assert(s == expected); } template void test() { - test0(S(""), 'a', S("")); + test0(S(""), 'a', S(""), 0); - test0(S("a"), 'a', S("")); - test0(S("a"), 'b', S("a")); + test0(S("a"), 'a', S(""), 1); + test0(S("a"), 'b', S("a"), 0); - test0(S("ab"), 'a', S("b")); - test0(S("ab"), 'b', S("a")); - test0(S("ab"), 'c', S("ab")); - test0(S("aa"), 'a', S("")); - test0(S("aa"), 'c', S("aa")); + test0(S("ab"), 'a', S("b"), 1); + test0(S("ab"), 'b', S("a"), 1); + test0(S("ab"), 'c', S("ab"), 0); + test0(S("aa"), 'a', S(""), 2); + test0(S("aa"), 'c', S("aa"), 0); - test0(S("abc"), 'a', S("bc")); - test0(S("abc"), 'b', S("ac")); - test0(S("abc"), 'c', S("ab")); - test0(S("abc"), 'd', S("abc")); + test0(S("abc"), 'a', S("bc"), 1); + test0(S("abc"), 'b', S("ac"), 1); + test0(S("abc"), 'c', S("ab"), 1); + test0(S("abc"), 'd', S("abc"), 0); - test0(S("aab"), 'a', S("b")); - test0(S("aab"), 'b', S("aa")); - test0(S("aab"), 'c', S("aab")); - test0(S("abb"), 'a', S("bb")); - test0(S("abb"), 'b', S("a")); - test0(S("abb"), 'c', S("abb")); - test0(S("aaa"), 'a', S("")); - test0(S("aaa"), 'b', S("aaa")); + test0(S("aab"), 'a', S("b"), 2); + test0(S("aab"), 'b', S("aa"), 1); + test0(S("aab"), 'c', S("aab"), 0); + test0(S("abb"), 'a', S("bb"), 1); + test0(S("abb"), 'b', S("a"), 2); + test0(S("abb"), 'c', S("abb"), 0); + test0(S("aaa"), 'a', S(""), 3); + test0(S("aaa"), 'b', 
S("aaa"), 0); -// Test cross-type erasure - using opt = std::optional; - test0(S("aba"), opt(), S("aba")); - test0(S("aba"), opt('a'), S("b")); - test0(S("aba"), opt('b'), S("aa")); - test0(S("aba"), opt('c'), S("aba")); + // Test cross-type erasure + using opt = std::optional; + test0(S("aba"), opt(), S("aba"), 0); + test0(S("aba"), opt('a'), S("b"), 2); + test0(S("aba"), opt('b'), S("aa"), 1); + test0(S("aba"), opt('c'), S("aba"), 0); } int main(int, char**) diff --git a/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp b/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp index 30a58c0e646d9..c46a325868058 100644 --- a/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp @@ -10,7 +10,8 @@ // // template -// void erase_if(basic_string& c, Predicate pred); +// typename basic_string::size_type +// erase_if(basic_string& c, Predicate pred); #include @@ -19,13 +20,11 @@ #include "min_allocator.h" template -void -test0(S s, Pred p, S expected) -{ - ASSERT_SAME_TYPE(void, decltype(std::erase_if(s, p))); - std::erase_if(s, p); - LIBCPP_ASSERT(s.__invariants()); - assert(s == expected); +void test0(S s, Pred p, S expected, size_t expected_erased_count) { + ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + LIBCPP_ASSERT(s.__invariants()); + assert(s == expected); } template @@ -38,33 +37,33 @@ void test() auto True = [](auto) { return true; }; auto False = [](auto) { return false; }; - test0(S(""), isA, S("")); + test0(S(""), isA, S(""), 0); - test0(S("a"), isA, S("")); - test0(S("a"), isB, S("a")); + test0(S("a"), isA, S(""), 1); + test0(S("a"), isB, S("a"), 0); - test0(S("ab"), isA, S("b")); - test0(S("ab"), isB, S("a")); - test0(S("ab"), isC, S("ab")); - test0(S("aa"), isA, S("")); - test0(S("aa"), isC, S("aa")); + test0(S("ab"), isA, S("b"), 1); + test0(S("ab"), isB, S("a"), 1); + test0(S("ab"), isC, S("ab"), 0); + test0(S("aa"), isA, S(""), 2); + test0(S("aa"), isC, S("aa"), 0); - test0(S("abc"), isA, S("bc")); - test0(S("abc"), isB, S("ac")); - test0(S("abc"), isC, S("ab")); - test0(S("abc"), isD, S("abc")); + test0(S("abc"), isA, S("bc"), 1); + test0(S("abc"), isB, S("ac"), 1); + test0(S("abc"), isC, S("ab"), 1); + test0(S("abc"), isD, S("abc"), 0); - test0(S("aab"), isA, S("b")); - test0(S("aab"), isB, S("aa")); - test0(S("aab"), isC, S("aab")); - test0(S("abb"), isA, S("bb")); - test0(S("abb"), isB, S("a")); - test0(S("abb"), isC, S("abb")); - test0(S("aaa"), isA, S("")); - test0(S("aaa"), isB, S("aaa")); + test0(S("aab"), isA, S("b"), 2); + test0(S("aab"), isB, S("aa"), 1); + test0(S("aab"), isC, S("aab"), 0); + test0(S("abb"), isA, S("bb"), 1); + test0(S("abb"), isB, S("a"), 2); + test0(S("abb"), isC, S("abb"), 0); + test0(S("aaa"), isA, S(""), 3); + test0(S("aaa"), isB, S("aaa"), 0); - test0(S("aba"), False, S("aba")); - test0(S("aba"), True, S("")); + test0(S("aba"), False, S("aba"), 0); + test0(S("aba"), True, S(""), 3); } int main(int, char**) diff --git a/libcxx/test/std/thread/futures/futures.async/async.verify.cpp b/libcxx/test/std/thread/futures/futures.async/async.verify.cpp index d26df44684323..1fea53c6f11e1 100644 --- a/libcxx/test/std/thread/futures/futures.async/async.verify.cpp +++ b/libcxx/test/std/thread/futures/futures.async/async.verify.cpp @@ -8,7 +8,6 @@ // // UNSUPPORTED: libcpp-has-no-threads // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp index 8485d73c037f9..b2e8e72e282fa 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp index baa1939420e74..1de51ec22a56f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp index 2d92c0875317b..5e5eb1df82f18 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads -// // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp index 1744d2f254f84..4eb435f4dd38e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: libcpp-has-no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp index 1fb1a0dbc8a54..02bc366e9aac6 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: libcpp-has-no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp index 028a98a84977e..afde76c2c4562 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp index c4f0078284aa5..98e34370206ef 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: libcpp-has-no-threads // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp index e915a3f9907fc..0045414ba4a26 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp @@ -5,9 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads -// // ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp index 8d58a6623e9dd..c1072432d84f2 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp @@ -5,8 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads +// ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp index fa8c9ae704f81..a2734d229eb6c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp @@ -5,8 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // UNSUPPORTED: libcpp-has-no-threads +// ALLOW_RETRIES: 2 // diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp index e03c4e6672988..02eb8b3872a0c 100644 --- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp +++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp index e62df63d299e3..a3e25ef68bc63 100644 --- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp +++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp index d56346e6d6e28..60c0272d0fc17 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp @@ -186,6 +186,7 @@ void test_copy_assign_throws() } int main(int, char**) { + globalMemCounter.reset(); test_copy_assign(); test_copy_assign(); test_copy_assign(); diff --git a/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp b/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp 
index e6aa2377345fd..5002567f619c9 100644 --- a/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp +++ b/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp b/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp index 49be77d7c1015..ac954c1e2f9f9 100644 --- a/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp +++ b/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp b/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp index 6737badf8f00c..7df5166b4efa2 100644 --- a/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp +++ b/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp b/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp index 6530c55b4c386..f651c8f8ba85f 100644 --- a/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp +++ b/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp @@ -13,7 +13,6 @@ // UNSUPPORTED: clang-4.0 // UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: verify-support #include diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp index 9cb150012c637..d710c828f9064 100644 --- a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp +++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp @@ -16,7 +16,6 @@ // }; // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support #include #include diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp index f630303ecc94b..5d78b6b8f411e 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 -// REQUIRES: verify-support // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/const_optional_U.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/const_optional_U.pass.cpp index cf44921fe5729..b75dbfed029ba 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/const_optional_U.pass.cpp +++ 
b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/const_optional_U.pass.cpp @@ -143,7 +143,7 @@ void test_with_test_type() { assert(T::alive == 0); } -void test_ambigious_assign() { +void test_ambiguous_assign() { using OptInt = std::optional; { using T = AssignableFrom; @@ -199,7 +199,7 @@ void test_ambigious_assign() { int main(int, char**) { test_with_test_type(); - test_ambigious_assign(); + test_ambiguous_assign(); { optional opt; constexpr optional opt2; diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/optional_U.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/optional_U.pass.cpp index 2239acaf6295d..d8bae281b9a52 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/optional_U.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.assign/optional_U.pass.cpp @@ -148,7 +148,7 @@ void test_with_test_type() { } -void test_ambigious_assign() { +void test_ambiguous_assign() { using OptInt = std::optional; { using T = AssignableFrom; @@ -204,7 +204,7 @@ void test_ambigious_assign() { int main(int, char**) { test_with_test_type(); - test_ambigious_assign(); + test_ambiguous_assign(); { optional opt; optional opt2; diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp index 55ce0fced101d..27a1cdb76e288 100644 --- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp +++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp @@ -54,8 +54,8 @@ extern Incomplete inc_obj; int main(int, char**) { { - typedef std::pair P; - const P p1(CopyAssignable(), 4); + typedef std::pair P; + const P p1(CopyAssignable(), short{4}); P p2; p2 = p1; assert(p2.second == 4); @@ -97,7 +97,7 @@ int main(int, char**) assert(&p.second == &inc_obj); } - return 0; + return 0; } struct Incomplete {}; diff --git a/libcxx/test/support/platform_support.h b/libcxx/test/support/platform_support.h index 19531ce080041..e897ba7625ca6 100644 --- a/libcxx/test/support/platform_support.h +++ b/libcxx/test/support/platform_support.h @@ -16,39 +16,39 @@ // locale names #ifdef _WIN32 -// WARNING: Windows does not support UTF-8 codepages. -// Locales are "converted" using https://docs.moodle.org/dev/Table_of_locales -#define LOCALE_en_US "en-US" -#define LOCALE_en_US_UTF_8 "en-US" -#define LOCALE_cs_CZ_ISO8859_2 "cs-CZ" -#define LOCALE_fr_FR_UTF_8 "fr-FR" -#define LOCALE_fr_CA_ISO8859_1 "fr-CA" -#define LOCALE_ru_RU_UTF_8 "ru-RU" -#define LOCALE_zh_CN_UTF_8 "zh-CN" + // WARNING: Windows does not support UTF-8 codepages. + // Locales are "converted" using https://docs.moodle.org/dev/Table_of_locales +# define LOCALE_en_US "en-US" +# define LOCALE_en_US_UTF_8 "en-US" +# define LOCALE_cs_CZ_ISO8859_2 "cs-CZ" +# define LOCALE_fr_FR_UTF_8 "fr-FR" +# define LOCALE_fr_CA_ISO8859_1 "fr-CA" +# define LOCALE_ru_RU_UTF_8 "ru-RU" +# define LOCALE_zh_CN_UTF_8 "zh-CN" #elif defined(__CloudABI__) -// Timezones are integrated into locales through LC_TIMEZONE_MASK on -// CloudABI. LC_ALL_MASK can only be used if a timezone has also been -// provided. UTC should be all right. 
-#define LOCALE_en_US "en_US" -#define LOCALE_en_US_UTF_8 "en_US.UTF-8@UTC" -#define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8@UTC" -#define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1@UTC" -#define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2@UTC" -#define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8@UTC" -#define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8@UTC" + // Timezones are integrated into locales through LC_TIMEZONE_MASK on + // CloudABI. LC_ALL_MASK can only be used if a timezone has also been + // provided. UTC should be all right. +# define LOCALE_en_US "en_US" +# define LOCALE_en_US_UTF_8 "en_US.UTF-8@UTC" +# define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8@UTC" +# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1@UTC" +# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2@UTC" +# define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8@UTC" +# define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8@UTC" #else -#define LOCALE_en_US "en_US" -#define LOCALE_en_US_UTF_8 "en_US.UTF-8" -#define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8" -#ifdef __linux__ -#define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1" -#define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2" -#else -#define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1" -#define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2" -#endif -#define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8" -#define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8" +# define LOCALE_en_US "en_US" +# define LOCALE_en_US_UTF_8 "en_US.UTF-8" +# define LOCALE_fr_FR_UTF_8 "fr_FR.UTF-8" +# ifdef __linux__ +# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO-8859-1" +# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO-8859-2" +# else +# define LOCALE_fr_CA_ISO8859_1 "fr_CA.ISO8859-1" +# define LOCALE_cs_CZ_ISO8859_2 "cs_CZ.ISO8859-2" +# endif +# define LOCALE_ru_RU_UTF_8 "ru_RU.UTF-8" +# define LOCALE_zh_CN_UTF_8 "zh_CN.UTF-8" #endif #include @@ -57,9 +57,9 @@ #include #include #if defined(_WIN32) || defined(__MINGW32__) -#include // _mktemp_s +# include // _mktemp_s #else -#include // close +# include // close #endif #if defined(_NEWLIB_VERSION) && defined(__STRICT_ANSI__) diff --git a/libcxx/utils/ci/apple-install-libcxxabi.sh b/libcxx/utils/ci/apple-install-libcxxabi.sh index 04173b2adbeb3..d550016a4c61c 100755 --- a/libcxx/utils/ci/apple-install-libcxxabi.sh +++ b/libcxx/utils/ci/apple-install-libcxxabi.sh @@ -120,7 +120,7 @@ for arch in ${architectures}; do -DCMAKE_INSTALL_NAME_DIR="${install_name_dir}" \ -DCMAKE_OSX_ARCHITECTURES="${arch}" \ -DLIBCXXABI_LIBRARY_VERSION="${version}" \ - -DLIBCXXABI_LIBCXX_INCLUDES="$(xcrun --sdk "${sdk}" c++ -print-resource-dir)/../../../include/c++/v1" + -DLIBCXXABI_LIBCXX_PATH="${llvm_root}/libcxx" ) xcrun --sdk "${sdk}" cmake --build "${build_dir}/${arch}" --target install-cxxabi -- -v diff --git a/libcxx/utils/ci/macos-backdeployment.sh b/libcxx/utils/ci/macos-backdeployment.sh index 859b8e43daaaf..ae3dfa841cde5 100755 --- a/libcxx/utils/ci/macos-backdeployment.sh +++ b/libcxx/utils/ci/macos-backdeployment.sh @@ -4,13 +4,12 @@ set -ue function usage() { cat < --std --arch --deployment-target --sdk-version [--libcxx-roots ] [--lit-args ] [--no-cleanup] +$(basename ${0}) [-h|--help] --monorepo-root --std --deployment-target --sdk-version [--libcxx-roots ] [--lit-args ] [--no-cleanup] This script is used to continually test the back-deployment use case of libc++ and libc++abi on MacOS. --monorepo-root Full path to the root of the LLVM monorepo. Both libc++ and libc++abi headers from the monorepo are used. --std Version of the C++ Standard to run the tests under (c++03, c++11, etc..). - --arch Architecture to build the tests for (32, 64). 
--deployment-target The deployment target to run the tests for. This should be a version number of MacOS (e.g. 10.12). All MacOS versions until and including 10.9 are supported. --sdk-version The version of the SDK to test with. This should be a version number of MacOS (e.g. 10.12). We'll link against the libc++ dylib in that SDK, but we'll run against the one on the given deployment target. The SDK version must be no older than the deployment target. [--libcxx-roots] The path to previous libc++/libc++abi dylibs to use for back-deployment testing. Those are normally downloaded automatically, but if specified, this option will override the directory used. The directory should have the same layout as the roots downloaded automatically. @@ -43,10 +42,6 @@ while [[ $# -gt 0 ]]; do STD="${2}" shift; shift ;; - --arch) - ARCH="${2}" - shift; shift - ;; --deployment-target) DEPLOYMENT_TARGET="${2}" shift; shift @@ -81,7 +76,6 @@ done if [[ -z ${MONOREPO_ROOT+x} ]]; then echo "--monorepo-root is a required parameter"; usage; exit 1; fi if [[ -z ${STD+x} ]]; then echo "--std is a required parameter"; usage; exit 1; fi -if [[ -z ${ARCH+x} ]]; then echo "--arch is a required parameter"; usage; exit 1; fi if [[ -z ${DEPLOYMENT_TARGET+x} ]]; then echo "--deployment-target is a required parameter"; usage; exit 1; fi if [[ -z ${MACOS_SDK_VERSION+x} ]]; then echo "--sdk-version is a required parameter"; usage; exit 1; fi if [[ -z ${ADDITIONAL_LIT_ARGS+x} ]]; then ADDITIONAL_LIT_ARGS=""; fi @@ -113,14 +107,6 @@ PREVIOUS_DYLIBS_URL="http://lab.llvm.org:8080/roots/libcxx-roots.tar.gz" LLVM_TARBALL_URL="https://github.com/llvm-mirror/llvm/archive/master.tar.gz" -echo "@@@ Setting up LIT flags for architecture @@@" -LIT_ARCH_STRING="" -if [[ "${ARCH}" == "32" ]]; then - LIT_ARCH_STRING="--param=enable_32bit=true" -fi -echo "@@@@@@" - - echo "@@@ Configuring CMake @@@" mkdir -p "${LLVM_BUILD_DIR}" (cd "${LLVM_BUILD_DIR}" && @@ -129,7 +115,7 @@ mkdir -p "${LLVM_BUILD_DIR}" -GNinja \ -DCMAKE_INSTALL_PREFIX="${LLVM_INSTALL_DIR}" \ -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \ - -DCMAKE_OSX_ARCHITECTURES="i386;x86_64" \ + -DCMAKE_OSX_ARCHITECTURES="x86_64" \ "${MONOREPO_ROOT}/llvm" ) echo "@@@@@@" @@ -157,7 +143,6 @@ LIBCXX_IN_SDK="${PREVIOUS_DYLIBS_DIR}/macOS/${MACOS_SDK_VERSION}/libc++.dylib" echo "@@@ Running tests for libc++ @@@" "${LLVM_BUILD_DIR}/bin/llvm-lit" -sv "${MONOREPO_ROOT}/libcxx/test" \ --param=enable_experimental=false \ - ${LIT_ARCH_STRING} \ --param=cxx_headers="${LLVM_INSTALL_DIR}/include/c++/v1" \ --param=std="${STD}" \ --param=platform="macosx${DEPLOYMENT_TARGET}" \ diff --git a/libcxx/utils/ci/macos-trunk.sh b/libcxx/utils/ci/macos-trunk.sh index ea6a9b435b5a5..843d23c1d00c3 100755 --- a/libcxx/utils/ci/macos-trunk.sh +++ b/libcxx/utils/ci/macos-trunk.sh @@ -4,13 +4,12 @@ set -ue function usage() { cat < --std --arch --libcxx-exceptions [--lit-args ] +$(basename ${0}) [-h|--help] --monorepo-root --std --libcxx-exceptions [--lit-args ] This script is used to continually test libc++ and libc++abi trunk on MacOS. --monorepo-root Full path to the root of the LLVM monorepo. Both libc++ and libc++abi from the monorepo are used. --std Version of the C++ Standard to run the tests under (c++03, c++11, etc..). - --arch Architecture to build the tests for (32, 64). --libcxx-exceptions Whether to enable exceptions when building libc++ and running the libc++ tests. libc++abi is always built with support for exceptions because other libraries in the runtime depend on it (like libobjc). 
This must be ON or OFF. [--cmake-args] Additional arguments to pass to CMake (both the libc++ and the libc++abi configuration). If there are multiple arguments, quote them to pass them as a single argument to this script. [--lit-args] Additional arguments to pass to lit. If there are multiple arguments, quote them to pass them as a single argument to this script. @@ -34,10 +33,6 @@ while [[ $# -gt 0 ]]; do STD="${2}" shift; shift ;; - --arch) - ARCH="${2}" - shift; shift - ;; --libcxx-exceptions) LIBCXX_EXCEPTIONS="${2}" shift; shift @@ -68,7 +63,6 @@ done if [[ -z ${MONOREPO_ROOT+x} ]]; then echo "--monorepo-root is a required parameter"; usage; exit 1; fi if [[ -z ${STD+x} ]]; then echo "--std is a required parameter"; usage; exit 1; fi -if [[ -z ${ARCH+x} ]]; then echo "--arch is a required parameter"; usage; exit 1; fi if [[ "${LIBCXX_EXCEPTIONS}" != "ON" && "${LIBCXX_EXCEPTIONS}" != "OFF" ]]; then echo "--libcxx-exceptions is a required parameter and must be either ON or OFF"; usage; exit 1; fi if [[ -z ${ADDITIONAL_CMAKE_ARGS+x} ]]; then ADDITIONAL_CMAKE_ARGS=""; fi if [[ -z ${ADDITIONAL_LIT_ARGS+x} ]]; then ADDITIONAL_LIT_ARGS=""; fi @@ -92,9 +86,6 @@ LLVM_INSTALL_DIR="${TEMP_DIR}/llvm-install" echo "@@@ Setting up LIT flags @@@" LIT_FLAGS="-sv --param=std=${STD} ${ADDITIONAL_LIT_ARGS}" -if [[ "${ARCH}" == "32" ]]; then - LIT_FLAGS+=" --param=enable_32bit=true" -fi echo "@@@@@@" @@ -110,7 +101,7 @@ mkdir -p "${LLVM_BUILD_DIR}" ${ADDITIONAL_CMAKE_ARGS} \ -DLLVM_LIT_ARGS="${LIT_FLAGS}" \ -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \ - -DCMAKE_OSX_ARCHITECTURES="i386;x86_64" \ + -DCMAKE_OSX_ARCHITECTURES="x86_64" \ "${MONOREPO_ROOT}/llvm" ) echo "@@@@@@" diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 8caee5c13ba94..96860c2118c22 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -475,7 +475,7 @@ def add_version_header(tc): }, {"name": "__cpp_lib_erase_if", "values": { - "c++2a": int(201811), + "c++2a": int(202002), }, "headers": ["string", "deque", "forward_list", "list", "vector", "map", "set", "unordered_map", "unordered_set"] @@ -587,7 +587,7 @@ def add_version_header(tc): }, {"name": "__cpp_lib_to_array", "values": { - "c++2a": 201907L, + "c++2a": int(201907), }, "headers": ["array"], }, diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 225dadb5f02d7..8369a94f1321c 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -22,6 +22,7 @@ from libcxx.test.executor import * from libcxx.test.tracing import * import libcxx.util +import libcxx.test.features def loadSiteConfig(lit_config, config, param_name, env_name): # We haven't loaded the site specific configuration (the user is @@ -130,23 +131,22 @@ def configure(self): self.configure_obj_root() self.configure_cxx_stdlib_under_test() self.configure_cxx_library_root() - self.configure_use_clang_verify() - self.configure_use_thread_safety() - self.configure_ccache() self.configure_compile_flags() self.configure_link_flags() self.configure_env() - self.configure_color_diagnostics() self.configure_debug_mode() self.configure_warnings() self.configure_sanitizer() self.configure_coverage() self.configure_modules() - self.configure_coroutines() - self.configure_blocks() - self.configure_objc_arc() self.configure_substitutions() self.configure_features() + self.configure_new_features() + + def 
configure_new_features(self): + supportedFeatures = [f for f in libcxx.test.features.features if f.isSupported(self.config)] + for feature in supportedFeatures: + feature.enableIn(self.config) def print_config_info(self): # Print the final compile and link flags. @@ -162,7 +162,7 @@ def print_config_info(self): self.lit_config.note('Using link flags: %s' % self.cxx.link_flags) # Print as list to prevent "set([...])" from being printed. self.lit_config.note('Using available_features: %s' % - list(self.config.available_features)) + list(sorted(self.config.available_features))) show_env_vars = {} for k,v in self.exec_env.items(): if k not in os.environ or os.environ[k] != v: @@ -219,21 +219,7 @@ def configure_cxx(self): '(e.g., --param=cxx_under_test=clang++)') self.cxx = CXXCompiler(self, cxx) if not self.cxx_is_clang_cl else \ self._configure_clang_cl(cxx) - cxx_type = self.cxx.type - if cxx_type is not None: - assert self.cxx.version is not None - maj_v, min_v, patch_v = self.cxx.version - self.config.available_features.add(cxx_type) - self.config.available_features.add('%s-%s' % (cxx_type, maj_v)) - self.config.available_features.add('%s-%s.%s' % ( - cxx_type, maj_v, min_v)) - self.config.available_features.add('%s-%s.%s.%s' % ( - cxx_type, maj_v, min_v, patch_v)) self.cxx.compile_env = dict(os.environ) - # 'CCACHE_CPP2' prevents ccache from stripping comments while - # preprocessing. This is required to prevent stripping of '-verify' - # comments. - self.cxx.compile_env['CCACHE_CPP2'] = '1' def _configure_clang_cl(self, clang_path): def _split_env_var(var): @@ -253,17 +239,6 @@ def _prefixed_env_list(var, prefix): compile_flags=compile_flags, link_flags=link_flags) - def _dump_macros_verbose(self, *args, **kwargs): - macros_or_error = self.cxx.dumpMacros(*args, **kwargs) - if isinstance(macros_or_error, tuple): - cmd, out, err, rc = macros_or_error - report = libcxx.util.makeReport(cmd, out, err, rc) - report += "Compiler failed unexpectedly when dumping macros!" - self.lit_config.fatal(report) - return None - assert isinstance(macros_or_error, dict) - return macros_or_error - def configure_src_root(self): self.libcxx_src_root = self.get_lit_conf( 'libcxx_src_root', os.path.dirname(self.config.test_source_root)) @@ -323,31 +298,6 @@ def configure_cxx_stdlib_under_test(self): if self.get_lit_conf('enable_experimental') is None: self.config.enable_experimental = 'true' - def configure_use_clang_verify(self): - '''If set, run clang with -verify on failing tests.''' - self.use_clang_verify = self.get_lit_bool('use_clang_verify') - if self.use_clang_verify is None: - # NOTE: We do not test for the -verify flag directly because - # -verify will always exit with non-zero on an empty file. 
- self.use_clang_verify = self.cxx.isVerifySupported() - self.lit_config.note( - "inferred use_clang_verify as: %r" % self.use_clang_verify) - - def configure_use_thread_safety(self): - '''If set, run clang with -verify on failing tests.''' - has_thread_safety = self.cxx.hasCompileFlag('-Werror=thread-safety') - if has_thread_safety: - self.cxx.compile_flags += ['-Werror=thread-safety'] - self.config.available_features.add('thread-safety') - self.lit_config.note("enabling thread-safety annotations") - - def configure_ccache(self): - use_ccache_default = os.environ.get('LIBCXX_USE_CCACHE') is not None - use_ccache = self.get_lit_bool('use_ccache', use_ccache_default) - if use_ccache: - self.cxx.use_ccache = True - self.lit_config.note('enabling ccache') - def configure_features(self): additional_features = self.get_lit_conf('additional_features') if additional_features: @@ -395,36 +345,9 @@ def configure_features(self): if not self.get_lit_bool('enable_filesystem', default=True): self.config.available_features.add('c++filesystem-disabled') - - # Run a compile test for the -fsized-deallocation flag. This is needed - # in test/std/language.support/support.dynamic/new.delete - if self.cxx.hasCompileFlag('-fsized-deallocation'): - self.config.available_features.add('-fsized-deallocation') - - if self.cxx.hasCompileFlag('-faligned-allocation'): - self.config.available_features.add('-faligned-allocation') - - if self.cxx.hasCompileFlag('-fdelayed-template-parsing'): - self.config.available_features.add('fdelayed-template-parsing') - if self.get_lit_bool('has_libatomic', False): self.config.available_features.add('libatomic') - macros = self._dump_macros_verbose() - if '__cpp_if_constexpr' not in macros: - self.config.available_features.add('libcpp-no-if-constexpr') - - if '__cpp_structured_bindings' not in macros: - self.config.available_features.add('libcpp-no-structured-bindings') - - if '__cpp_deduction_guides' not in macros or \ - intMacroValue(macros['__cpp_deduction_guides']) < 201611: - self.config.available_features.add('libcpp-no-deduction-guides') - - if '__cpp_concepts' not in macros or \ - intMacroValue(macros['__cpp_concepts']) < 201811: - self.config.available_features.add('libcpp-no-concepts') - if self.target_info.is_windows(): self.config.available_features.add('windows') if self.cxx_stdlib_under_test == 'libc++': @@ -440,12 +363,6 @@ def configure_features(self): self.config.available_features.add('libcxx_gdb') self.cxx.libcxx_gdb = libcxx_gdb - # Support Objective-C++ only on MacOS and if the compiler supports it. - if self.target_info.platform() == "darwin" and \ - self.target_info.is_host_macosx() and \ - self.cxx.hasCompileFlag(["-x", "objective-c++", "-fobjc-arc"]): - self.config.available_features.add("objective-c++") - def configure_compile_flags(self): self.configure_default_compile_flags() # Configure extra flags @@ -501,7 +418,6 @@ def configure_default_compile_flags(self): # Configure feature flags. 
self.configure_compile_flags_exceptions() self.configure_compile_flags_rtti() - self.configure_compile_flags_abi_version() enable_32bit = self.get_lit_bool('enable_32bit', False) if enable_32bit: self.cxx.flags += ['-m32'] @@ -590,63 +506,8 @@ def configure_config_site_header(self): config_site_header = os.path.join(self.libcxx_obj_root, '__config_site') if not os.path.isfile(config_site_header): return - contained_macros = self.parse_config_site_and_add_features( - config_site_header) - self.lit_config.note('Using __config_site header %s with macros: %r' - % (config_site_header, contained_macros)) - # FIXME: This must come after the call to - # 'parse_config_site_and_add_features(...)' in order for it to work. self.cxx.compile_flags += ['-include', config_site_header] - def parse_config_site_and_add_features(self, header): - """ parse_config_site_and_add_features - Deduce and add the test - features that that are implied by the #define's in the __config_site - header. Return a dictionary containing the macros found in the - '__config_site' header. - """ - # Parse the macro contents of __config_site by dumping the macros - # using 'c++ -dM -E' and filtering the predefines. - predefines = self._dump_macros_verbose() - macros = self._dump_macros_verbose(header) - feature_macros_keys = set(macros.keys()) - set(predefines.keys()) - feature_macros = {} - for k in feature_macros_keys: - feature_macros[k] = macros[k] - # We expect the header guard to be one of the definitions - assert '_LIBCPP_CONFIG_SITE' in feature_macros - del feature_macros['_LIBCPP_CONFIG_SITE'] - # The __config_site header should be non-empty. Otherwise it should - # have never been emitted by CMake. - assert len(feature_macros) > 0 - # FIXME: This is a hack that should be fixed using module maps. - # If modules are enabled then we have to lift all of the definitions - # in __config_site onto the command line. - for m in feature_macros: - define = '-D%s' % m - if feature_macros[m]: - define += '=%s' % (feature_macros[m]) - self.cxx.modules_flags += [define] - self.cxx.compile_flags += ['-Wno-macro-redefined'] - # Transform the following macro names from __config_site into features - # that can be used in the tests. - # Ex. _LIBCPP_HAS_NO_THREADS -> libcpp-has-no-threads - translate = { - '_LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE', - '_LIBCPP_HAS_NO_MONOTONIC_CLOCK', - '_LIBCPP_HAS_NO_STDIN', - '_LIBCPP_HAS_NO_STDOUT', - '_LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS', - '_LIBCPP_HAS_NO_THREADS', - '_LIBCPP_HAS_THREAD_API_EXTERNAL', - '_LIBCPP_HAS_THREAD_API_PTHREAD', - '_LIBCPP_NO_VCRUNTIME' - } - for m in translate.intersection(feature_macros.keys()): - self.config.available_features.add(m.lower()[1:].replace('_', '-')) - return feature_macros - - - def configure_compile_flags_exceptions(self): enable_exceptions = self.get_lit_bool('enable_exceptions', True) if not enable_exceptions: @@ -659,16 +520,6 @@ def configure_compile_flags_rtti(self): self.config.available_features.add('-fno-rtti') self.cxx.compile_flags += ['-fno-rtti', '-D_LIBCPP_NO_RTTI'] - def configure_compile_flags_abi_version(self): - abi_version = self.get_lit_conf('abi_version', '').strip() - abi_unstable = self.get_lit_bool('abi_unstable') - # Only add the ABI version when it is non-default. - # FIXME(EricWF): Get the ABI version from the "__config_site". 
- if abi_version and abi_version != '1': - self.cxx.compile_flags += ['-D_LIBCPP_ABI_VERSION=' + abi_version] - if abi_unstable: - self.cxx.compile_flags += ['-D_LIBCPP_ABI_UNSTABLE'] - def configure_link_flags(self): # Configure library path self.configure_link_flags_cxx_library_path() @@ -791,25 +642,6 @@ def configure_extra_library_flags(self): self.cxx.link_flags += ['-lc++external_threads'] self.target_info.add_cxx_link_flags(self.cxx.link_flags) - def configure_color_diagnostics(self): - use_color = self.get_lit_conf('color_diagnostics') - if use_color is None: - use_color = os.environ.get('LIBCXX_COLOR_DIAGNOSTICS') - if use_color is None: - return - if use_color != '': - self.lit_config.fatal('Invalid value for color_diagnostics "%s".' - % use_color) - color_flag = '-fdiagnostics-color=always' - # Check if the compiler supports the color diagnostics flag. Issue a - # warning if it does not since color diagnostics have been requested. - if not self.cxx.hasCompileFlag(color_flag): - self.lit_config.warning( - 'color diagnostics have been requested but are not supported ' - 'by the compiler') - else: - self.cxx.flags += [color_flag] - def configure_debug_mode(self): debug_level = self.get_lit_conf('debug_level', None) if not debug_level: @@ -820,22 +652,17 @@ def configure_debug_mode(self): self.cxx.compile_flags += ['-D_LIBCPP_DEBUG=%s' % debug_level] def configure_warnings(self): - # Turn on warnings by default for Clang based compilers when C++ >= 11 - default_enable_warnings = self.cxx.type in ['clang', 'apple-clang'] \ - and len(self.config.available_features.intersection( - ['c++11', 'c++14', 'c++17', 'c++2a'])) != 0 + # Turn on warnings by default for Clang based compilers + default_enable_warnings = self.cxx.type in ['clang', 'apple-clang'] enable_warnings = self.get_lit_bool('enable_warnings', default_enable_warnings) self.cxx.useWarnings(enable_warnings) - self.cxx.warning_flags += ['-Wall', '-Wextra'] + self.cxx.warning_flags += ['-Werror', '-Wall', '-Wextra'] # On GCC, the libc++ headers cause errors due to throw() decorators # on operator new clashing with those from the test suite, so we # don't enable warnings in system headers on GCC. if self.cxx.type != 'gcc': self.cxx.warning_flags += ['-D_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER'] - if self.cxx.hasWarningFlag('-Wuser-defined-warnings'): - self.cxx.warning_flags += ['-Wuser-defined-warnings'] - self.config.available_features.add('diagnose-if-support') self.cxx.addWarningFlagIfSupported('-Wshadow') self.cxx.addWarningFlagIfSupported('-Wno-unused-command-line-argument') self.cxx.addWarningFlagIfSupported('-Wno-attributes') @@ -921,31 +748,8 @@ def configure_coverage(self): self.cxx.flags += ['-g', '--coverage'] self.cxx.compile_flags += ['-O0'] - def configure_coroutines(self): - if self.cxx.hasCompileFlag('-fcoroutines-ts'): - macros = self._dump_macros_verbose(flags=['-fcoroutines-ts']) - if '__cpp_coroutines' not in macros: - self.lit_config.warning('-fcoroutines-ts is supported but ' - '__cpp_coroutines is not defined') - # Consider coroutines supported only when the feature test macro - # reflects a recent value. 
- if intMacroValue(macros['__cpp_coroutines']) >= 201703: - self.config.available_features.add('fcoroutines-ts') - - def configure_blocks(self): - if self.cxx.hasCompileFlag('-fblocks'): - self.config.available_features.add('has-fblocks') - - def configure_objc_arc(self): - cxx = copy.deepcopy(self.cxx) - cxx.source_lang = 'objective-c++' - if cxx.hasCompileFlag('-fobjc-arc'): - self.config.available_features.add('has-fobjc-arc') - def configure_modules(self): - modules_flags = ['-fmodules'] - if not self.target_info.is_darwin(): - modules_flags += ['-Xclang', '-fmodules-local-submodule-visibility'] + modules_flags = ['-fmodules', '-Xclang', '-fmodules-local-submodule-visibility'] supports_modules = self.cxx.hasCompileFlag(modules_flags) enable_modules = self.get_lit_bool('enable_modules', default=False, env_var='LIBCXX_ENABLE_MODULES') @@ -954,7 +758,6 @@ def configure_modules(self): '-fmodules is enabled but not supported by the compiler') if not supports_modules: return - self.config.available_features.add('modules-support') module_cache = os.path.join(self.config.test_exec_root, 'modules.cache') module_cache = os.path.realpath(module_cache) @@ -980,7 +783,7 @@ def configure_substitutions(self): sub.append(('%{libcxx_src_root}', self.libcxx_src_root)) # Configure flags substitutions flags = self.cxx.flags + (self.cxx.modules_flags if self.cxx.use_modules else []) - compile_flags = self.cxx.compile_flags + self.cxx.warning_flags + compile_flags = self.cxx.compile_flags + (self.cxx.warning_flags if self.cxx.use_warnings else []) sub.append(('%{flags}', ' '.join(map(pipes.quote, flags)))) sub.append(('%{compile_flags}', ' '.join(map(pipes.quote, compile_flags)))) sub.append(('%{link_flags}', ' '.join(map(pipes.quote, self.cxx.link_flags)))) diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py new file mode 100644 index 0000000000000..1110ded979679 --- /dev/null +++ b/libcxx/utils/libcxx/test/dsl.py @@ -0,0 +1,322 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +import libcxx.test.newformat +import lit +import lit.util +import os +import pipes +import platform +import tempfile + +def _memoize(f): + cache = dict() + def memoized(x): + if x not in cache: + cache[x] = f(x) + return cache[x] + return memoized + +def _executeScriptInternal(test, commands): + """ + Returns (stdout, stderr, exitCode, timeoutInfo) + + TODO: This really should be easier to access from Lit itself + """ + class FakeLitConfig(object): + def __init__(self): + self.isWindows = platform.system() == 'Windows' + self.maxIndividualTestTime = 0 + litConfig = FakeLitConfig() + _, tmpBase = lit.TestRunner.getTempPaths(test) + execDir = os.path.dirname(test.getExecPath()) + if not os.path.exists(execDir): + os.makedirs(execDir) + res = lit.TestRunner.executeScriptInternal(test, litConfig, tmpBase, commands, execDir) + if isinstance(res, lit.Test.Result): + res = ('', '', 127, None) + return res + +def _makeConfigTest(config): + sourceRoot = os.path.join(config.test_exec_root, '__config_src__') + execRoot = os.path.join(config.test_exec_root, '__config_exec__') + suite = lit.Test.TestSuite('__config__', sourceRoot, execRoot, config) + if not os.path.exists(sourceRoot): + os.makedirs(sourceRoot) + tmp = tempfile.NamedTemporaryFile(dir=sourceRoot, delete=False) + tmp.close() + pathInSuite = [os.path.relpath(tmp.name, sourceRoot)] + class TestWrapper(lit.Test.Test): + def __enter__(self): return self + def __exit__(self, *args): os.remove(tmp.name) + return TestWrapper(suite, pathInSuite, config) + +def hasCompileFlag(config, flag): + """ + Return whether the compiler in the configuration supports a given compiler flag. + + This is done by executing the %{cxx} substitution with the given flag and + checking whether that succeeds. + """ + with _makeConfigTest(config) as test: + commands = ["%{{cxx}} -xc++ {} -Werror -fsyntax-only %{{flags}} %{{compile_flags}} {}".format(os.devnull, flag)] + commands = libcxx.test.newformat.parseScript(test, preamble=commands, fileDependencies=[]) + out, err, exitCode, timeoutInfo = _executeScriptInternal(test, commands) + return exitCode == 0 + +def hasLocale(config, locale): + """ + Return whether the runtime execution environment supports a given locale. + + This is done by executing a program that tries to set the given locale using + %{exec} -- this means that the command may be executed on a remote host + depending on the %{exec} substitution. + """ + with _makeConfigTest(config) as test: + with open(test.getSourcePath(), 'w') as source: + source.write(""" + #include + int main(int, char** argv) { + if (::setlocale(LC_ALL, argv[1]) != NULL) return 0; + else return 1; + } + """) + commands = [ + "mkdir -p %T", + "%{cxx} -xc++ %s %{flags} %{compile_flags} %{link_flags} -o %t.exe", + "%{{exec}} %t.exe {}".format(pipes.quote(locale)), + ] + commands = libcxx.test.newformat.parseScript(test, preamble=commands, fileDependencies=['%t.exe']) + out, err, exitCode, timeoutInfo = _executeScriptInternal(test, commands) + cleanup = libcxx.test.newformat.parseScript(test, preamble=['rm %t.exe'], fileDependencies=[]) + _executeScriptInternal(test, cleanup) + return exitCode == 0 + +def compilerMacros(config, flags=''): + """ + Return a dictionary of predefined compiler macros. + + The keys are strings representing macros, and the values are strings + representing what each macro is defined to. 
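+
+    For instance (a purely hypothetical result that depends entirely on the
+    compiler being probed), the returned dictionary could contain entries
+    such as:
+
+        macros = compilerMacros(config)
+        macros['__cplusplus']   # e.g. '201402L'
+        macros['__clang__']     # e.g. '1'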
+ + If the optional `flags` argument (a string) is provided, these flags will + be added to the compiler invocation when generating the macros. + """ + with _makeConfigTest(config) as test: + commands = ["%{{cxx}} -xc++ {} -dM -E %{{flags}} %{{compile_flags}} {}".format(os.devnull, flags)] + commands = libcxx.test.newformat.parseScript(test, preamble=commands, fileDependencies=[]) + unparsedOutput, err, exitCode, timeoutInfo = _executeScriptInternal(test, commands) + parsedMacros = dict() + defines = (l.strip() for l in unparsedOutput.split('\n') if l.startswith('#define ')) + for line in defines: + line = line[len('#define '):] + macro, _, value = line.partition(' ') + parsedMacros[macro] = value + return parsedMacros + +def featureTestMacros(config, flags=''): + """ + Return a dictionary of feature test macros. + + The keys are strings representing feature test macros, and the values are + integers representing the value of the macro. + """ + allMacros = compilerMacros(config, flags) + return {m: int(v.rstrip('LlUu')) for (m, v) in allMacros.items() if m.startswith('__cpp_')} + + +class Feature(object): + """ + Represents a Lit available feature that is enabled whenever it is supported. + + A feature like this informs the test suite about a capability of the compiler, + platform, etc. Unlike Parameters, it does not make sense to explicitly + control whether a Feature is enabled -- it should be enabled whenever it + is supported. + """ + def __init__(self, name, compileFlag=None, linkFlag=None, when=lambda _: True): + """ + Create a Lit feature for consumption by a test suite. + + - name + The name of the feature. This is what will end up in Lit's available + features if the feature is enabled. This can be either a string or a + callable, in which case it is passed the TestingConfig and should + generate a string representing the name of the feature. + + - compileFlag + An optional compile flag to add when this feature is added to a + TestingConfig. If provided, this must be a string representing a + compile flag that will be appended to the end of the %{compile_flags} + substitution of the TestingConfig. + + - linkFlag + An optional link flag to add when this feature is added to a + TestingConfig. If provided, this must be a string representing a + link flag that will be appended to the end of the %{link_flags} + substitution of the TestingConfig. + + - when + A callable that gets passed a TestingConfig and should return a + boolean representing whether the feature is supported in that + configuration. For example, this can use `hasCompileFlag` to + check whether the compiler supports the flag that the feature + represents. If omitted, the feature will always be considered + supported. + """ + self._name = name + self._compileFlag = compileFlag + self._linkFlag = linkFlag + self._isSupported = when + + def isSupported(self, config): + """ + Return whether the feature is supported by the given TestingConfig. + """ + return self._isSupported(config) + + def enableIn(self, config): + """ + Enable a feature in a TestingConfig. + + The name of the feature is added to the set of available features of + `config`, and any compile or link flags provided upon construction of + the Feature are added to the end of the corresponding substitution in + the config. + + It is an error to call `f.enableIn(cfg)` if the feature `f` is not + supported in that TestingConfig (i.e. if `not f.isSupported(cfg)`). 
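+
+        A minimal usage sketch (the feature name and flag below are invented
+        for illustration; `config` is the TestingConfig being set up):
+
+            f = Feature(name='has-foo', compileFlag='-DFOO')
+            if f.isSupported(config):
+                f.enableIn(config)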
+ """ + assert self.isSupported(config), \ + "Trying to enable feature {} that is not supported in the given configuration".format(self._name) + + addTo = lambda subs, sub, flag: [(s, x + ' ' + flag) if s == sub else (s, x) for (s, x) in subs] + if self._compileFlag: + compileFlag = self._compileFlag(config) if callable(self._compileFlag) else self._compileFlag + config.substitutions = addTo(config.substitutions, '%{compile_flags}', compileFlag) + if self._linkFlag: + linkFlag = self._linkFlag(config) if callable(self._linkFlag) else self._linkFlag + config.substitutions = addTo(config.substitutions, '%{link_flags}', linkFlag) + + name = self._name(config) if callable(self._name) else self._name + config.available_features.add(name) + + +def _str_to_bool(s): + """ + Convert a string value to a boolean. + + True values are "y", "yes", "t", "true", "on" and "1", regardless of capitalization. + False values are "n", "no", "f", "false", "off" and "0", regardless of capitalization. + """ + trueVals = ["y", "yes", "t", "true", "on", "1"] + falseVals = ["n", "no", "f", "false", "off", "0"] + lower = s.lower() + if lower in trueVals: + return True + elif lower in falseVals: + return False + else: + raise ValueError("Got string '{}', which isn't a valid boolean".format(s)) + + +class Parameter(object): + """ + Represents a parameter of a Lit test suite. + + Parameters are used to customize the behavior of test suites in a user + controllable way, more specifically by passing `--param =` + when running Lit. Parameters have multiple possible values, and they can + have a default value when left unspecified. + + Parameters can have a Feature associated to them, in which case the Feature + is added to the TestingConfig if the parameter is enabled. It is an error if + the Parameter is enabled but the Feature associated to it is not supported, + for example trying to set the compilation standard to C++17 when `-std=c++17` + is not supported by the compiler. + + One important point is that Parameters customize the behavior of the test + suite in a bounded way, i.e. there should be a finite set of possible choices + for ``. While this may appear to be an aggressive restriction, this + is actually a very important constraint that ensures that the set of + configurations supported by a test suite is finite. Otherwise, a test + suite could have an unbounded number of supported configurations, and + nobody wants to be stuck maintaining that. If it's not possible for an + option to have a finite set of possible values (e.g. the path to the + compiler), it can be handled in the `lit.cfg`, but it shouldn't be + represented with a Parameter. + """ + def __init__(self, name, choices, type, help, feature, default=None): + """ + Create a Lit parameter to customize the behavior of a test suite. + + - name + The name of the parameter that can be used to set it on the command-line. + On the command-line, the parameter can be set using `--param =` + when running Lit. This must be non-empty. + + - choices + A non-empty set of possible values for this parameter. This must be + anything that can be iterated. It is an error if the parameter is + given a value that is not in that set, whether explicitly or through + a default value. + + - type + A callable that can be used to parse the value of the parameter given + on the command-line. As a special case, using the type `bool` also + allows parsing strings with boolean-like contents. + + - help + A string explaining the parameter, for documentation purposes. 
+ TODO: We should be able to surface those from the Lit command-line. + + - feature + A callable that gets passed the parsed value of the parameter (either + the one passed on the command-line or the default one), and that returns + either None or a Feature. + + - default + An optional default value to use for the parameter when no value is + provided on the command-line. If the default value is a callable, it + is called with the TestingConfig and should return the default value + for the parameter. Whether the default value is computed or specified + directly, it must be in the 'choices' provided for that Parameter. + """ + self._name = name + if len(self._name) == 0: + raise ValueError("Parameter name must not be the empty string") + + self._choices = list(choices) # should be finite + if len(self._choices) == 0: + raise ValueError("Parameter '{}' must be given at least one possible value".format(self._name)) + + self._parse = lambda x: (_str_to_bool(x) if type is bool and isinstance(x, str) + else type(x)) + self._help = help + self._feature = feature + self._default = default + + @property + def name(self): + """ + Return the name of the parameter. + + This is the name that can be used to set the parameter on the command-line + when running Lit. + """ + return self._name + + def getFeature(self, config, litParams): + param = litParams.get(self.name, None) + if param is None and self._default is None: + raise ValueError("Parameter {} doesn't have a default value, but it was not specified in the Lit parameters".format(self.name)) + getDefault = lambda: self._default(config) if callable(self._default) else self._default + value = self._parse(param) if param is not None else getDefault() + if value not in self._choices: + raise ValueError("Got value '{}' for parameter '{}', which is not in the provided set of possible choices: {}".format(value, self.name, self._choices)) + return self._feature(value) diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py new file mode 100644 index 0000000000000..ab6dd8f64a540 --- /dev/null +++ b/libcxx/utils/libcxx/test/features.py @@ -0,0 +1,86 @@ +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +from libcxx.test.dsl import * +import sys + +_isClang = lambda cfg: '__clang__' in compilerMacros(cfg) and '__apple_build_version__' not in compilerMacros(cfg) +_isAppleClang = lambda cfg: '__apple_build_version__' in compilerMacros(cfg) +_isGCC = lambda cfg: '__GNUC__' in compilerMacros(cfg) and '__clang__' not in compilerMacros(cfg) + +features = [ + Feature(name='fcoroutines-ts', compileFlag='-fcoroutines-ts', + when=lambda cfg: hasCompileFlag(cfg, '-fcoroutines-ts') and + featureTestMacros(cfg, flags='-fcoroutines-ts').get('__cpp_coroutines', 0) >= 201703), + + Feature(name='thread-safety', when=lambda cfg: hasCompileFlag(cfg, '-Werror=thread-safety'), compileFlag='-Werror=thread-safety'), + Feature(name='has-fblocks', when=lambda cfg: hasCompileFlag(cfg, '-fblocks')), + Feature(name='-fsized-deallocation', when=lambda cfg: hasCompileFlag(cfg, '-fsized-deallocation')), + Feature(name='-faligned-allocation', when=lambda cfg: hasCompileFlag(cfg, '-faligned-allocation')), + Feature(name='fdelayed-template-parsing', when=lambda cfg: hasCompileFlag(cfg, '-fdelayed-template-parsing')), + Feature(name='libcpp-no-if-constexpr', when=lambda cfg: '__cpp_if_constexpr' not in featureTestMacros(cfg)), + Feature(name='libcpp-no-structured-bindings', when=lambda cfg: '__cpp_structured_bindings' not in featureTestMacros(cfg)), + Feature(name='libcpp-no-deduction-guides', when=lambda cfg: featureTestMacros(cfg).get('__cpp_deduction_guides', 0) < 201611), + Feature(name='libcpp-no-concepts', when=lambda cfg: featureTestMacros(cfg).get('__cpp_concepts', 0) < 201811), + Feature(name='has-fobjc-arc', when=lambda cfg: hasCompileFlag(cfg, '-xobjective-c++ -fobjc-arc') and + sys.platform.lower().strip() == 'darwin'), # TODO: this doesn't handle cross-compiling to Apple platforms. 
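+
+    # Note: when one of the Features above is supported, enabling it adds its
+    # name to the config's available features (plus any associated flags), so
+    # individual tests can gate on it with the usual Lit directives, e.g.
+    # (illustrative only):
+    #
+    #   // UNSUPPORTED: libcpp-no-concepts
+    #   // REQUIRES: has-fblocks
+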
+ Feature(name='objective-c++', when=lambda cfg: hasCompileFlag(cfg, '-xobjective-c++ -fobjc-arc')), + Feature(name='diagnose-if-support', when=lambda cfg: hasCompileFlag(cfg, '-Wuser-defined-warnings'), compileFlag='-Wuser-defined-warnings'), + Feature(name='modules-support', when=lambda cfg: hasCompileFlag(cfg, '-fmodules')), + + Feature(name='apple-clang', when=_isAppleClang), + Feature(name=lambda cfg: 'apple-clang-{__clang_major__}'.format(**compilerMacros(cfg)), when=_isAppleClang), + Feature(name=lambda cfg: 'apple-clang-{__clang_major__}.{__clang_minor__}'.format(**compilerMacros(cfg)), when=_isAppleClang), + Feature(name=lambda cfg: 'apple-clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}'.format(**compilerMacros(cfg)), when=_isAppleClang), + + Feature(name='clang', when=_isClang), + Feature(name=lambda cfg: 'clang-{__clang_major__}'.format(**compilerMacros(cfg)), when=_isClang), + Feature(name=lambda cfg: 'clang-{__clang_major__}.{__clang_minor__}'.format(**compilerMacros(cfg)), when=_isClang), + Feature(name=lambda cfg: 'clang-{__clang_major__}.{__clang_minor__}.{__clang_patchlevel__}'.format(**compilerMacros(cfg)), when=_isClang), + + Feature(name='gcc', when=_isGCC), + Feature(name=lambda cfg: 'gcc-{__GNUC__}'.format(**compilerMacros(cfg)), when=_isGCC), + Feature(name=lambda cfg: 'gcc-{__GNUC__}.{__GNUC_MINOR__}'.format(**compilerMacros(cfg)), when=_isGCC), + Feature(name=lambda cfg: 'gcc-{__GNUC__}.{__GNUC_MINOR__}.{__GNUC_PATCHLEVEL__}'.format(**compilerMacros(cfg)), when=_isGCC), +] + +# Deduce and add the test features that are implied by the #defines in +# the <__config_site> header. +# +# For each macro of the form `_LIBCPP_XXX_YYY_ZZZ` defined below that +# is defined after including <__config_site>, add a Lit feature called +# `libcpp-xxx-yyy-zzz`. When a macro is defined to a specific value +# (e.g. `_LIBCPP_ABI_VERSION=2`), the feature is `libcpp-xxx-yyy-zzz=<value>`. +macros = [ + '_LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE', + '_LIBCPP_HAS_NO_MONOTONIC_CLOCK', + '_LIBCPP_HAS_NO_STDIN', + '_LIBCPP_HAS_NO_STDOUT', + '_LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS', + '_LIBCPP_HAS_NO_THREADS', + '_LIBCPP_HAS_THREAD_API_EXTERNAL', + '_LIBCPP_HAS_THREAD_API_PTHREAD', + '_LIBCPP_NO_VCRUNTIME', + '_LIBCPP_ABI_VERSION', + '_LIBCPP_ABI_UNSTABLE' +] +for macro in macros: + features += [ + Feature(name=lambda cfg, macro=macro: macro.lower()[1:].replace('_', '-') + ( + '={}'.format(compilerMacros(cfg)[macro]) if compilerMacros(cfg)[macro] else '' + ), + when=lambda cfg, macro=macro: macro in compilerMacros(cfg), + + # FIXME: This is a hack that should be fixed using module maps. + # If modules are enabled then we have to lift all of the definitions + # in <__config_site> onto the command line.
+ compileFlag=lambda cfg, macro=macro: '-Wno-macro-redefined -D{}'.format(macro) + ( + '={}'.format(compilerMacros(cfg)[macro]) if compilerMacros(cfg)[macro] else '' + ) + ) + ] diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 99f10d7f21055..207df2322be9b 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -135,8 +135,6 @@ def _execute(self, test, lit_config): substitutions.append(('%{file_dependencies}', ' '.join(data_files))) # Add other convenience substitutions - if self.cxx.isVerifySupported(): - substitutions.append(('%{verify}', ' '.join(self.cxx.verify_flags))) substitutions.append(('%{build}', '%{cxx} -o %t.exe %s %{flags} %{compile_flags} %{link_flags}')) substitutions.append(('%{run}', '%{exec} %t.exe')) diff --git a/libcxx/utils/libcxx/test/newformat.py b/libcxx/utils/libcxx/test/newformat.py index 2ee6e502da6d6..0bb3be3369ba9 100644 --- a/libcxx/utils/libcxx/test/newformat.py +++ b/libcxx/utils/libcxx/test/newformat.py @@ -13,16 +13,16 @@ import re import subprocess -def _supportsVerify(test): +def _supportsVerify(config): """ - Determine whether clang-verify is supported for that test. + Determine whether clang-verify is supported by the given configuration. - This is done by checking whether the %{cxx} substitution supports certain - compiler flags. + This is done by checking whether the %{cxx} substitution in that + configuration supports certain compiler flags. """ command = "%{{cxx}} -xc++ {} -Werror -fsyntax-only -Xclang -verify-ignore-unexpected".format(os.devnull) - command = lit.TestRunner.applySubstitutions([command], test.config.substitutions, - recursion_limit=test.config.recursiveExpansionLimit)[0] + command = lit.TestRunner.applySubstitutions([command], config.substitutions, + recursion_limit=config.recursiveExpansionLimit)[0] devNull = open(os.devnull, 'w') result = subprocess.call(command, shell=True, stdout=devNull, stderr=devNull) return result == 0 @@ -56,11 +56,6 @@ def parseScript(test, preamble, fileDependencies): substitutions.append(('%{build}', '%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe')) substitutions.append(('%{run}', '%{exec} %t.exe')) - # Add the %{verify} substitution and the verify-support feature if Clang-verify is supported - if _supportsVerify(test): - test.config.available_features.add('verify-support') - substitutions.append(('%{verify}', '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0')) - # Parse the test file, including custom directives additionalCompileFlags = [] fileDependencies = list(fileDependencies) @@ -125,7 +120,9 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): FOO.sh. - A builtin Lit Shell test - FOO.verify.cpp - Compiles with clang-verify + FOO.verify.cpp - Compiles with clang-verify. This type of test is + automatically marked as UNSUPPORTED if the compiler + does not support Clang-verify. FOO.fail.cpp - Compiled with clang-verify if clang-verify is supported, and equivalent to a .compile.fail.cpp @@ -176,19 +173,7 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): Additional provided substitutions and features ============================================== - The test format will define the following substitutions for use inside - tests: - - %{verify} - - This expands to the set of flags that must be passed to the - compiler in order to use Clang-verify, if that is supported. - - verify-support - - This Lit feature will be made available when the compiler supports - Clang-verify. 
This can be used to disable tests that require that - feature, such as `.verify.cpp` tests. + The test format will define the following substitutions for use inside tests: %{file_dependencies} @@ -203,18 +188,6 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): %{run} Equivalent to `%{exec} %t.exe`. This is intended to be used in conjunction with the %{build} substitution. - - - Design notes - ============ - This test format never implicitly disables a type of test. For example, - we could be tempted to automatically mark `.verify.cpp` tests as - UNSUPPORTED when clang-verify isn't supported by the compiler. However, - this sort of logic has been known to cause tests to be ignored in the - past, so we favour having tests mark themselves as unsupported explicitly. - - This test format still needs work in the following areas: - - It is unknown how well it works on Windows yet. """ def getTestsInDirectory(self, testSuite, pathInSuite, litConfig, localConfig): SUPPORTED_SUFFIXES = ['[.]pass[.]cpp$', '[.]pass[.]mm$', '[.]run[.]fail[.]cpp$', @@ -246,6 +219,8 @@ def _disableWithModules(self, test): def execute(self, test, litConfig): self._checkBaseSubstitutions(test.config.substitutions) + VERIFY_FLAGS = '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0' + supportsVerify = _supportsVerify(test.config) filename = test.path_in_suite[-1] # TODO(ldionne): We currently disable tests that re-define _LIBCPP_ASSERT @@ -256,50 +231,51 @@ def execute(self, test, litConfig): if '-fmodules' in test.config.available_features and self._disableWithModules(test): return lit.Test.Result(lit.Test.UNSUPPORTED, 'Test {} is unsupported when modules are enabled') - # TODO(ldionne): Enable -Werror with all supported compilers. - clangOrAppleClang = {'clang', 'apple-clang'}.intersection(test.config.available_features) != set() - werror = '-Werror' if clangOrAppleClang else '' - if re.search('[.]sh[.][^.]+$', filename): steps = [ ] # The steps are already in the script return self._executeShTest(test, litConfig, steps) elif filename.endswith('.compile.pass.cpp'): steps = [ - "%dbg(COMPILED WITH) %{{cxx}} %s {} %{{flags}} %{{compile_flags}} -fsyntax-only".format(werror) + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only" ] return self._executeShTest(test, litConfig, steps) elif filename.endswith('.compile.fail.cpp'): steps = [ - "%dbg(COMPILED WITH) ! %{{cxx}} %s {} %{{flags}} %{{compile_flags}} -fsyntax-only".format(werror) + "%dbg(COMPILED WITH) ! %{cxx} %s %{flags} %{compile_flags} -fsyntax-only" ] return self._executeShTest(test, litConfig, steps) elif filename.endswith('.link.pass.cpp'): steps = [ - "%dbg(COMPILED WITH) %{{cxx}} %s {} %{{flags}} %{{compile_flags}} %{{link_flags}} -o %t.exe".format(werror) + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe" ] return self._executeShTest(test, litConfig, steps) elif filename.endswith('.link.fail.cpp'): steps = [ - "%dbg(COMPILED WITH) %{{cxx}} %s {} %{{flags}} %{{compile_flags}} -c -o %t.o".format(werror), + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -c -o %t.o", "%dbg(LINKED WITH) ! %{cxx} %t.o %{flags} %{link_flags} -o %t.exe" ] return self._executeShTest(test, litConfig, steps) elif filename.endswith('.run.fail.cpp'): steps = [ - "%dbg(COMPILED WITH) %{{cxx}} %s {} %{{flags}} %{{compile_flags}} %{{link_flags}} -o %t.exe".format(werror), + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe", "%dbg(EXECUTED AS) %{exec} ! 
%t.exe" ] return self._executeShTest(test, litConfig, steps, fileDependencies=['%t.exe']) elif filename.endswith('.verify.cpp'): + if not supportsVerify: + return lit.Test.Result(lit.Test.UNSUPPORTED, + "Test {} requires support for Clang-verify, which isn't supported by the compiler".format(test.getFullName())) steps = [ - "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}" + # Note: Use -Wno-error to make sure all diagnostics are not treated as errors, + # which doesn't make sense for clang-verify tests. + "%dbg(COMPILED WITH) %{{cxx}} %s %{{flags}} %{{compile_flags}} -fsyntax-only -Wno-error {}".format(VERIFY_FLAGS) ] return self._executeShTest(test, litConfig, steps) # Make sure to check these ones last, since they will match other # suffixes above too. elif filename.endswith('.pass.cpp') or filename.endswith('.pass.mm'): steps = [ - "%dbg(COMPILED WITH) %{{cxx}} %s {} %{{flags}} %{{compile_flags}} %{{link_flags}} -o %t.exe".format(werror), + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe", "%dbg(EXECUTED AS) %{exec} %t.exe" ] return self._executeShTest(test, litConfig, steps, fileDependencies=['%t.exe']) @@ -307,13 +283,13 @@ def execute(self, test, litConfig): # otherwise it's like a .compile.fail.cpp test. This is only provided # for backwards compatibility with the test suite. elif filename.endswith('.fail.cpp'): - if _supportsVerify(test): + if supportsVerify: steps = [ - "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}" + "%dbg(COMPILED WITH) %{{cxx}} %s %{{flags}} %{{compile_flags}} -fsyntax-only -Wno-error {}".format(VERIFY_FLAGS) ] else: steps = [ - "%dbg(COMPILED WITH) ! %{{cxx}} {} %s %{{flags}} %{{compile_flags}} -fsyntax-only".format(werror) + "%dbg(COMPILED WITH) ! %{cxx} %s %{flags} %{compile_flags} -fsyntax-only" ] return self._executeShTest(test, litConfig, steps) else: @@ -333,7 +309,7 @@ def _executeShTest(self, test, litConfig, steps, fileDependencies=None): return script if litConfig.noExecute: - return lit.Test.Result(lit.Test.PASS) + return lit.Test.Result(lit.Test.XFAIL if test.isExpectedToFail() else lit.Test.PASS) else: _, tmpBase = lit.TestRunner.getTempPaths(test) useExternalSh = True diff --git a/libcxx/utils/libcxx/test/target_info.py b/libcxx/utils/libcxx/test/target_info.py index caa161ec93be8..b5b8a9ded8045 100644 --- a/libcxx/utils/libcxx/test/target_info.py +++ b/libcxx/utils/libcxx/test/target_info.py @@ -7,6 +7,7 @@ #===----------------------------------------------------------------------===// import importlib +import lit.util import locale import os import platform @@ -91,13 +92,12 @@ def __init__(self, full_config): super(DarwinLocalTI, self).__init__(full_config) def is_host_macosx(self): - name = subprocess.check_output(['sw_vers', '-productName']).strip() + name = lit.util.to_string(subprocess.check_output(['sw_vers', '-productName'])).strip() return name == "Mac OS X" def get_macosx_version(self): assert self.is_host_macosx() - version = subprocess.check_output( - ['sw_vers', '-productVersion']).strip() + version = lit.util.to_string(subprocess.check_output(['sw_vers', '-productVersion'])).strip() version = re.sub(r'([0-9]+\.[0-9]+)(\..*)?', r'\1', version) return version diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index c8a74b56dafd3..9f1ff469be0b0 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -227,7 +227,7 @@

Paper Status

P0586 | LWG | Safe integral comparisons | Prague P0593 | CWG | Implicit creation of objects for low-level object manipulation | Prague - P1115 | LWG | Improving the Return Value of Erase-Like Algorithms II: Free erase/erase_if | Prague + P1115 | LWG | Improving the Return Value of Erase-Like Algorithms II: Free erase/erase_if | Prague | Complete | 11.0 P1243 | LWG | Rangify New Algorithms | Prague P1460 | LWG | Mandating the Standard Library: Clause 20 - Utilities library | Prague P1739 | LWG | Avoid template bloat for safe_ranges in combination with "subrange-y" view adaptors | Prague diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake index 8892964ad3769..15b52083fee26 100644 --- a/libcxxabi/cmake/config-ix.cmake +++ b/libcxxabi/cmake/config-ix.cmake @@ -6,8 +6,12 @@ include(CheckCSourceCompiles) check_library_exists(c fopen "" LIBCXXABI_HAS_C_LIB) if (NOT LIBCXXABI_USE_COMPILER_RT) - check_library_exists(gcc_s __gcc_personality_v0 "" LIBCXXABI_HAS_GCC_S_LIB) - check_library_exists(gcc __aeabi_uldivmod "" LIBCXXABI_HAS_GCC_LIB) + if (ANDROID) + check_library_exists(gcc __gcc_personality_v0 "" LIBCXXABI_HAS_GCC_LIB) + else () + check_library_exists(gcc_s __gcc_personality_v0 "" LIBCXXABI_HAS_GCC_S_LIB) + check_library_exists(gcc __aeabi_uldivmod "" LIBCXXABI_HAS_GCC_LIB) + endif () endif () # libc++abi is built with -nodefaultlibs, so we want all our checks to also diff --git a/libcxxabi/src/cxa_vector.cpp b/libcxxabi/src/cxa_vector.cpp index f20e978d36ef3..325bbf22d2011 100644 --- a/libcxxabi/src/cxa_vector.cpp +++ b/libcxxabi/src/cxa_vector.cpp @@ -24,9 +24,9 @@ namespace __cxxabiv1 { -#if 0 -#pragma mark --Helper routines and classes -- -#endif +// +// Helper routines and classes +// namespace { inline static size_t __get_element_count ( void *p ) { @@ -111,9 +111,9 @@ namespace { }; } -#if 0 -#pragma mark --Externally visible routines-- -#endif +// +// Externally visible routines +// namespace { _LIBCXXABI_NORETURN diff --git a/libcxxabi/test/incomplete_type.sh.cpp b/libcxxabi/test/incomplete_type.sh.cpp index 999c172dd5b5e..20deb16011f0f 100644 --- a/libcxxabi/test/incomplete_type.sh.cpp +++ b/libcxxabi/test/incomplete_type.sh.cpp @@ -20,8 +20,8 @@ // for shell tests because of Apple security features).
// FILE_DEPENDENCIES: %t.exe -// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.one.o -// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.two.o -DTU_ONE +// RUN: %{cxx} %{flags} %{compile_flags} -Wno-unreachable-code -c %s -o %t.one.o +// RUN: %{cxx} %{flags} %{compile_flags} -Wno-unreachable-code -c %s -o %t.two.o -DTU_ONE +// RUN: %{cxx} %{flags} %t.one.o %t.two.o %{link_libcxxabi} %{link_flags} -o %t.exe +// RUN: %{exec} %t.exe diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index 9c8089cfe2150..3e42818882b33 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -8,8 +8,12 @@ include(CheckCSourceCompiles) check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB) if (NOT LIBUNWIND_USE_COMPILER_RT) - check_library_exists(gcc_s __gcc_personality_v0 "" LIBUNWIND_HAS_GCC_S_LIB) - check_library_exists(gcc __absvdi2 "" LIBUNWIND_HAS_GCC_LIB) + if (ANDROID) + check_library_exists(gcc __gcc_personality_v0 "" LIBUNWIND_HAS_GCC_LIB) + else () + check_library_exists(gcc_s __gcc_personality_v0 "" LIBUNWIND_HAS_GCC_S_LIB) + check_library_exists(gcc __absvdi2 "" LIBUNWIND_HAS_GCC_LIB) + endif () endif() # libunwind is built with -nodefaultlibs, so we want all our checks to also diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 3bd9e9556a855..7bbc59c7a811a 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -56,16 +56,38 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(HandleLLVMOptions) if(LLVM_INCLUDE_TESTS) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR -"Unable to find Python interpreter, required for testing. + if(CMAKE_VERSION VERSION_LESS 3.12) + include(FindPythonInterp) + if(NOT PYTHONINTERP_FOUND) + message(FATAL_ERROR + "Unable to find Python interpreter, required for testing. -Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() + Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") + endif() + + if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) + message(FATAL_ERROR "Python 2.7 or newer is required") + endif() - if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) - message(FATAL_ERROR "Python 2.7 or newer is required") + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + else() + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") + endif() + + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) + endif() endif() # Check prebuilt llvm/utils. diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 2be2a72c4a1e9..0528143383c5c 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -269,7 +269,8 @@ class SectionChunk final : public Chunk { AssociatedIterator() = default; AssociatedIterator(SectionChunk *head) : cur(head) {} bool operator==(const AssociatedIterator &r) const { return cur == r.cur; } - const SectionChunk &operator*() const { return *cur; } + // FIXME: Wrong const-ness, but it makes filter ranges work.
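// A standalone sketch (hypothetical Node/NodeIterator types, not lld's) of the
// trade-off the FIXME above describes: a filtering range adapter dereferences
// the wrapped iterator through a const-qualified operator*, so that operator
// must be const yet still yield a mutable reference if callers are to modify
// the filtered elements.
#include <cassert>
struct Node {
  int value = 0;
  Node *next = nullptr;
};
struct NodeIterator {
  Node *cur = nullptr;
  bool operator==(const NodeIterator &r) const { return cur == r.cur; }
  bool operator!=(const NodeIterator &r) const { return cur != r.cur; }
  // Const member, mutable result: the same "wrong const-ness" compromise.
  Node &operator*() const { return *cur; }
  NodeIterator &operator++() {
    cur = cur->next;
    return *this;
  }
};
int main() {
  Node c{3}, b{2, &c}, a{1, &b};
  for (NodeIterator it{&a}, end{}; it != end; ++it)
    (*it).value *= 10; // mutation through a const iterator object
  assert(a.value == 10 && b.value == 20 && c.value == 30);
}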
+ SectionChunk &operator*() const { return *cur; } SectionChunk &operator*() { return *cur; } AssociatedIterator &operator++() { cur = cur->assocChildren; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 08415fa009842..0469b49b88eb2 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -251,7 +251,7 @@ void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) { // the option `/nodefaultlib` than a reference to a file in the root // directory. std::string nearest; - if (COFFOptTable().findNearest(pathStr, nearest) > 1) + if (optTable.findNearest(pathStr, nearest) > 1) error(msg); else error(msg + "; did you mean '" + nearest + "'"); @@ -343,11 +343,9 @@ void LinkerDriver::parseDirectives(InputFile *file) { ArgParser parser; // .drectve is always tokenized using Windows shell rules. // /EXPORT: option can appear too many times, processing in fastpath. - opt::InputArgList args; - std::vector exports; - std::tie(args, exports) = parser.parseDirectives(s); + ParsedDirectives directives = parser.parseDirectives(s); - for (StringRef e : exports) { + for (StringRef e : directives.exports) { // If a common header file contains dllexported function // declarations, many object files may end up with having the // same /EXPORT options. In order to save cost of parsing them, @@ -366,7 +364,11 @@ void LinkerDriver::parseDirectives(InputFile *file) { config->exports.push_back(exp); } - for (auto *arg : args) { + // Handle /include: in bulk. + for (StringRef inc : directives.includes) + addUndefined(inc); + + for (auto *arg : directives.args) { switch (arg->getOption().getID()) { case OPT_aligncomm: parseAligncomm(arg->getValue()); diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index cc2f25a6f95e6..92c0db84c3847 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -41,6 +41,21 @@ class COFFOptTable : public llvm::opt::OptTable { COFFOptTable(); }; +// Constructing the option table is expensive. Use a global table to avoid doing +// it more than once. +extern COFFOptTable optTable; + +// The result of parsing the .drective section. The /export: and /include: +// options are handled separately because they reference symbols, and the number +// of symbols can be quite large. The LLVM Option library will perform at least +// one memory allocation per argument, and that is prohibitively slow for +// parsing directives. +struct ParsedDirectives { + std::vector exports; + std::vector includes; + llvm::opt::InputArgList args; +}; + class ArgParser { public: // Parses command line options. @@ -52,16 +67,13 @@ class ArgParser { // Tokenizes a given string and then parses as command line options in // .drectve section. /EXPORT options are returned in second element // to be processed in fastpath. - std::pair> - parseDirectives(StringRef s); + ParsedDirectives parseDirectives(StringRef s); private: // Concatenate LINK environment variable. void addLINK(SmallVector &argv); std::vector tokenize(StringRef s); - - COFFOptTable table; }; class LinkerDriver { diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index f1b50e6142e1d..6cb761abea4ed 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -767,6 +767,8 @@ static const llvm::opt::OptTable::Info infoTable[] = { COFFOptTable::COFFOptTable() : OptTable(infoTable, true) {} +COFFOptTable optTable; + // Set color diagnostics according to --color-diagnostics={auto,always,never} // or --no-color-diagnostics flags. 
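// Stepping back to the ParsedDirectives fast path introduced above, here is a
// minimal standalone sketch of the idea under stand-in assumptions
// (std::string instead of StringRef, no option table): high-volume /export:
// and /include: tokens are matched by prefix and never reach the allocating
// option parser; only the leftovers are handed on.
#include <cctype>
#include <string>
#include <vector>
struct Directives {
  std::vector<std::string> exports, includes, rest;
};
// Case-insensitive prefix match, akin to StringRef::startswith_lower.
static bool consumePrefix(const std::string &tok, const std::string &prefix,
                          std::string &out) {
  if (tok.size() < prefix.size())
    return false;
  for (size_t i = 0; i < prefix.size(); ++i)
    if (std::tolower(static_cast<unsigned char>(tok[i])) != prefix[i])
      return false;
  out = tok.substr(prefix.size());
  return true;
}
static Directives parseDirectives(const std::vector<std::string> &tokens) {
  Directives d;
  std::string arg;
  for (const std::string &tok : tokens) {
    if (consumePrefix(tok, "/export:", arg) ||
        consumePrefix(tok, "-export:", arg))
      d.exports.push_back(arg);
    else if (consumePrefix(tok, "/include:", arg) ||
             consumePrefix(tok, "-include:", arg))
      d.includes.push_back(arg);
    else
      d.rest.push_back(tok); // handed to the general option table
  }
  return d;
}
int main() {
  Directives d = parseDirectives({"/EXPORT:foo", "-include:bar", "/machine:x64"});
  return d.exports.size() == 1 && d.includes.size() == 1 && d.rest.size() == 1
             ? 0
             : 1;
}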
static void handleColorDiagnostics(opt::InputArgList &args) { @@ -812,8 +814,7 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { // options so we parse here before and ignore all the options but // --rsp-quoting and /lldignoreenv. // (This means --rsp-quoting can't be added through %LINK%.) - opt::InputArgList args = table.ParseArgs(argv, missingIndex, missingCount); - + opt::InputArgList args = optTable.ParseArgs(argv, missingIndex, missingCount); // Expand response files (arguments in the form of @) and insert // flags from %LINK% and %_LINK_%, and then parse the argument again. @@ -822,8 +823,8 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { if (!args.hasArg(OPT_lldignoreenv)) addLINK(expandedArgv); cl::ExpandResponseFiles(saver, getQuotingStyle(args), expandedArgv); - args = table.ParseArgs(makeArrayRef(expandedArgv).drop_front(), missingIndex, - missingCount); + args = optTable.ParseArgs(makeArrayRef(expandedArgv).drop_front(), + missingIndex, missingCount); // Print the real command line if response files are expanded. if (args.hasArg(OPT_verbose) && argv.size() != expandedArgv.size()) { @@ -847,7 +848,7 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { for (auto *arg : args.filtered(OPT_UNKNOWN)) { std::string nearest; - if (table.findNearest(arg->getAsString(args), nearest) > 1) + if (optTable.findNearest(arg->getAsString(args), nearest) > 1) warn("ignoring unknown argument '" + arg->getAsString(args) + "'"); else warn("ignoring unknown argument '" + arg->getAsString(args) + @@ -861,30 +862,38 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) { } // Tokenizes and parses a given string as command line in .drective section. -// /EXPORT options are processed in fastpath. -std::pair> -ArgParser::parseDirectives(StringRef s) { - std::vector exports; +ParsedDirectives ArgParser::parseDirectives(StringRef s) { + ParsedDirectives result; SmallVector rest; - for (StringRef tok : tokenize(s)) { + // Handle /EXPORT and /INCLUDE in a fast path. These directives can appear for + // potentially every symbol in the object, so they must be handled quickly. + SmallVector tokens; + cl::TokenizeWindowsCommandLineNoCopy(s, saver, tokens); + for (StringRef tok : tokens) { if (tok.startswith_lower("/export:") || tok.startswith_lower("-export:")) - exports.push_back(tok.substr(strlen("/export:"))); - else - rest.push_back(tok.data()); + result.exports.push_back(tok.substr(strlen("/export:"))); + else if (tok.startswith_lower("/include:") || + tok.startswith_lower("-include:")) + result.includes.push_back(tok.substr(strlen("/include:"))); + else { + // Save non-null-terminated strings to make proper C strings. + bool HasNul = tok.data()[tok.size()] == '\0'; + rest.push_back(HasNul ? tok.data() : saver.save(tok).data()); + } } // Make InputArgList from unparsed string vectors. unsigned missingIndex; unsigned missingCount; - opt::InputArgList args = table.ParseArgs(rest, missingIndex, missingCount); + result.args = optTable.ParseArgs(rest, missingIndex, missingCount); if (missingCount) - fatal(Twine(args.getArgString(missingIndex)) + ": missing argument"); - for (auto *arg : args.filtered(OPT_UNKNOWN)) - warn("ignoring unknown argument: " + arg->getAsString(args)); - return {std::move(args), std::move(exports)}; + fatal(Twine(result.args.getArgString(missingIndex)) + ": missing argument"); + for (auto *arg : result.args.filtered(OPT_UNKNOWN)) + warn("ignoring unknown argument: " + arg->getAsString(result.args)); + return result; } // link.exe has an interesting feature. 
If LINK or _LINK_ environment @@ -909,9 +918,9 @@ std::vector ArgParser::tokenize(StringRef s) { } void printHelp(const char *argv0) { - COFFOptTable().PrintHelp(lld::outs(), - (std::string(argv0) + " [options] file...").c_str(), - "LLVM Linker", false); + optTable.PrintHelp(lld::outs(), + (std::string(argv0) + " [options] file...").c_str(), + "LLVM Linker", false); } } // namespace coff diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index c821569e34579..330379869cf99 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -127,15 +127,19 @@ void ICF::segregate(size_t begin, size_t end, bool constant) { // Returns true if two sections' associative children are equal. bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { - auto childClasses = [&](const SectionChunk *sc) { - std::vector classes; - for (const SectionChunk &c : sc->children()) - if (!c.getSectionName().startswith(".debug") && - c.getSectionName() != ".gfids$y" && c.getSectionName() != ".gljmp$y") - classes.push_back(c.eqClass[cnt % 2]); - return classes; + // Ignore associated metadata sections that don't participate in ICF, such as + // debug info and CFGuard metadata. + auto considerForICF = [](const SectionChunk &assoc) { + StringRef Name = assoc.getSectionName(); + return !(Name.startswith(".debug") || Name == ".gfids$y" || + Name == ".gljmp$y"); }; - return childClasses(a) == childClasses(b); + auto ra = make_filter_range(a->children(), considerForICF); + auto rb = make_filter_range(b->children(), considerForICF); + return std::equal(ra.begin(), ra.end(), rb.begin(), rb.end(), + [&](const SectionChunk &ia, const SectionChunk &ib) { + return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2]; + }); } // Compare "non-moving" part of two sections, namely everything diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index c88ce0ee95a93..bb44819e60f8d 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -81,6 +81,7 @@ static lto::Config createConfig() { c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty(); if (config->saveTemps) checkError(c.addSaveTemps(std::string(config->outputFile) + ".", diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index a182ed34165de..24b7ab5f35652 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -87,7 +87,7 @@ class PDBLinker { public: PDBLinker(SymbolTable *symtab) - : alloc(), symtab(symtab), builder(alloc), tMerger(alloc) { + : symtab(symtab), builder(bAlloc), tMerger(bAlloc) { // This isn't strictly necessary, but link.exe usually puts an empty string // as the first "valid" string in the string table, so we do the same in // order to maintain as much byte-for-byte compatibility as possible. @@ -166,8 +166,6 @@ class PDBLinker { void printStats(); private: - BumpPtrAllocator alloc; - SymbolTable *symtab; pdb::PDBFileBuilder builder; @@ -726,10 +724,9 @@ static void translateIdSymbols(MutableArrayRef &recordData, // in both cases we just need the second type index. if (!ti->isSimple() && !ti->isNoneType()) { CVType funcIdData = iDTable.getType(*ti); - SmallVector indices; - discoverTypeIndices(funcIdData, indices); - assert(indices.size() == 2); - *ti = indices[1]; + ArrayRef tiBuf = funcIdData.data().slice(8, 4); + assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record"); + *ti = *reinterpret_cast(tiBuf.data()); } kind = (kind == SymbolKind::S_GPROC32_ID) ? 
SymbolKind::S_GPROC32 @@ -899,7 +896,7 @@ void PDBLinker::mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap, MutableArrayRef alignedSymbolMem; if (needsRealignment) { void *alignedData = - alloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb)); + bAlloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb)); alignedSymbolMem = makeMutableArrayRef( reinterpret_cast(alignedData), totalRealignedSize); } @@ -981,9 +978,8 @@ void PDBLinker::mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap, } // Allocate memory for a .debug$S / .debug$F section and relocate it. -static ArrayRef relocateDebugChunk(BumpPtrAllocator &alloc, - SectionChunk &debugChunk) { - uint8_t *buffer = alloc.Allocate(debugChunk.getSize()); +static ArrayRef relocateDebugChunk(SectionChunk &debugChunk) { + uint8_t *buffer = bAlloc.Allocate(debugChunk.getSize()); assert(debugChunk.getOutputSectionIdx() == 0 && "debug sections should not be in output sections"); debugChunk.writeTo(buffer); @@ -1031,7 +1027,7 @@ void DebugSHandler::handleDebugS(lld::coff::SectionChunk &debugS) { DebugSubsectionArray subsections; ArrayRef relocatedDebugContents = SectionChunk::consumeDebugMagic( - relocateDebugChunk(linker.alloc, debugS), debugS.getSectionName()); + relocateDebugChunk(debugS), debugS.getSectionName()); BinaryStreamReader reader(relocatedDebugContents, support::little); exitOnErr(reader.readArray(subsections, relocatedDebugContents.size())); @@ -1242,7 +1238,7 @@ void PDBLinker::addObjFile(ObjFile *file, CVIndexMap *externIndexMap) { if (debugChunk->getSectionName() == ".debug$F") { ArrayRef relocatedDebugContents = - relocateDebugChunk(alloc, *debugChunk); + relocateDebugChunk(*debugChunk); FixedStreamArray fpoRecords; BinaryStreamReader reader(relocatedDebugContents, support::little); @@ -1523,8 +1519,7 @@ static void fillLinkerVerRecord(Compile3Sym &cs) { } static void addCommonLinkerModuleSymbols(StringRef path, - pdb::DbiModuleDescriptorBuilder &mod, - BumpPtrAllocator &allocator) { + pdb::DbiModuleDescriptorBuilder &mod) { ObjNameSym ons(SymbolRecordKind::ObjNameSym); EnvBlockSym ebs(SymbolRecordKind::EnvBlockSym); Compile3Sym cs(SymbolRecordKind::Compile3Sym); @@ -1551,17 +1546,16 @@ static void addCommonLinkerModuleSymbols(StringRef path, ebs.Fields.push_back("cmd"); ebs.Fields.push_back(argStr); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( - ons, allocator, CodeViewContainer::Pdb)); + ons, bAlloc, CodeViewContainer::Pdb)); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( - cs, allocator, CodeViewContainer::Pdb)); + cs, bAlloc, CodeViewContainer::Pdb)); mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( - ebs, allocator, CodeViewContainer::Pdb)); + ebs, bAlloc, CodeViewContainer::Pdb)); } static void addLinkerModuleCoffGroup(PartialSection *sec, pdb::DbiModuleDescriptorBuilder &mod, - OutputSection &os, - BumpPtrAllocator &allocator) { + OutputSection &os) { // If there's a section, there's at least one chunk assert(!sec->chunks.empty()); const Chunk *firstChunk = *sec->chunks.begin(); @@ -1582,12 +1576,11 @@ static void addLinkerModuleCoffGroup(PartialSection *sec, cgs.Characteristics |= llvm::COFF::IMAGE_SCN_MEM_WRITE; mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( - cgs, allocator, CodeViewContainer::Pdb)); + cgs, bAlloc, CodeViewContainer::Pdb)); } static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod, - OutputSection &os, - BumpPtrAllocator &allocator) { + OutputSection &os) { SectionSym 
sym(SymbolRecordKind::SectionSym); sym.Alignment = 12; // 2^12 = 4KB sym.Characteristics = os.header.Characteristics; @@ -1596,7 +1589,7 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod, sym.Rva = os.getRVA(); sym.SectionNumber = os.sectionIndex; mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( - sym, allocator, CodeViewContainer::Pdb)); + sym, bAlloc, CodeViewContainer::Pdb)); // Skip COFF groups in MinGW because it adds a significant footprint to the // PDB, due to each function being in its own section @@ -1605,7 +1598,7 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod, // Output COFF groups for individual chunks of this section. for (PartialSection *sec : os.contribSections) { - addLinkerModuleCoffGroup(sec, mod, os, allocator); + addLinkerModuleCoffGroup(sec, mod, os); } } @@ -1672,18 +1665,18 @@ void PDBLinker::addImportFilesToPDB(ArrayRef outputSections) { ts.Offset = thunkChunk->getRVA() - thunkOS->getRVA(); mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol( - ons, alloc, CodeViewContainer::Pdb)); + ons, bAlloc, CodeViewContainer::Pdb)); mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol( - cs, alloc, CodeViewContainer::Pdb)); + cs, bAlloc, CodeViewContainer::Pdb)); SmallVector scopes; CVSymbol newSym = codeview::SymbolSerializer::writeOneSymbol( - ts, alloc, CodeViewContainer::Pdb); + ts, bAlloc, CodeViewContainer::Pdb); scopeStackOpen(scopes, mod->getNextSymbolOffset(), newSym); mod->addSymbol(newSym); - newSym = codeview::SymbolSerializer::writeOneSymbol(es, alloc, + newSym = codeview::SymbolSerializer::writeOneSymbol(es, bAlloc, CodeViewContainer::Pdb); scopeStackClose(scopes, mod->getNextSymbolOffset(), file); @@ -1759,11 +1752,11 @@ void PDBLinker::addSections(ArrayRef outputSections, uint32_t pdbFilePathNI = dbiBuilder.addECName(nativePath); auto &linkerModule = exitOnErr(dbiBuilder.addModuleInfo("* Linker *")); linkerModule.setPdbFilePathNI(pdbFilePathNI); - addCommonLinkerModuleSymbols(nativePath, linkerModule, alloc); + addCommonLinkerModuleSymbols(nativePath, linkerModule); // Add section contributions. They must be ordered by ascending RVA. for (OutputSection *os : outputSections) { - addLinkerModuleSectionSymbol(linkerModule, *os, alloc); + addLinkerModuleSectionSymbol(linkerModule, *os); for (Chunk *c : os->chunks) { pdb::SectionContrib sc = createSectionContrib(c, linkerModule.getModuleIndex()); diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 290a29d8af7d4..9cbd245244e80 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -52,23 +52,15 @@ std::string toCOFFString(const Archive::Symbol &b) { namespace coff { -StringRef Symbol::getName() { - // COFF symbol names are read lazily for a performance reason. - // Non-external symbol names are never used by the linker except for logging - // or debugging. Their internal references are resolved not by name but by - // symbol index. And because they are not external, no one can refer them by - // name. Object files contain lots of non-external symbols, and creating - // StringRefs for them (which involves lots of strlen() on the string table) - // is a waste of time. 
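// A minimal standalone sketch of the lazy-name caching pattern described
// above, with a hypothetical class and a string literal standing in for the
// COFF string table: the hot path is an inline null check, and the
// strlen-heavy lookup runs at most once per symbol.
#include <cassert>
#include <cstring>
#include <string_view>
class LazyNamedSymbol {
public:
  std::string_view getName() {
    if (nameData == nullptr)
      computeName(); // out-of-line slow path, executed at most once
    return {nameData, nameSize};
  }

private:
  void computeName() {
    static const char stringTable[] = "_main"; // stand-in for the real table
    nameData = stringTable;
    nameSize = std::strlen(stringTable);
  }
  const char *nameData = nullptr;
  size_t nameSize = 0;
};
int main() {
  LazyNamedSymbol s;
  assert(s.getName() == "_main");
  return 0;
}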
- if (nameData == nullptr) { - auto *d = cast(this); - StringRef nameStr; - cast(d->file)->getCOFFObj()->getSymbolName(d->sym, nameStr); - nameData = nameStr.data(); - nameSize = nameStr.size(); - assert(nameSize == nameStr.size() && "name length truncated"); - } - return StringRef(nameData, nameSize); +void Symbol::computeName() { + assert(nameData == nullptr && + "should only compute the name once for DefinedCOFF symbols"); + auto *d = cast(this); + StringRef nameStr; + cast(d->file)->getCOFFObj()->getSymbolName(d->sym, nameStr); + nameData = nameStr.data(); + nameSize = nameStr.size(); + assert(nameSize == nameStr.size() && "name length truncated"); } InputFile *Symbol::getFile() { diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index a8e70320b9956..1da4df3669662 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -69,7 +69,18 @@ class Symbol { Kind kind() const { return static_cast(symbolKind); } // Returns the symbol name. - StringRef getName(); + StringRef getName() { + // COFF symbol names are read lazily for a performance reason. + // Non-external symbol names are never used by the linker except for logging + // or debugging. Their internal references are resolved not by name but by + // symbol index. And because they are not external, no one can refer them by + // name. Object files contain lots of non-external symbols, and creating + // StringRefs for them (which involves lots of strlen() on the string table) + // is a waste of time. + if (nameData == nullptr) + computeName(); + return StringRef(nameData, nameSize); + } void replaceKeepingName(Symbol *other, size_t size); @@ -84,6 +95,9 @@ class Symbol { return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; } +private: + void computeName(); + protected: friend SymbolTable; explicit Symbol(Kind k, StringRef n = "") diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 27b20baceeede..60cc581f94fe1 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -120,6 +120,8 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: case R_HEX_GD_PLT_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: return R_PLT_PC; case R_HEX_IE_32_6_X: case R_HEX_IE_16_X: @@ -311,16 +313,18 @@ void Hexagon::relocate(uint8_t *loc, const Relocation &rel, case R_HEX_B15_PCREL_X: or32le(loc, applyMask(0x00df20fe, val & 0x3f)); break; - case R_HEX_GD_PLT_B22_PCREL: case R_HEX_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: checkInt(loc, val, 22, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL_X: or32le(loc, applyMask(0x1ff3ffe, val & 0x3f)); break; case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_GOTREL_HI16: diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index e01a16b810230..7e5ee7acef083 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -110,6 +110,7 @@ struct Configuration { llvm::StringRef optRemarksPasses; llvm::StringRef optRemarksFormat; llvm::StringRef progName; + llvm::StringRef printArchiveStats; llvm::StringRef printSymbolOrder; llvm::StringRef soName; llvm::StringRef sysroot; @@ -166,6 +167,7 @@ struct Configuration { bool ignoreFunctionAddressEquality; bool ltoCSProfileGenerate; bool ltoDebugPassManager; + bool ltoEmitAsm; bool ltoNewPassManager; bool ltoUniqueBBSectionNames; bool ltoWholeProgramVisibility; diff --git a/lld/ELF/DWARF.cpp 
b/lld/ELF/DWARF.cpp index bf4fff76c31f8..f1587d93e53b7 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -36,6 +36,7 @@ template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { .Case(".debug_gnu_pubnames", &gnuPubnamesSection) .Case(".debug_gnu_pubtypes", &gnuPubtypesSection) .Case(".debug_info", &infoSection) + .Case(".debug_loclists", &loclistsSection) .Case(".debug_ranges", &rangesSection) .Case(".debug_rnglists", &rnglistsSection) .Case(".debug_str_offsets", &strOffsetsSection) diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index 51ec9092f1723..8609e35faf953 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -32,6 +32,10 @@ template class LLDDwarfObj final : public llvm::DWARFObject { f(infoSection); } + const llvm::DWARFSection &getLoclistsSection() const override { + return loclistsSection; + } + const llvm::DWARFSection &getRangesSection() const override { return rangesSection; } @@ -81,6 +85,7 @@ template class LLDDwarfObj final : public llvm::DWARFObject { LLDDWARFSection gnuPubnamesSection; LLDDWARFSection gnuPubtypesSection; LLDDWARFSection infoSection; + LLDDWARFSection loclistsSection; LLDDWARFSection rangesSection; LLDDWARFSection rnglistsSection; LLDDWARFSection strOffsetsSection; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index a5b1179133561..4fa1d60826d05 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -90,8 +90,10 @@ bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS, inputSections.clear(); outputSections.clear(); + archiveFiles.clear(); binaryFiles.clear(); bitcodeFiles.clear(); + lazyObjFiles.clear(); objectFiles.clear(); sharedFiles.clear(); backwardReferences.clear(); @@ -917,6 +919,7 @@ static void readConfigs(opt::InputArgList &args) { config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate); config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file); config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); + config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm); config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager); config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); config->ltoWholeProgramVisibility = @@ -953,6 +956,7 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); config->printGcSections = args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false); + config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); config->rpath = getRpath(args); @@ -963,7 +967,7 @@ static void readConfigs(opt::InputArgList &args) { config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); - config->singleRoRx = args.hasArg(OPT_no_rosegment); + config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true); config->soName = args.getLastArgValue(OPT_soname); config->sortSection = getSortSection(args); config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384); @@ -1949,13 +1953,10 @@ template void LinkerDriver::link(opt::InputArgList &args) { // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObject, so we are done if that's the case. 
- if (config->thinLTOIndexOnly) - return; - - // Likewise, --plugin-opt=emit-llvm is an option to make LTO create - // an output file in bitcode and exit, so that you can just get a - // combined bitcode file. - if (config->emitLLVM) + // Likewise, --plugin-opt=emit-llvm and --plugin-opt=emit-asm are the + // options to create output files in bitcode or assembly code + // respectively. No object files are generated. + if (config->thinLTOIndexOnly || config->emitLLVM || config->ltoEmitAsm) return; // Apply symbol renames for -wrap. diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f3dd11fde3f66..e3cf340cabe19 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -55,6 +55,7 @@ std::string toString(const elf::InputFile *f) { namespace elf { bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; +std::vector archiveFiles; std::vector binaryFiles; std::vector bitcodeFiles; std::vector lazyObjFiles; @@ -173,6 +174,7 @@ template static void doParseFile(InputFile *file) { // .a file if (auto *f = dyn_cast(file)) { + archiveFiles.push_back(f); f->parse(); return; } @@ -1165,6 +1167,19 @@ void ArchiveFile::fetch(const Archive::Symbol &sym) { parseFile(file); } +size_t ArchiveFile::getMemberCount() const { + size_t count = 0; + Error err = Error::success(); + for (const Archive::Child &c : file->children(err)) { + (void)c; + ++count; + } + // This function is used by --print-archive-stats=, where an error does not + // really matter. + consumeError(std::move(err)); + return count; +} + unsigned SharedFile::vernauxNum; // Parse the version definitions in the object file if present, and return a diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index d08c533e047bb..48146e60e63e8 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -326,6 +326,9 @@ class ArchiveFile : public InputFile { // more than once.) void fetch(const Archive::Symbol &sym); + size_t getMemberCount() const; + size_t getFetchedMemberCount() const { return seen.size(); } + private: std::unique_ptr file; llvm::DenseSet seen; @@ -387,6 +390,7 @@ inline bool isBitcode(MemoryBufferRef mb) { std::string replaceThinLTOSuffix(StringRef path); +extern std::vector archiveFiles; extern std::vector binaryFiles; extern std::vector bitcodeFiles; extern std::vector lazyObjFiles; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index b9b6fef9dc483..dc8c1ee0c56df 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -45,6 +45,7 @@ std::string toString(const elf::InputSectionBase *sec) { namespace elf { std::vector inputSections; +DenseSet> ppc64noTocRelax; template static ArrayRef getSectionContents(ObjFile &file, @@ -970,7 +971,13 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { target->relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_TOC: - if (!tryRelaxPPC64TocIndirection(rel, bufLoc)) + // rel.sym refers to the STT_SECTION symbol associated to the .toc input + // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC + // entry, there may be R_PPC64_TOC16_HA not paired with + // R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation + // opportunities but is safe.
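// A standalone sketch of this record-then-consult scheme, with hypothetical
// stand-ins (std::set instead of llvm::DenseSet, an empty Symbol type): TOC
// entries observed with an unpaired low-part relocation are recorded during
// the relocation scan, and the relaxation pass checks the set before
// rewriting anything, as the guarded call below does.
#include <cstdint>
#include <set>
#include <utility>
struct Symbol {}; // stand-in for the .toc STT_SECTION symbol
static std::set<std::pair<const Symbol *, int64_t>> noTocRelax;
// Called while scanning relocations.
static void markUnrelaxable(const Symbol *toc, int64_t addend) {
  noTocRelax.insert({toc, addend});
}
// Called when applying R_PPC64_RELAX_TOC.
static bool mayRelax(const Symbol *toc, int64_t addend) {
  return noTocRelax.count({toc, addend}) == 0;
}
int main() {
  Symbol toc;
  markUnrelaxable(&toc, 0x8000);
  return mayRelax(&toc, 0x8000) ? 1 : 0; // the recorded entry must not relax
}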
+ if (ppc64noTocRelax.count({rel.sym, rel.addend}) || + !tryRelaxPPC64TocIndirection(rel, bufLoc)) target->relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 719971b9c72a1..112c6ab49a38d 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -397,6 +397,11 @@ inline bool isDebugSection(const InputSectionBase &sec) { // The list of all input sections. extern std::vector inputSections; +// The set of TOC entries (.toc + addend) for which we should not apply +// toc-indirect to toc-relative relaxation. const Symbol * refers to the +// STT_SECTION symbol associated to the .toc input section. +extern llvm::DenseSet> ppc64noTocRelax; + } // namespace elf std::string toString(const elf::InputSectionBase *); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index d09f2a74e48cf..503a9c4f20f83 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -138,6 +138,7 @@ static lto::Config createConfig() { c.DwoDir = std::string(config->dwoDir); c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty(); c.TimeTraceEnabled = config->timeTraceEnabled; c.TimeTraceGranularity = config->timeTraceGranularity; @@ -153,6 +154,9 @@ static lto::Config createConfig() { }; } + if (config->ltoEmitAsm) + c.CGFileType = CGFT_AssemblyFile; + if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath*/ true)); @@ -324,11 +328,19 @@ std::vector BitcodeCompiler::compile() { } if (config->saveTemps) { - saveBuffer(buf[0], config->outputFile + ".lto.o"); + if (!buf[0].empty()) + saveBuffer(buf[0], config->outputFile + ".lto.o"); for (unsigned i = 1; i != maxTasks; ++i) saveBuffer(buf[i], config->outputFile + Twine(i) + ".lto.o"); } + if (config->ltoEmitAsm) { + saveBuffer(buf[0], config->outputFile); + for (unsigned i = 1; i != maxTasks; ++i) + saveBuffer(buf[i], config->outputFile + Twine(i)); + return {}; + } + std::vector ret; for (unsigned i = 0; i != maxTasks; ++i) if (!buf[i].empty()) diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index e5f5c4f4ff23f..42181574d1061 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -259,5 +259,23 @@ void writeCrossReferenceTable() { } } +void writeArchiveStats() { + if (config->printArchiveStats.empty()) + return; + + std::error_code ec; + raw_fd_ostream os(config->printArchiveStats, ec, sys::fs::OF_None); + if (ec) { + error("--print-archive-stats=: cannot open " + config->printArchiveStats + + ": " + ec.message()); + return; + } + + os << "members\tfetched\tarchive\n"; + for (const ArchiveFile *f : archiveFiles) + os << f->getMemberCount() << '\t' << f->getFetchedMemberCount() << '\t' + << f->getName() << '\n'; +} + } // namespace elf } // namespace lld diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h index 7e7938919edfb..c4da18f8ad7f9 100644 --- a/lld/ELF/MapFile.h +++ b/lld/ELF/MapFile.h @@ -13,6 +13,7 @@ namespace lld { namespace elf { void writeMapFile(); void writeCrossReferenceTable(); +void writeArchiveStats(); } // namespace elf } // namespace lld diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 799707af49fae..ae32cab5c33b8 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -256,9 +256,6 @@ def no_nmagic: F<"no-nmagic">, MetaVarName<"">, def no_omagic: F<"no-omagic">, MetaVarName<"">, HelpText<"Do not set the text data sections to be writable, page align sections (default)">; -def no_rosegment: F<"no-rosegment">, - HelpText<"Do not put 
read-only non-executable sections in their own segment">; - def no_undefined: F<"no-undefined">, HelpText<"Report unresolved symbols even if the linker is creating a shared library">; @@ -297,6 +294,10 @@ defm print_icf_sections: B<"print-icf-sections", "List identical folded sections", "Do not list identical folded sections (default)">; +def print_archive_stats: J<"print-archive-stats=">, + HelpText<"Write archive usage statistics to the specified file. " + "Print the numbers of members and fetched members for each archive">; + defm print_symbol_order: Eq<"print-symbol-order", "Print a symbol order specified by --call-graph-ordering-file into the specified file">; @@ -311,6 +312,10 @@ def print_map: F<"print-map">, defm reproduce: Eq<"reproduce", "Write a tar file containing input files and command line options to reproduce link">; +defm rosegment: B<"rosegment", + "Put read-only non-executable sections in their own segment (default)", + "Do not put read-only non-executable sections in their own segment">; + defm rpath: Eq<"rpath", "Add a DT_RUNPATH to the output">; def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; @@ -483,6 +488,8 @@ def lto_aa_pipeline: J<"lto-aa-pipeline=">, HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">; def lto_debug_pass_manager: F<"lto-debug-pass-manager">, HelpText<"Debug new pass manager">; +def lto_emit_asm: F<"lto-emit-asm">, + HelpText<"Emit assembly code">; def lto_new_pass_manager: F<"lto-new-pass-manager">, HelpText<"Use new pass manager">; def lto_newpm_passes: J<"lto-newpm-passes=">, @@ -535,6 +542,8 @@ def: F<"plugin-opt=debug-pass-manager">, def: F<"plugin-opt=disable-verify">, Alias, HelpText<"Alias for --disable-verify">; def plugin_opt_dwo_dir_eq: J<"plugin-opt=dwo_dir=">, HelpText<"Directory to store .dwo files when LTO and debug fission are used">; +def plugin_opt_emit_asm: F<"plugin-opt=emit-asm">, + Alias, HelpText<"Alias for --lto-emit-asm">; def plugin_opt_emit_llvm: F<"plugin-opt=emit-llvm">; def: J<"plugin-opt=jobs=">, Alias, HelpText<"Alias for --thinlto-jobs">; def: J<"plugin-opt=lto-partitions=">, Alias, HelpText<"Alias for --lto-partitions">; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 21fec6e44aa0a..eb30166fcc4ca 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1294,17 +1294,6 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (expr == R_NONE) return; - // We can separate the small code model relocations into 2 categories: - // 1) Those that access the compiler generated .toc sections. - // 2) Those that access the linker allocated got entries. - // lld allocates got entries to symbols on demand. Since we don't try to sort - // the got entries in any way, we don't have to track which objects have - // got-based small code model relocs. The .toc sections get placed after the - // end of the linker allocated .got section and we do sort those so sections - // addressed with small code model relocations come first. - if (config->emachine == EM_PPC64 && isPPC64SmallCodeModelTocReloc(type)) - sec.file->ppc64SmallCodeModelTocRelocs = true; - if (sym.isGnuIFunc() && !config->zText && config->warnIfuncTextrel) { warn("using ifunc symbols when text relocations are allowed may produce " "a binary that will segfault, if the object file is linked with " @@ -1318,6 +1307,25 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // Read an addend. 
int64_t addend = computeAddend(rel, end, sec, expr, sym.isLocal()); + if (config->emachine == EM_PPC64) { + // We can separate the small code model relocations into 2 categories: + // 1) Those that access the compiler generated .toc sections. + // 2) Those that access the linker allocated got entries. + // lld allocates got entries to symbols on demand. Since we don't try to + // sort the got entries in any way, we don't have to track which objects + // have got-based small code model relocs. The .toc sections get placed + // after the end of the linker allocated .got section and we do sort those + // so sections addressed with small code model relocations come first. + if (isPPC64SmallCodeModelTocReloc(type)) + sec.file->ppc64SmallCodeModelTocRelocs = true; + + // Record the TOC entry (.toc + addend) as not relaxable. See the comment in + // InputSectionBase::relocateAlloc(). + if (type == R_PPC64_TOC16_LO && sym.isSection() && isa(sym) && + cast(sym).section->name == ".toc") + ppc64noTocRelax.insert({&sym, addend}); + } + // Relax relocations. // // If we know that a PLT entry will be resolved within the same ELF module, we @@ -1336,8 +1344,11 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, addend &= ~0x8000; // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into // call __tls_get_addr even if the symbol is non-preemptible. - if (!(config->emachine == EM_HEXAGON && type == R_HEX_GD_PLT_B22_PCREL)) - expr = fromPlt(expr); + if (!(config->emachine == EM_HEXAGON && + (type == R_HEX_GD_PLT_B22_PCREL || + type == R_HEX_GD_PLT_B22_PCREL_X || + type == R_HEX_GD_PLT_B32_PCREL_X))) + expr = fromPlt(expr); } } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index e1322cdb69f08..bffea2352489c 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3363,8 +3363,19 @@ void ARMExidxSyntheticSection::finalizeContents() { // ICF may remove executable InputSections and their dependent .ARM.exidx // section that we recorded earlier. auto isDiscarded = [](const InputSection *isec) { return !isec->isLive(); }; - llvm::erase_if(executableSections, isDiscarded); llvm::erase_if(exidxSections, isDiscarded); + // We need to remove discarded InputSections and InputSections without + // .ARM.exidx sections that if we generated the .ARM.exidx it would be out + // of range. + auto isDiscardedOrOutOfRange = [this](InputSection *isec) { + if (!isec->isLive()) + return true; + if (findExidxSection(isec)) + return false; + int64_t off = static_cast(isec->getVA() - getVA()); + return off != llvm::SignExtend64(off, 31); + }; + llvm::erase_if(executableSections, isDiscardedOrOutOfRange); // Sort the executable sections that may or may not have associated // .ARM.exidx sections by order of ascending address. 
This requires the diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 03fe0fe92aa0b..5fa4c44355f48 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -153,14 +153,23 @@ template void writeResult() { } static void removeEmptyPTLoad(std::vector &phdrs) { - llvm::erase_if(phdrs, [&](const PhdrEntry *p) { - if (p->p_type != PT_LOAD) - return false; - if (!p->firstSec) - return true; - uint64_t size = p->lastSec->addr + p->lastSec->size - p->firstSec->addr; - return size == 0; - }); + auto it = std::stable_partition( + phdrs.begin(), phdrs.end(), [&](const PhdrEntry *p) { + if (p->p_type != PT_LOAD) + return true; + if (!p->firstSec) + return false; + uint64_t size = p->lastSec->addr + p->lastSec->size - p->firstSec->addr; + return size != 0; + }); + + // Clear OutputSection::ptLoad for sections contained in removed + // segments. + DenseSet removed(it, phdrs.end()); + for (OutputSection *sec : outputSections) + if (removed.count(sec->ptLoad)) + sec->ptLoad = nullptr; + phdrs.erase(it, phdrs.end()); } void copySectionsIntoPartitions() { @@ -601,11 +610,13 @@ template void Writer::run() { for (OutputSection *sec : outputSections) sec->addr = 0; - // Handle --print-map(-M)/--Map and --cref. Dump them before checkSections() - // because the files may be useful in case checkSections() or openFile() - // fails, for example, due to an erroneous file size. + // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them + // before checkSections() because the files may be useful in case + // checkSections() or openFile() fails, for example, due to an erroneous file + // size. writeMapFile(); writeCrossReferenceTable(); + writeArchiveStats(); if (config->checkSections) checkSections(); @@ -681,6 +692,15 @@ static bool shouldKeepInSymtab(const Defined &sym) { if (config->copyRelocs && sym.used) return true; + // Exclude local symbols pointing to .ARM.exidx sections. + // They are probably mapping symbols "$d", which are optional for these + // sections. After merging the .ARM.exidx sections, some of these symbols + // may become dangling. The easiest way to avoid the issue is not to add + // them to the symbol table from the beginning. + if (config->emachine == EM_ARM && sym.section && + sym.section->type == SHT_ARM_EXIDX) + return false; + if (config->discard == DiscardPolicy::None) return true; if (config->discard == DiscardPolicy::All) @@ -1585,7 +1605,7 @@ static bool compareByFilePosition(InputSection *a, InputSection *b) { OutputSection *bOut = lb->getParent(); if (aOut != bOut) - return aOut->sectionIndex < bOut->sectionIndex; + return aOut->addr < bOut->addr; return la->outSecOff < lb->outSecOff; } @@ -1655,11 +1675,13 @@ template void Writer::finalizeAddressDependentContent() { AArch64Err843419Patcher a64p; ARMErr657417Patcher a32p; script->assignAddresses(); - // .ARM.exidx does not require precise addresses, but it does require the - // relative addresses of OutputSections because linker scripts can assign - // Virtual Addresses to OutputSections that are not monotonically increasing. + // .ARM.exidx and SHF_LINK_ORDER do not require precise addresses, but they + // do require the relative addresses of OutputSections because linker scripts + // can assign Virtual Addresses to OutputSections that are not monotonically + // increasing. 
for (Partition &part : partitions) finalizeSynthetic(part.armExidx); + resolveShfLinkOrder(); // Converts call x@GDPLT to call __tls_get_addr if (config->emachine == EM_HEXAGON) @@ -1922,6 +1944,7 @@ template void Writer::finalizeSections() { // we can correctly decide if a dynamic relocation is needed. This is called // after processSymbolAssignments() because it needs to know whether a // linker-script-defined symbol is absolute. + ppc64noTocRelax.clear(); if (!config->relocatable) { forEachRelSec(scanRelocations); reportUndefinedSymbols(); @@ -2092,12 +2115,6 @@ template void Writer::finalizeSections() { if (!script->hasSectionsCommand && !config->relocatable) fixSectionAlignments(); - // SHFLinkOrder processing must be processed after relative section placements are - // known but before addresses are allocated. - resolveShfLinkOrder(); - if (errorCount()) - return; - // This is used to: // 1) Create "thunks": // Jump instructions in many ISAs have small displacements, and therefore @@ -2120,6 +2137,8 @@ template void Writer::finalizeSections() { // sometimes using forward symbol declarations. We want to set the correct // values. They also might change after adding the thunks. finalizeAddressDependentContent(); + if (errorCount()) + return; // finalizeAddressDependentContent may have added local symbols to the static symbol table. finalizeSynthetic(in.symTab); diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index 39dd644d5d1cc..6c13ac53c747e 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -31,7 +31,11 @@ X86_64::X86_64() { uint64_t X86_64::getImplicitAddend(const uint8_t *loc, uint8_t type) const { switch (type) { + case X86_64_RELOC_BRANCH: case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: case X86_64_RELOC_GOT_LOAD: return read32le(loc); default: @@ -42,7 +46,11 @@ uint64_t X86_64::getImplicitAddend(const uint8_t *loc, uint8_t type) const { void X86_64::relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const { switch (type) { + case X86_64_RELOC_BRANCH: case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: case X86_64_RELOC_GOT_LOAD: // These types are only used for pc-relative relocations, so offset by 4 // since the RIP has advanced by 4 at this point. 
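// A minimal standalone sketch of the PC-relative arithmetic noted above,
// under assumptions not taken from this patch (little-endian host, a bare
// 4-byte fixup field rather than lld's relocation plumbing): by the time the
// CPU consumes a 4-byte RIP-relative field, RIP already points past it, so
// the stored displacement is target - (fixup address + 4).
#include <cassert>
#include <cstdint>
#include <cstring>
static void writePCRel32(uint8_t *loc, uint64_t fixupVA, uint64_t targetVA) {
  int32_t disp = static_cast<int32_t>(targetVA - (fixupVA + 4));
  std::memcpy(loc, &disp, sizeof(disp));
}
int main() {
  uint8_t buf[4];
  writePCRel32(buf, 0x1000, 0x2000);
  int32_t disp;
  std::memcpy(&disp, buf, sizeof(disp));
  assert(disp == 0x2000 - (0x1000 + 4));
  return 0;
}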
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt index 0ed841fb15e04..a4dc69ea35934 100644 --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -5,8 +5,11 @@ add_public_tablegen_target(MachOOptionsTableGen) add_lld_library(lldMachO2 Arch/X86_64.cpp Driver.cpp + ExportTrie.cpp InputFiles.cpp InputSection.cpp + MergedOutputSection.cpp + OutputSection.cpp OutputSegment.cpp SymbolTable.cpp Symbols.cpp diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 7542de0521695..5a3566fd09ce2 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -10,6 +10,7 @@ #define LLD_MACHO_CONFIG_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include @@ -19,9 +20,10 @@ namespace macho { class Symbol; struct Configuration { - llvm::StringRef outputFile; Symbol *entry; - + llvm::MachO::HeaderFileType outputType; + llvm::StringRef installName; + llvm::StringRef outputFile; std::vector searchPaths; }; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 2dd58dc02094d..aabdc2ed5320a 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" #include "Config.h" #include "InputFiles.h" +#include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" @@ -119,6 +120,9 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, lld::stdoutOS = &stdoutOS; lld::stderrOS = &stderrOS; + stderrOS.enable_colors(stderrOS.has_colors()); + // TODO: Set up error handler properly, e.g. the errorLimitExceededMsg + MachOOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); @@ -128,7 +132,10 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main")); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); + config->installName = + args.getLastArgValue(OPT_install_name, config->outputFile); config->searchPaths = getSearchPaths(args); + config->outputType = args.hasArg(OPT_dylib) ? MH_DYLIB : MH_EXECUTE; if (args.hasArg(OPT_v)) { message(getLLDVersion()); @@ -139,10 +146,6 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, return !errorCount(); } - getOrCreateOutputSegment("__TEXT", VM_PROT_READ | VM_PROT_EXECUTE); - getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE); - getOrCreateOutputSegment("__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE); - for (opt::Arg *arg : args) { switch (arg->getOption().getID()) { case OPT_INPUT: @@ -155,7 +158,7 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, } } - if (!isa(config->entry)) { + if (config->outputType == MH_EXECUTE && !isa(config->entry)) { error("undefined symbol: " + config->entry->getName()); return false; } @@ -167,14 +170,6 @@ bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, for (InputSection *sec : file->sections) inputSections.push_back(sec); - // Add input sections to output segments. - for (InputSection *isec : inputSections) { - OutputSegment *os = - getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE); - isec->parent = os; - os->sections[isec->name].push_back(isec); - } - // Write to an output file. writeResult(); diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp new file mode 100644 index 0000000000000..871cf334d616f --- /dev/null +++ b/lld/MachO/ExportTrie.cpp @@ -0,0 +1,236 @@ +//===- ExportTrie.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a partial implementation of the Mach-O export trie format. It's +// essentially a symbol table encoded as a compressed prefix trie, meaning that +// the common prefixes of each symbol name are shared for a more compact +// representation. The prefixes are stored on the edges of the trie, and one +// edge can represent multiple characters. For example, given two exported +// symbols _bar and _baz, we will have a trie like this (terminal nodes are +// marked with an asterisk): +// +// +-+-+ +// | | // root node +// +-+-+ +// | +// | _ba +// | +// +-+-+ +// | | +// +-+-+ +// r / \ z +// / \ +// +-+-+ +-+-+ +// | * | | * | +// +-+-+ +-+-+ +// +// More documentation of the format can be found in +// llvm/tools/obj2yaml/macho2yaml.cpp. +// +//===----------------------------------------------------------------------===// + +#include "ExportTrie.h" +#include "Symbols.h" + +#include "lld/Common/Memory.h" +#include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace lld; +using namespace lld::macho; + +namespace { + +struct Edge { + Edge(StringRef s, TrieNode *node) : substring(s), child(node) {} + + StringRef substring; + struct TrieNode *child; +}; + +struct ExportInfo { + uint64_t address; + // TODO: Add proper support for re-exports & stub-and-resolver flags. +}; + +} // namespace + +namespace lld { +namespace macho { + +struct TrieNode { + std::vector edges; + Optional info; + // Estimated offset from the start of the serialized trie to the current node. + // This will converge to the true offset when updateOffset() is run to a + // fixpoint. + size_t offset = 0; + + // Returns whether the new estimated offset differs from the old one. + bool updateOffset(size_t &nextOffset); + void writeTo(uint8_t *buf) const; +}; + +bool TrieNode::updateOffset(size_t &nextOffset) { + // Size of the whole node (including the terminalSize and the outgoing edges.) + // In contrast, terminalSize only records the size of the other data in the + // node. + size_t nodeSize; + if (info) { + uint64_t flags = 0; + uint32_t terminalSize = + getULEB128Size(flags) + getULEB128Size(info->address); + // Overall node size so far is the uleb128 size of the length of the symbol + // info + the symbol info itself. + nodeSize = terminalSize + getULEB128Size(terminalSize); + } else { + nodeSize = 1; // Size of terminalSize (which has a value of 0) + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of children. + for (Edge &edge : edges) { + nodeSize += edge.substring.size() + 1 // String length. + + getULEB128Size(edge.child->offset); // Offset len. + } + // On input, 'nextOffset' is the new preferred location for this node. + bool result = (offset != nextOffset); + // Store new location in node object for use by parents. 
+ offset = nextOffset; + nextOffset += nodeSize; + return result; +} + +void TrieNode::writeTo(uint8_t *buf) const { + buf += offset; + if (info) { + // TrieNodes with Symbol info: size, flags address + uint64_t flags = 0; // TODO: emit proper flags + uint32_t terminalSize = + getULEB128Size(flags) + getULEB128Size(info->address); + buf += encodeULEB128(terminalSize, buf); + buf += encodeULEB128(flags, buf); + buf += encodeULEB128(info->address, buf); + } else { + // TrieNode with no Symbol info. + *buf++ = 0; // terminalSize + } + // Add number of children. TODO: Handle case where we have more than 256. + assert(edges.size() < 256); + *buf++ = edges.size(); + // Append each child edge substring and node offset. + for (const Edge &edge : edges) { + memcpy(buf, edge.substring.data(), edge.substring.size()); + buf += edge.substring.size(); + *buf++ = '\0'; + buf += encodeULEB128(edge.child->offset, buf); + } +} + +TrieNode *TrieBuilder::makeNode() { + auto *node = make(); + nodes.emplace_back(node); + return node; +} + +static int charAt(const Symbol *sym, size_t pos) { + StringRef str = sym->getName(); + if (pos >= str.size()) + return -1; + return str[pos]; +} + +// Build the trie by performing a three-way radix quicksort: We start by sorting +// the strings by their first characters, then sort the strings with the same +// first characters by their second characters, and so on recursively. Each +// time the prefixes diverge, we add a node to the trie. +// +// node: The most recently created node along this path in the trie (i.e. +// the furthest from the root.) +// lastPos: The prefix length of the most recently created node, i.e. the number +// of characters along its path from the root. +// pos: The string index we are currently sorting on. Note that each symbol +// S contained in vec has the same prefix S[0...pos). +void TrieBuilder::sortAndBuild(MutableArrayRef vec, + TrieNode *node, size_t lastPos, size_t pos) { +tailcall: + if (vec.empty()) + return; + + // Partition items so that items in [0, i) are less than the pivot, + // [i, j) are the same as the pivot, and [j, vec.size()) are greater than + // the pivot. + const Symbol *pivotSymbol = vec[vec.size() / 2]; + int pivot = charAt(pivotSymbol, pos); + size_t i = 0; + size_t j = vec.size(); + for (size_t k = 0; k < j;) { + int c = charAt(vec[k], pos); + if (c < pivot) + std::swap(vec[i++], vec[k++]); + else if (c > pivot) + std::swap(vec[--j], vec[k]); + else + k++; + } + + bool isTerminal = pivot == -1; + bool prefixesDiverge = i != 0 || j != vec.size(); + if (lastPos != pos && (isTerminal || prefixesDiverge)) { + TrieNode *newNode = makeNode(); + node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos), + newNode); + node = newNode; + lastPos = pos; + } + + sortAndBuild(vec.slice(0, i), node, lastPos, pos); + sortAndBuild(vec.slice(j), node, lastPos, pos); + + if (isTerminal) { + assert(j - i == 1); // no duplicate symbols + node->info = {pivotSymbol->getVA()}; + } else { + // This is the tail-call-optimized version of the following: + // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1); + vec = vec.slice(i, j - i); + ++pos; + goto tailcall; + } +} + +size_t TrieBuilder::build() { + if (exported.empty()) + return 0; + + TrieNode *root = makeNode(); + sortAndBuild(exported, root, 0, 0); + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. 
+ size_t offset; + bool more; + do { + offset = 0; + more = false; + for (TrieNode *node : nodes) + more |= node->updateOffset(offset); + } while (more); + + return offset; +} + +void TrieBuilder::writeTo(uint8_t *buf) const { + for (TrieNode *node : nodes) + node->writeTo(buf); +} + +} // namespace macho +} // namespace lld diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h new file mode 100644 index 0000000000000..a85728c599557 --- /dev/null +++ b/lld/MachO/ExportTrie.h @@ -0,0 +1,41 @@ +//===- ExportTrie.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_EXPORT_TRIE_H +#define LLD_MACHO_EXPORT_TRIE_H + +#include "llvm/ADT/ArrayRef.h" + +#include + +namespace lld { +namespace macho { + +struct TrieNode; +class Symbol; + +class TrieBuilder { +public: + void addSymbol(const Symbol &sym) { exported.push_back(&sym); } + // Returns the size in bytes of the serialized trie. + size_t build(); + void writeTo(uint8_t *buf) const; + +private: + TrieNode *makeNode(); + void sortAndBuild(llvm::MutableArrayRef vec, TrieNode *node, + size_t lastPos, size_t pos); + + std::vector exported; + std::vector nodes; +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 7794f3e893fdb..02e7d1c55221f 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -43,7 +43,7 @@ #include "InputFiles.h" #include "InputSection.h" -#include "OutputSegment.h" +#include "OutputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" @@ -81,7 +81,31 @@ Optional macho::readFile(StringRef path) { if (read32be(&hdr->magic) != MachO::FAT_MAGIC) return mbref; - error("TODO: Add support for universal binaries"); + // Object files and archive files may be fat files, which contains + // multiple real files for different CPU ISAs. Here, we search for a + // file that matches with the current link target and returns it as + // a MemoryBufferRef. 
+  auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
+
+  for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
+    if (reinterpret_cast<const char *>(arch + i + 1) >
+        buf + mbref.getBufferSize()) {
+      error(path + ": fat_arch struct extends beyond end of file");
+      return None;
+    }
+
+    if (read32be(&arch[i].cputype) != target->cpuType ||
+        read32be(&arch[i].cpusubtype) != target->cpuSubtype)
+      continue;
+
+    uint32_t offset = read32be(&arch[i].offset);
+    uint32_t size = read32be(&arch[i].size);
+    if (offset + size > mbref.getBufferSize())
+      error(path + ": slice extends beyond end of file");
+    return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
+  }
+
+  error("unable to find matching architecture in " + path);
   return None;
 }
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 4974d81c9ac72..55e17b6065497 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputSection.h"
+#include "OutputSegment.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -20,26 +21,33 @@ using namespace lld::macho;
 
 std::vector<InputSection *> macho::inputSections;
 
+uint64_t InputSection::getFileOffset() const {
+  return parent->fileOff + outSecFileOff;
+}
+
+uint64_t InputSection::getVA() const { return parent->addr + outSecOff; }
+
 void InputSection::writeTo(uint8_t *buf) {
-  memcpy(buf, data.data(), data.size());
+  if (!data.empty())
+    memcpy(buf, data.data(), data.size());
 
   for (Reloc &r : relocs) {
     uint64_t va = 0;
     if (auto *s = r.target.dyn_cast<Symbol *>()) {
       if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
-        va = in.got->addr - ImageBase + dylibSymbol->gotIndex * WordSize;
+        va = in.got->addr + dylibSymbol->gotIndex * WordSize;
       } else {
         va = s->getVA();
       }
     } else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
-      va = isec->addr;
+      va = isec->getVA();
     } else {
       llvm_unreachable("Unknown relocation target");
     }
 
     uint64_t val = va + r.addend;
     if (1) // TODO: handle non-pcrel relocations
-      val -= addr - ImageBase + r.offset;
+      val -= getVA() + r.offset;
     target->relocateOne(buf + r.offset, r.type, val);
   }
 }
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index fea88ea1d2beb..a945b79ad3a0d 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -19,7 +19,7 @@ namespace macho {
 
 class InputFile;
 class InputSection;
-class OutputSegment;
+class OutputSection;
 class Symbol;
 
 struct Reloc {
@@ -32,19 +32,25 @@ struct Reloc {
 class InputSection {
 public:
   virtual ~InputSection() = default;
-  virtual void writeTo(uint8_t *buf);
 
   virtual size_t getSize() const { return data.size(); }
+  virtual uint64_t getFileSize() const { return getSize(); }
+  uint64_t getFileOffset() const;
+  uint64_t getVA() const;
+
+  virtual void writeTo(uint8_t *buf);
 
   InputFile *file = nullptr;
-  OutputSegment *parent = nullptr;
   StringRef name;
   StringRef segname;
-  ArrayRef<uint8_t> data;
-  uint64_t addr = 0;
+
+  OutputSection *parent = nullptr;
+  uint64_t outSecOff = 0;
+  uint64_t outSecFileOff = 0;
+
   uint32_t align = 1;
   uint32_t flags = 0;
+
+  ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
 };
diff --git a/lld/MachO/MergedOutputSection.cpp b/lld/MachO/MergedOutputSection.cpp
new file mode 100644
index 0000000000000..1983736d78794
--- /dev/null
+++ b/lld/MachO/MergedOutputSection.cpp
@@ -0,0 +1,72 @@
+//===- MergedOutputSection.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MergedOutputSection.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace lld;
+using namespace lld::macho;
+
+void MergedOutputSection::mergeInput(InputSection *input) {
+  if (inputs.empty()) {
+    align = input->align;
+    flags = input->flags;
+  } else {
+    mergeFlags(input->flags);
+    align = std::max(align, input->align);
+  }
+
+  inputs.push_back(input);
+  input->parent = this;
+}
+
+void MergedOutputSection::finalize() {
+  uint64_t isecAddr = addr;
+  uint64_t isecFileOff = fileOff;
+  for (InputSection *i : inputs) {
+    isecAddr = alignTo(isecAddr, i->align);
+    isecFileOff = alignTo(isecFileOff, i->align);
+    i->outSecOff = isecAddr - addr;
+    i->outSecFileOff = isecFileOff - fileOff;
+    isecAddr += i->getSize();
+    isecFileOff += i->getFileSize();
+  }
+  size = isecAddr - addr;
+  fileSize = isecFileOff - fileOff;
+}
+
+void MergedOutputSection::writeTo(uint8_t *buf) const {
+  for (InputSection *isec : inputs) {
+    isec->writeTo(buf + isec->outSecFileOff);
+  }
+}
+
+// TODO: this is most likely wrong; reconsider how section flags
+// are actually merged. The logic presented here was written without
+// any form of informed research.
+void MergedOutputSection::mergeFlags(uint32_t inputFlags) {
+  uint8_t sectionFlag = MachO::SECTION_TYPE & inputFlags;
+  if (sectionFlag != (MachO::SECTION_TYPE & flags))
+    error("Cannot add merge section; inconsistent type flags " +
+          Twine(sectionFlag));
+
+  uint32_t inconsistentFlags =
+      MachO::S_ATTR_DEBUG | MachO::S_ATTR_STRIP_STATIC_SYMS |
+      MachO::S_ATTR_NO_DEAD_STRIP | MachO::S_ATTR_LIVE_SUPPORT;
+  if ((inputFlags ^ flags) & inconsistentFlags)
+    error("Cannot add merge section; cannot merge inconsistent flags");
+
+  // Negate pure instruction presence if any section isn't pure.
+  uint32_t pureMask = ~MachO::S_ATTR_PURE_INSTRUCTIONS | (inputFlags & flags);
+
+  // Merge the rest.
+  flags |= inputFlags;
+  flags &= pureMask;
+}
diff --git a/lld/MachO/MergedOutputSection.h b/lld/MachO/MergedOutputSection.h
new file mode 100644
index 0000000000000..272b7525b3bc3
--- /dev/null
+++ b/lld/MachO/MergedOutputSection.h
@@ -0,0 +1,51 @@
+//===- MergedOutputSection.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_MERGED_OUTPUT_SECTION_H
+#define LLD_MACHO_MERGED_OUTPUT_SECTION_H
+
+#include "InputSection.h"
+#include "OutputSection.h"
+#include "lld/Common/LLVM.h"
+
+namespace lld {
+namespace macho {
+
+// Linking multiple files will inevitably mean resolving sections in different
+// files that are labeled with the same segment and section name. This class
+// contains all such sections and writes the data from each section
+// sequentially in the final binary.
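The S_ATTR_PURE_INSTRUCTIONS handling in mergeFlags above is easiest to see with concrete values. A small self-checking sketch of just that rule (constants copied from llvm/BinaryFormat/MachO.h; the helper name is invented):

#include <cassert>
#include <cstdint>

constexpr uint32_t S_ATTR_PURE_INSTRUCTIONS = 0x80000000;
constexpr uint32_t S_ATTR_SOME_INSTRUCTIONS = 0x00000400;

// OR the flags together, but keep S_ATTR_PURE_INSTRUCTIONS only if *both*
// sides already had it -- one impure input makes the merged section impure.
uint32_t mergePure(uint32_t flags, uint32_t inputFlags) {
  uint32_t pureMask = ~S_ATTR_PURE_INSTRUCTIONS | (inputFlags & flags);
  flags |= inputFlags;
  flags &= pureMask;
  return flags;
}

int main() {
  uint32_t pure = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
  uint32_t impure = S_ATTR_SOME_INSTRUCTIONS;
  assert(mergePure(pure, pure) == pure);     // pure + pure stays pure
  assert(mergePure(pure, impure) == impure); // pure + impure drops the bit
}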
+class MergedOutputSection : public OutputSection {
+public:
+  MergedOutputSection(StringRef name) : OutputSection(name) {}
+
+  const InputSection *firstSection() const { return inputs.front(); }
+  const InputSection *lastSection() const { return inputs.back(); }
+
+  // These accessors will only be valid after finalizing the section.
+  size_t getSize() const override { return size; }
+  uint64_t getFileSize() const override { return fileSize; }
+
+  void mergeInput(InputSection *input) override;
+  void finalize() override;
+
+  void writeTo(uint8_t *buf) const override;
+
+  std::vector<InputSection *> inputs;
+
+private:
+  void mergeFlags(uint32_t inputFlags);
+
+  size_t size = 0;
+  uint64_t fileSize = 0;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 5a291d022edce..8327bb9d4abc7 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -9,8 +9,14 @@ def Z: Flag<["-"], "Z">,
 def arch: Separate<["-"], "arch">, MetaVarName<"">,
   HelpText<"Architecture to link">;
 
+def dylib: Flag<["-"], "dylib">, HelpText<"Emit a shared library">;
+
 def e: Separate<["-"], "e">, HelpText<"Name of entry point symbol">;
 
+def install_name: Separate<["-"], "install_name">,
+    MetaVarName<"">,
+    HelpText<"Set the install path of the dynamic library.">;
+
 def l: Joined<["-"], "l">, MetaVarName<"">,
   HelpText<"Base name of library searched for in -L directories">;
diff --git a/lld/MachO/OutputSection.cpp b/lld/MachO/OutputSection.cpp
new file mode 100644
index 0000000000000..d4c24f6b73455
--- /dev/null
+++ b/lld/MachO/OutputSection.cpp
@@ -0,0 +1,23 @@
+//===- OutputSection.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OutputSection.h"
+#include "OutputSegment.h"
+#include "lld/Common/ErrorHandler.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+
+uint64_t OutputSection::getSegmentOffset() const {
+  return addr - parent->firstSection()->addr;
+}
+
+void OutputSection::mergeInput(InputSection *input) {
+  llvm_unreachable("Cannot merge input section into unmergable output section");
+}
diff --git a/lld/MachO/OutputSection.h b/lld/MachO/OutputSection.h
new file mode 100644
index 0000000000000..90f8a841a7f29
--- /dev/null
+++ b/lld/MachO/OutputSection.h
@@ -0,0 +1,100 @@
+//===- OutputSection.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_OUTPUT_SECTION_H
+#define LLD_MACHO_OUTPUT_SECTION_H
+
+#include "lld/Common/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld {
+namespace macho {
+
+class InputSection;
+class OutputSegment;
+
+// Output sections represent the finalized sections present within the final
+// linked executable. They can represent special sections (like the symbol
+// table), or represent coalesced sections from the various inputs given to the
+// linker with the same segment / section name.
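A rough standalone sketch of how this interface is meant to be implemented (a simplified mock plus a hypothetical zero-filled section, not part of the patch), mainly to highlight the split between address-space size and file size:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Simplified mock of the interface described above.
class OutputSectionLike {
public:
  virtual ~OutputSectionLike() = default;
  virtual size_t getSize() const = 0;
  virtual uint64_t getFileSize() const { return getSize(); }
  virtual bool isNeeded() const { return true; }
  virtual bool isHidden() const { return !isNeeded(); }
  virtual void writeTo(uint8_t *buf) const = 0;
};

// Hypothetical implementation: a section of n zero bytes.
class FillerSection : public OutputSectionLike {
public:
  explicit FillerSection(size_t n) : n(n) {}
  size_t getSize() const override { return n; }
  // A zerofill-style section would also override getFileSize() to return 0,
  // occupying address space but no bytes in the file.
  void writeTo(uint8_t *buf) const override { std::memset(buf, 0, n); }

private:
  size_t n;
};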
+class OutputSection {
+public:
+  OutputSection(StringRef name) : name(name) {}
+  virtual ~OutputSection() = default;
+
+  // These accessors will only be valid after finalizing the section.
+  uint64_t getSegmentOffset() const;
+
+  // How much space the section occupies in the address space.
+  virtual size_t getSize() const = 0;
+  // How much space the section occupies in the file. Most sections are copied
+  // as-is so their file size is the same as their address space size.
+  virtual uint64_t getFileSize() const { return getSize(); }
+
+  // Hidden sections omit header content, but body content is still present.
+  virtual bool isHidden() const { return !this->isNeeded(); }
+  // Unneeded sections are omitted entirely (header and body).
+  virtual bool isNeeded() const { return true; }
+
+  // Some sections may allow coalescing other raw input sections.
+  virtual void mergeInput(InputSection *input);
+
+  // Specifically finalizes addresses and section size, not content.
+  virtual void finalize() {
+    // TODO investigate refactoring synthetic section finalization logic into
+    // overrides of this function.
+  }
+
+  virtual void writeTo(uint8_t *buf) const = 0;
+
+  StringRef name;
+  OutputSegment *parent = nullptr;
+
+  uint32_t index = 0;
+  uint64_t addr = 0;
+  uint64_t fileOff = 0;
+  uint32_t align = 1;
+  uint32_t flags = 0;
+};
+
+class OutputSectionComparator {
+public:
+  OutputSectionComparator(uint32_t segmentOrder,
+                          const std::vector<StringRef> &sectOrdering)
+      : segmentOrder(segmentOrder) {
+    for (uint32_t j = 0, m = sectOrdering.size(); j < m; ++j)
+      sectionOrdering[sectOrdering[j]] = j;
+  }
+
+  uint32_t sectionOrder(StringRef secname) {
+    auto sectIt = sectionOrdering.find(secname);
+    if (sectIt != sectionOrdering.end())
+      return sectIt->second;
+    return sectionOrdering.size();
+  }
+
+  // Sort sections within a common segment, which stores them in
+  // a MapVector of section name -> section
+  bool operator()(const std::pair<StringRef, OutputSection *> &a,
+                  const std::pair<StringRef, OutputSection *> &b) {
+    return sectionOrder(a.first) < sectionOrder(b.first);
+  }
+
+  bool operator<(const OutputSectionComparator &b) {
+    return segmentOrder < b.segmentOrder;
+  }
+
+private:
+  uint32_t segmentOrder;
+  llvm::DenseMap<StringRef, uint32_t> sectionOrdering;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index 75f5c20146b60..1512e3f302f8f 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -7,24 +7,140 @@
 //===----------------------------------------------------------------------===//
 
 #include "OutputSegment.h"
+#include "InputSection.h"
+#include "MergedOutputSection.h"
+#include "SyntheticSections.h"
+
+#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
 
 using namespace llvm;
+using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
-std::vector<OutputSegment *> macho::outputSegments;
+static uint32_t initProt(StringRef name) {
+  if (name == segment_names::text)
+    return VM_PROT_READ | VM_PROT_EXECUTE;
+  if (name == segment_names::pageZero)
+    return 0;
+  if (name == segment_names::linkEdit)
+    return VM_PROT_READ;
+  return VM_PROT_READ | VM_PROT_WRITE;
+}
+
+static uint32_t maxProt(StringRef name) {
+  if (name == segment_names::pageZero)
+    return 0;
+  return VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
+}
+
+size_t OutputSegment::numNonHiddenSections() const {
+  size_t count = 0;
+  for (const OutputSegment::SectionMapEntry &i : sections) {
+    OutputSection *os = i.second;
+    count += (os->isHidden() ? 0 : 1);
+  }
+  return count;
+}
 
-OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) {
-  for (OutputSegment *os : outputSegments)
-    if (os->name == name)
-      // TODO: assert that os->perms == perms, once we figure out what to do
-      // about default-created segments.
-      return os;
-
-  auto *os = make<OutputSegment>();
-  os->name = name;
-  os->perms = perms;
-  outputSegments.push_back(os);
+void OutputSegment::addOutputSection(OutputSection *os) {
+  os->parent = this;
+  std::pair<SectionMap::iterator, bool> result =
+      sections.insert(SectionMapEntry(os->name, os));
+  if (!result.second) {
+    llvm_unreachable("Attempted to set section, but a section with the same "
+                     "name already exists");
+  }
+}
+
+OutputSection *OutputSegment::getOrCreateOutputSection(StringRef name) {
+  OutputSegment::SectionMap::iterator i = sections.find(name);
+  if (i != sections.end()) {
+    return i->second;
+  }
+
+  auto *os = make<MergedOutputSection>(name);
+  addOutputSection(os);
   return os;
 }
+
+void OutputSegment::sortOutputSections(OutputSegmentComparator *comparator) {
+  llvm::stable_sort(sections, *comparator->sectionComparator(this));
+}
+
+OutputSegmentComparator::OutputSegmentComparator() {
+  // This defines the order of segments and the sections within each segment.
+  // Segments that are not mentioned here will end up at defaultPosition;
+  // sections that are not mentioned will end up at the end of the section
+  // list for their given segment.
+  std::vector<std::pair<StringRef, std::vector<StringRef>>> ordering{
+      {segment_names::pageZero, {}},
+      {segment_names::text, {section_names::header}},
+      {defaultPosition, {}},
+      // Make sure __LINKEDIT is the last segment (i.e. all its hidden
+      // sections must be ordered after other sections).
+      {segment_names::linkEdit,
+       {
+           section_names::binding,
+           section_names::export_,
+           section_names::symbolTable,
+           section_names::stringTable,
+       }},
+  };
+
+  for (uint32_t i = 0, n = ordering.size(); i < n; ++i) {
+    auto &p = ordering[i];
+    StringRef segname = p.first;
+    const std::vector<StringRef> &sectOrdering = p.second;
+    orderMap.insert(std::pair<StringRef, OutputSectionComparator>(
+        segname, OutputSectionComparator(i, sectOrdering)));
+  }
+
+  // Cache the position for the default comparator since this is the likely
+  // scenario.
+  defaultPositionComparator = &orderMap.find(defaultPosition)->second;
+}
+
+static llvm::DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
+std::vector<OutputSegment *> macho::outputSegments;
+
+OutputSegment *macho::getOutputSegment(StringRef name) {
+  return nameToOutputSegment.lookup(name);
+}
+
+OutputSegment *macho::getOrCreateOutputSegment(StringRef name) {
+  OutputSegment *&segRef = nameToOutputSegment[name];
+  if (segRef != nullptr)
+    return segRef;
+
+  segRef = make<OutputSegment>();
+  segRef->name = name;
+  segRef->maxProt = maxProt(name);
+  segRef->initProt = initProt(name);
+
+  outputSegments.push_back(segRef);
+  return segRef;
+}
+
+void macho::sortOutputSegmentsAndSections() {
+  // Sorting can only happen once all outputs have been collected.
+  // Since output sections are grouped by segment, sorting happens
+  // first over all segments, then over sections per segment.
+  auto comparator = OutputSegmentComparator();
+  llvm::stable_sort(outputSegments, comparator);
+
+  // Now that the output sections are sorted, assign the final
+  // output section indices.
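As a toy illustration of what these comparators compute (a standalone sketch, not the patch's classes): known segment names get fixed ranks, unlisted names land at the default position, and __LINKEDIT is forced last. The loop below then numbers the visible sections in exactly this order.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Toy version of the segment comparator.
static int rank(const std::string &seg) {
  if (seg == "__PAGEZERO") return 0;
  if (seg == "__TEXT") return 1;
  if (seg == "__LINKEDIT") return 3; // always last
  return 2;                          // defaultPosition
}

int main() {
  std::vector<std::string> segs = {"__DATA", "__LINKEDIT", "__TEXT",
                                   "__PAGEZERO"};
  std::stable_sort(segs.begin(), segs.end(),
                   [](const std::string &a, const std::string &b) {
                     return rank(a) < rank(b);
                   });
  for (const std::string &s : segs)
    std::puts(s.c_str()); // __PAGEZERO __TEXT __DATA __LINKEDIT
}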
+  uint32_t sectionIndex = 0;
+  for (OutputSegment *seg : outputSegments) {
+    seg->sortOutputSections(&comparator);
+    for (auto &p : seg->getSections()) {
+      OutputSection *section = p.second;
+      if (!section->isHidden()) {
+        section->index = ++sectionIndex;
+      }
+    }
+  }
+}
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index 211750de8761e..3a801430245c0 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -9,29 +9,89 @@
 #ifndef LLD_MACHO_OUTPUT_SEGMENT_H
 #define LLD_MACHO_OUTPUT_SEGMENT_H
 
+#include "OutputSection.h"
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/MapVector.h"
 
 namespace lld {
 namespace macho {
 
+namespace segment_names {
+
+constexpr const char *text = "__TEXT";
+constexpr const char *pageZero = "__PAGEZERO";
+constexpr const char *linkEdit = "__LINKEDIT";
+constexpr const char *dataConst = "__DATA_CONST";
+
+} // namespace segment_names
+
+class OutputSection;
+class OutputSegmentComparator;
 class InputSection;
 
 class OutputSegment {
 public:
-  InputSection *firstSection() const { return sections.front().second.at(0); }
+  using SectionMap = typename llvm::MapVector<StringRef, OutputSection *>;
+  using SectionMapEntry = typename std::pair<StringRef, OutputSection *>;
+
+  const OutputSection *firstSection() const { return sections.front().second; }
+  const OutputSection *lastSection() const { return sections.back().second; }
 
-  InputSection *lastSection() const { return sections.back().second.back(); }
+  bool isNeeded() const {
+    if (name == segment_names::linkEdit)
+      return true;
+    for (const SectionMapEntry &i : sections) {
+      OutputSection *os = i.second;
+      if (os->isNeeded())
+        return true;
+    }
+    return false;
+  }
 
+  OutputSection *getOrCreateOutputSection(StringRef name);
+  void addOutputSection(OutputSection *os);
+  void sortOutputSections(OutputSegmentComparator *comparator);
+
+  const SectionMap &getSections() const { return sections; }
+  size_t numNonHiddenSections() const;
+
+  uint64_t fileOff = 0;
   StringRef name;
-  uint32_t perms;
+  uint32_t maxProt = 0;
+  uint32_t initProt = 0;
   uint8_t index;
-  llvm::MapVector<StringRef, std::vector<InputSection *>> sections;
+
+private:
+  SectionMap sections;
+};
+
+class OutputSegmentComparator {
+public:
+  OutputSegmentComparator();
+
+  OutputSectionComparator *sectionComparator(const OutputSegment *os) {
+    auto it = orderMap.find(os->name);
+    if (it == orderMap.end()) {
+      return defaultPositionComparator;
+    }
+    return &it->second;
+  }
+
+  bool operator()(const OutputSegment *a, const OutputSegment *b) {
+    return *sectionComparator(a) < *sectionComparator(b);
+  }
+
+private:
+  const StringRef defaultPosition = StringRef();
+  llvm::DenseMap<StringRef, OutputSectionComparator> orderMap;
+  OutputSectionComparator *defaultPositionComparator;
 };
 
 extern std::vector<OutputSegment *> outputSegments;
 
-OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms);
+OutputSegment *getOutputSegment(StringRef name);
+OutputSegment *getOrCreateOutputSegment(StringRef name);
+void sortOutputSegmentsAndSections();
 
 } // namespace macho
 } // namespace lld
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 95e7a88a38da5..b4b10c684a81b 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -11,6 +11,7 @@
 
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Object/Archive.h"
 
 namespace lld {
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 65c54feb53728..1f5817117e807 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -81,7 +81,7 @@ class DylibSymbol : public Symbol {
 
 inline uint64_t Symbol::getVA() const {
   if (auto *d = dyn_cast<Defined>(this))
-    return d->isec->addr + d->value - ImageBase;
+    return d->isec->getVA() + d->value;
   return 0;
 }
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 3212aea4cfc44..c23f6abdb5647 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -7,16 +7,68 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyntheticSections.h"
+#include "Config.h"
+#include "ExportTrie.h"
+#include "InputFiles.h"
+#include "OutputSegment.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
+#include "Writer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
 using namespace llvm::MachO;
+using namespace llvm::support;
 
 namespace lld {
 namespace macho {
 
-GotSection::GotSection() {
-  segname = "__DATA_CONST";
-  name = "__got";
+SyntheticSection::SyntheticSection(const char *segname, const char *name)
+    : OutputSection(name) {
+  // Synthetic sections always know which segment they belong to, so hook
+  // them up when they're made.
+  getOrCreateOutputSegment(segname)->addOutputSection(this);
+}
+
+// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
+// from the beginning of the file (i.e. the header).
+MachHeaderSection::MachHeaderSection()
+    : SyntheticSection(segment_names::text, section_names::header) {}
+
+void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
+  loadCommands.push_back(lc);
+  sizeOfCmds += lc->getSize();
+}
+
+size_t MachHeaderSection::getSize() const {
+  return sizeof(mach_header_64) + sizeOfCmds;
+}
+
+void MachHeaderSection::writeTo(uint8_t *buf) const {
+  auto *hdr = reinterpret_cast<mach_header_64 *>(buf);
+  hdr->magic = MH_MAGIC_64;
+  hdr->cputype = CPU_TYPE_X86_64;
+  hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
+  hdr->filetype = config->outputType;
+  hdr->ncmds = loadCommands.size();
+  hdr->sizeofcmds = sizeOfCmds;
+  hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
+
+  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
+  for (LoadCommand *lc : loadCommands) {
+    lc->writeTo(p);
+    p += lc->getSize();
+  }
+}
+
+PageZeroSection::PageZeroSection()
+    : SyntheticSection(segment_names::pageZero, section_names::pageZero) {}
+
+GotSection::GotSection()
+    : SyntheticSection(segment_names::dataConst, section_names::got) {
   align = 8;
   flags = S_NON_LAZY_SYMBOL_POINTERS;
 
@@ -30,6 +82,121 @@ void GotSection::addEntry(DylibSymbol &sym) {
   }
 }
 
+BindingSection::BindingSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::binding) {}
+
+bool BindingSection::isNeeded() const { return in.got->isNeeded(); }
+
+// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
+// interprets to update a record with the following fields:
+//  * segment index (of the segment to write the symbol addresses to, typically
+//    the __DATA_CONST segment which contains the GOT)
+//  * offset within the segment, indicating the next location to write a
+//    binding
+//  * symbol type
+//  * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)
+//  * symbol name
+//  * addend
+// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
+// a symbol in the GOT, and increments the segment offset to point to the next
+// entry. It does *not* clear the record state after doing the bind, so
+// subsequent opcodes only need to encode the differences between bindings.
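A worked example of such a stream (illustrative bytes, not produced verbatim by this patch): binding one pointer at offset 0 of segment 2 to a symbol _malloc from the dylib with ordinal 1. Opcode values are from llvm/BinaryFormat/MachO.h.

#include <cstdint>

const uint8_t exampleBindStream[] = {
    0x72, // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segment index 2
    0x00, //   ULEB128 offset 0 within that segment
    0x11, // BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | ordinal 1
    0x40, // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | flags 0
    '_', 'm', 'a', 'l', 'l', 'o', 'c', '\0', // symbol name, NUL-terminated
    0x51, // BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER (1)
    0x90, // BIND_OPCODE_DO_BIND: bind, then advance past the bound pointer
    0x00, // BIND_OPCODE_DONE
};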
+void BindingSection::finalizeContents() {
+  if (!isNeeded())
+    return;
+
+  raw_svector_ostream os{contents};
+  os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                             in.got->parent->index);
+  encodeULEB128(in.got->getSegmentOffset(), os);
+  for (const DylibSymbol *sym : in.got->getEntries()) {
+    // TODO: Implement compact encoding -- we only need to encode the
+    // differences between consecutive symbol entries.
+    if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                 sym->file->ordinal);
+    } else {
+      error("TODO: Support larger dylib symbol ordinals");
+      continue;
+    }
+    os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+       << sym->getName() << '\0'
+       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+       << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  }
+
+  os << static_cast<uint8_t>(BIND_OPCODE_DONE);
+}
+
+void BindingSection::writeTo(uint8_t *buf) const {
+  memcpy(buf, contents.data(), contents.size());
+}
+
+ExportSection::ExportSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::export_) {}
+
+void ExportSection::finalizeContents() {
+  // TODO: We should check symbol visibility.
+  for (const Symbol *sym : symtab->getSymbols())
+    if (auto *defined = dyn_cast<Defined>(sym))
+      trieBuilder.addSymbol(*defined);
+  size = trieBuilder.build();
+}
+
+void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
+
+SymtabSection::SymtabSection(StringTableSection &stringTableSection)
+    : SyntheticSection(segment_names::linkEdit, section_names::symbolTable),
+      stringTableSection(stringTableSection) {
+  // TODO: When we introduce the SyntheticSections superclass, we should make
+  // all synthetic sections aligned to WordSize by default.
+  align = WordSize;
+}
+
+size_t SymtabSection::getSize() const {
+  return symbols.size() * sizeof(nlist_64);
+}
+
+void SymtabSection::finalizeContents() {
+  // TODO support other symbol types
+  for (Symbol *sym : symtab->getSymbols())
+    if (isa<Defined>(sym))
+      symbols.push_back({sym, stringTableSection.addString(sym->getName())});
+}
+
+void SymtabSection::writeTo(uint8_t *buf) const {
+  auto *nList = reinterpret_cast<nlist_64 *>(buf);
+  for (const SymtabEntry &entry : symbols) {
+    nList->n_strx = entry.strx;
+    // TODO support other symbol types
+    // TODO populate n_desc
+    if (auto defined = dyn_cast<Defined>(entry.sym)) {
+      nList->n_type = N_EXT | N_SECT;
+      nList->n_sect = defined->isec->parent->index;
+      // For the N_SECT symbol type, n_value is the address of the symbol
+      nList->n_value = defined->value + defined->isec->getVA();
+    }
+    ++nList;
+  }
+}
+
+StringTableSection::StringTableSection()
+    : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {}
+
+uint32_t StringTableSection::addString(StringRef str) {
+  uint32_t strx = size;
+  strings.push_back(str);
+  size += str.size() + 1; // account for null terminator
+  return strx;
+}
+
+void StringTableSection::writeTo(uint8_t *buf) const {
+  uint32_t off = 0;
+  for (StringRef str : strings) {
+    memcpy(buf + off, str.data(), str.size());
+    off += str.size() + 1; // account for null terminator
+  }
+}
+
 InStruct in;
 
 } // namespace macho
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index ba5a8f35b28be..c8dadf9bb0ded 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -9,18 +9,63 @@
 #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
 #define LLD_MACHO_SYNTHETIC_SECTIONS_H
 
-#include "InputSection.h"
+#include "ExportTrie.h"
+#include "OutputSection.h"
 #include "Target.h"
 
 #include "llvm/ADT/SetVector.h"
"llvm/ADT/SetVector.h" namespace lld { namespace macho { +namespace section_names { + +constexpr const char *pageZero = "__pagezero"; +constexpr const char *header = "__mach_header"; +constexpr const char *binding = "__binding"; +constexpr const char *export_ = "__export"; +constexpr const char *symbolTable = "__symbol_table"; +constexpr const char *stringTable = "__string_table"; +constexpr const char *got = "__got"; + +} // namespace section_names + class DylibSymbol; +class LoadCommand; + +class SyntheticSection : public OutputSection { +public: + SyntheticSection(const char *segname, const char *name); + virtual ~SyntheticSection() = default; +}; + +// The header of the Mach-O file, which must have a file offset of zero. +class MachHeaderSection : public SyntheticSection { +public: + MachHeaderSection(); + void addLoadCommand(LoadCommand *); + bool isHidden() const override { return true; } + size_t getSize() const override; + void writeTo(uint8_t *buf) const override; + +private: + std::vector loadCommands; + uint32_t sizeOfCmds = 0; +}; + +// A hidden section that exists solely for the purpose of creating the +// __PAGEZERO segment, which is used to catch null pointer dereferences. +class PageZeroSection : public SyntheticSection { +public: + PageZeroSection(); + bool isHidden() const override { return true; } + size_t getSize() const override { return ImageBase; } + uint64_t getFileSize() const override { return 0; } + void writeTo(uint8_t *buf) const override {} +}; // This section will be populated by dyld with addresses to non-lazily-loaded // dylib symbols. -class GotSection : public InputSection { +class GotSection : public SyntheticSection { public: GotSection(); @@ -29,9 +74,11 @@ class GotSection : public InputSection { return entries; } + bool isNeeded() const override { return !entries.empty(); } + size_t getSize() const override { return entries.size() * WordSize; } - void writeTo(uint8_t *buf) override { + void writeTo(uint8_t *buf) const override { // Nothing to write, GOT contains all zeros at link time; it's populated at // runtime by dyld. } @@ -40,8 +87,84 @@ class GotSection : public InputSection { llvm::SetVector entries; }; +// Stores bind opcodes for telling dyld which symbols to load non-lazily. +class BindingSection : public SyntheticSection { +public: + BindingSection(); + void finalizeContents(); + size_t getSize() const override { return contents.size(); } + // Like other sections in __LINKEDIT, the binding section is special: its + // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in + // section headers. + bool isHidden() const override { return true; } + bool isNeeded() const override; + void writeTo(uint8_t *buf) const override; + + SmallVector contents; +}; + +// Stores a trie that describes the set of exported symbols. +class ExportSection : public SyntheticSection { +public: + ExportSection(); + void finalizeContents(); + size_t getSize() const override { return size; } + // Like other sections in __LINKEDIT, the export section is special: its + // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in + // section headers. + bool isHidden() const override { return true; } + void writeTo(uint8_t *buf) const override; + +private: + TrieBuilder trieBuilder; + size_t size = 0; +}; + +// Stores the strings referenced by the symbol table. +class StringTableSection : public SyntheticSection { +public: + StringTableSection(); + // Returns the start offset of the added string. 
+  uint32_t addString(StringRef);
+  size_t getSize() const override { return size; }
+  // Like other sections in __LINKEDIT, the string table section is special:
+  // its offsets are recorded in the LC_SYMTAB load command, instead of in
+  // section headers.
+  bool isHidden() const override { return true; }
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  // An n_strx value of 0 always indicates the empty string, so we must locate
+  // our non-empty string values at positive offsets in the string table.
+  // Therefore we insert a dummy value at position zero.
+  std::vector<StringRef> strings{"\0"};
+  size_t size = 1;
+};
+
+struct SymtabEntry {
+  Symbol *sym;
+  size_t strx;
+};
+
+class SymtabSection : public SyntheticSection {
+public:
+  SymtabSection(StringTableSection &);
+  void finalizeContents();
+  size_t getNumSymbols() const { return symbols.size(); }
+  size_t getSize() const override;
+  // Like other sections in __LINKEDIT, the symtab section is special: its
+  // offsets are recorded in the LC_SYMTAB load command, instead of in section
+  // headers.
+  bool isHidden() const override { return true; }
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  StringTableSection &stringTableSection;
+  std::vector<SymtabEntry> symbols;
+};
+
 struct InStruct {
-  GotSection *got;
+  GotSection *got = nullptr;
 };
 
 extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 7c38487c063cc..80915528e11d2 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -19,13 +19,11 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Support/EndianStream.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
-using namespace llvm::support;
 using namespace lld;
 using namespace lld::macho;
 
@@ -34,92 +32,55 @@ class LCLinkEdit;
 class LCDyldInfo;
 class LCSymtab;
 
-class LoadCommand {
-public:
-  virtual ~LoadCommand() = default;
-  virtual uint32_t getSize() const = 0;
-  virtual void writeTo(uint8_t *buf) const = 0;
-};
-
 class Writer {
 public:
   Writer() : buffer(errorHandler().outputBuffer) {}
 
-  void createLoadCommands();
   void scanRelocations();
-  void assignAddresses();
-
-  void createDyldInfoContents();
+  void createOutputSections();
+  void createLoadCommands();
+  void assignAddresses(OutputSegment *);
+  void createSymtabContents();
 
   void openFile();
-  void writeHeader();
   void writeSections();
 
   void run();
 
-  std::vector<LoadCommand *> loadCommands;
   std::unique_ptr<FileOutputBuffer> &buffer;
-  uint64_t fileSize = 0;
-  uint64_t sizeofCmds = 0;
-  LCLinkEdit *linkEditSeg = nullptr;
-  LCDyldInfo *dyldInfoSeg = nullptr;
-  LCSymtab *symtabSeg = nullptr;
-};
-
-class LCPagezero : public LoadCommand {
-public:
-  uint32_t getSize() const override { return sizeof(segment_command_64); }
-
-  void writeTo(uint8_t *buf) const override {
-    auto *c = reinterpret_cast<segment_command_64 *>(buf);
-    c->cmd = LC_SEGMENT_64;
-    c->cmdsize = getSize();
-    strcpy(c->segname, "__PAGEZERO");
-    c->vmsize = PageSize;
-  }
-};
-
-class LCLinkEdit : public LoadCommand {
-public:
-  uint32_t getSize() const override { return sizeof(segment_command_64); }
-
-  void writeTo(uint8_t *buf) const override {
-    auto *c = reinterpret_cast<segment_command_64 *>(buf);
-    c->cmd = LC_SEGMENT_64;
-    c->cmdsize = getSize();
-    strcpy(c->segname, "__LINKEDIT");
-    c->vmaddr = addr;
-    c->fileoff = fileOff;
-    c->filesize = c->vmsize = contents.size();
-    c->maxprot = VM_PROT_READ | VM_PROT_WRITE;
-    c->initprot = VM_PROT_READ;
-  }
-
-  uint64_t getOffset() const { return fileOff + contents.size(); }
-
-  uint64_t fileOff = 0;
   uint64_t addr = 0;
-  SmallVector<char, 128> contents;
+  uint64_t fileOff = 0;
+  MachHeaderSection *headerSection = nullptr;
+  BindingSection *bindingSection = nullptr;
+  ExportSection *exportSection = nullptr;
+  StringTableSection *stringTableSection = nullptr;
+  SymtabSection *symtabSection = nullptr;
 };
 
+// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
 class LCDyldInfo : public LoadCommand {
 public:
+  LCDyldInfo(BindingSection *bindingSection, ExportSection *exportSection)
+      : bindingSection(bindingSection), exportSection(exportSection) {}
+
   uint32_t getSize() const override { return sizeof(dyld_info_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<dyld_info_command *>(buf);
     c->cmd = LC_DYLD_INFO_ONLY;
     c->cmdsize = getSize();
-    c->bind_off = bindOff;
-    c->bind_size = bindSize;
-    c->export_off = exportOff;
-    c->export_size = exportSize;
+    if (bindingSection->isNeeded()) {
+      c->bind_off = bindingSection->fileOff;
+      c->bind_size = bindingSection->getFileSize();
+    }
+    if (exportSection->isNeeded()) {
+      c->export_off = exportSection->fileOff;
+      c->export_size = exportSection->getFileSize();
+    }
   }
 
-  uint64_t bindOff = 0;
-  uint64_t bindSize = 0;
-  uint64_t exportOff = 0;
-  uint64_t exportSize = 0;
+  BindingSection *bindingSection;
+  ExportSection *exportSection;
 };
 
 class LCDysymtab : public LoadCommand {
@@ -139,7 +100,7 @@ class LCSegment : public LoadCommand {
 
   uint32_t getSize() const override {
     return sizeof(segment_command_64) +
-           seg->sections.size() * sizeof(section_64);
+           seg->numNonHiddenSections() * sizeof(section_64);
   }
 
   void writeTo(uint8_t *buf) const override {
@@ -149,22 +110,24 @@ class LCSegment : public LoadCommand {
     c->cmd = LC_SEGMENT_64;
     c->cmdsize = getSize();
     memcpy(c->segname, name.data(), name.size());
+    c->fileoff = seg->fileOff;
+    c->maxprot = seg->maxProt;
+    c->initprot = seg->initProt;
+
+    if (!seg->isNeeded())
+      return;
 
-    // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
-    // from the beginning of the file (i.e. the header).
-    // TODO: replace this logic by creating a synthetic __TEXT,__mach_header
-    // section instead.
-    c->fileoff = name == "__TEXT" ? 0 : seg->firstSection()->addr - ImageBase;
-    c->vmaddr = c->fileoff + ImageBase;
-    c->vmsize = c->filesize =
+    c->vmaddr = seg->firstSection()->addr;
+    c->vmsize =
         seg->lastSection()->addr + seg->lastSection()->getSize() - c->vmaddr;
-    c->maxprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
-    c->initprot = seg->perms;
-    c->nsects = seg->sections.size();
+    c->nsects = seg->numNonHiddenSections();
 
-    for (auto &p : seg->sections) {
+    for (auto &p : seg->getSections()) {
       StringRef s = p.first;
-      std::vector<InputSection *> &sections = p.second;
+      OutputSection *section = p.second;
+      c->filesize += section->getFileSize();
+
+      if (section->isHidden())
+        continue;
 
       auto *sectHdr = reinterpret_cast<section_64 *>(buf);
       buf += sizeof(section_64);
@@ -172,16 +135,11 @@ class LCSegment : public LoadCommand {
       memcpy(sectHdr->sectname, s.data(), s.size());
       memcpy(sectHdr->segname, name.data(), name.size());
 
-      sectHdr->addr = sections[0]->addr;
-      sectHdr->offset = sections[0]->addr - ImageBase;
-      sectHdr->align = sections[0]->align;
-      uint32_t maxAlign = 0;
-      for (const InputSection *section : sections)
-        maxAlign = std::max(maxAlign, section->align);
-      sectHdr->align = Log2_32(maxAlign);
-      sectHdr->flags = sections[0]->flags;
-      sectHdr->size = sections.back()->addr + sections.back()->getSize() -
-                      sections[0]->addr;
+      sectHdr->addr = section->addr;
+      sectHdr->offset = section->fileOff;
+      sectHdr->align = Log2_32(section->align);
+      sectHdr->flags = section->flags;
+      sectHdr->size = section->getSize();
     }
   }
 
@@ -197,20 +155,30 @@ class LCMain : public LoadCommand {
     auto *c = reinterpret_cast<entry_point_command *>(buf);
     c->cmd = LC_MAIN;
     c->cmdsize = getSize();
-    c->entryoff = config->entry->getVA();
+    c->entryoff = config->entry->getVA() - ImageBase;
     c->stacksize = 0;
   }
 };
 
 class LCSymtab : public LoadCommand {
 public:
+  LCSymtab(SymtabSection *symtabSection,
+           StringTableSection *stringTableSection)
+      : symtabSection(symtabSection), stringTableSection(stringTableSection) {}
+
   uint32_t getSize() const override { return sizeof(symtab_command); }
 
   void writeTo(uint8_t *buf) const override {
     auto *c = reinterpret_cast<symtab_command *>(buf);
     c->cmd = LC_SYMTAB;
     c->cmdsize = getSize();
+    c->symoff = symtabSection->fileOff;
+    c->nsyms = symtabSection->getNumSymbols();
+    c->stroff = stringTableSection->fileOff;
+    c->strsize = stringTableSection->getFileSize();
   }
+
+  SymtabSection *symtabSection = nullptr;
+  StringTableSection *stringTableSection = nullptr;
 };
 
 class LCLoadDylib : public LoadCommand {
@@ -237,6 +205,30 @@ class LCLoadDylib : public LoadCommand {
   StringRef path;
 };
 
+class LCIdDylib : public LoadCommand {
+public:
+  LCIdDylib(StringRef name) : name(name) {}
+
+  uint32_t getSize() const override {
+    return alignTo(sizeof(dylib_command) + name.size() + 1, 8);
+  }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<dylib_command *>(buf);
+    buf += sizeof(dylib_command);
+
+    c->cmd = LC_ID_DYLIB;
+    c->cmdsize = getSize();
+    c->dylib.name = sizeof(dylib_command);
+
+    memcpy(buf, name.data(), name.size());
+    buf[name.size()] = '\0';
+  }
+
+private:
+  StringRef name;
+};
+
 class LCLoadDylinker : public LoadCommand {
 public:
   uint32_t getSize() const override {
@@ -262,23 +254,37 @@ class LCLoadDylinker : public LoadCommand {
 };
 } // namespace
 
+void Writer::scanRelocations() {
+  for (InputSection *sect : inputSections)
+    for (Reloc &r : sect->relocs)
+      if (auto *s = r.target.dyn_cast<Symbol *>())
+        if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s))
+          in.got->addEntry(*dylibSymbol);
+}
+
 void Writer::createLoadCommands() {
-  linkEditSeg = make<LCLinkEdit>();
-  dyldInfoSeg = make<LCDyldInfo>();
-  symtabSeg = make<LCSymtab>();
-
-  loadCommands.push_back(linkEditSeg);
-  loadCommands.push_back(dyldInfoSeg);
-  loadCommands.push_back(symtabSeg);
-  loadCommands.push_back(make<LCPagezero>());
-  loadCommands.push_back(make<LCDysymtab>());
-  loadCommands.push_back(make<LCMain>());
-  loadCommands.push_back(make<LCLoadDylinker>());
-
-  uint8_t segIndex = 1; // LCPagezero is a segment load command
+  headerSection->addLoadCommand(
+      make<LCDyldInfo>(bindingSection, exportSection));
+  headerSection->addLoadCommand(
+      make<LCSymtab>(symtabSection, stringTableSection));
+  headerSection->addLoadCommand(make<LCDysymtab>());
+
+  switch (config->outputType) {
+  case MH_EXECUTE:
+    headerSection->addLoadCommand(make<LCMain>());
+    headerSection->addLoadCommand(make<LCLoadDylinker>());
+    break;
+  case MH_DYLIB:
+    headerSection->addLoadCommand(make<LCIdDylib>(config->installName));
+    break;
+  default:
+    llvm_unreachable("unhandled output file type");
+  }
+
+  uint8_t segIndex = 0;
   for (OutputSegment *seg : outputSegments) {
-    if (!seg->sections.empty()) {
-      loadCommands.push_back(make<LCSegment>(seg->name, seg));
+    if (seg->isNeeded()) {
+      headerSection->addLoadCommand(make<LCSegment>(seg->name, seg));
       seg->index = segIndex++;
     }
   }
@@ -286,94 +292,66 @@ void Writer::createLoadCommands() {
   uint64_t dylibOrdinal = 1;
   for (InputFile *file : inputFiles) {
     if (auto *dylibFile = dyn_cast<DylibFile>(file)) {
-      loadCommands.push_back(make<LCLoadDylib>(dylibFile->dylibName));
+      headerSection->addLoadCommand(make<LCLoadDylib>(dylibFile->dylibName));
       dylibFile->ordinal = dylibOrdinal++;
     }
  }
 
   // TODO: dyld requires libSystem to be loaded. libSystem is a universal
   // binary and we don't have support for that yet, so mock it out here.
-  loadCommands.push_back(make<LCLoadDylib>("/usr/lib/libSystem.B.dylib"));
+  headerSection->addLoadCommand(
+      make<LCLoadDylib>("/usr/lib/libSystem.B.dylib"));
 }
 
-void Writer::scanRelocations() {
-  for (InputSection *sect : inputSections)
-    for (Reloc &r : sect->relocs)
-      if (auto *s = r.target.dyn_cast<Symbol *>())
-        if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s))
-          in.got->addEntry(*dylibSymbol);
-}
-
-void Writer::assignAddresses() {
-  uint64_t addr = ImageBase + sizeof(mach_header_64);
-
-  uint64_t size = 0;
-  for (LoadCommand *lc : loadCommands)
-    size += lc->getSize();
-  sizeofCmds = size;
-  addr += size;
-
-  for (OutputSegment *seg : outputSegments) {
-    addr = alignTo(addr, PageSize);
-
-    for (auto &p : seg->sections) {
-      ArrayRef<InputSection *> sections = p.second;
-      for (InputSection *isec : sections) {
-        addr = alignTo(addr, isec->align);
-        isec->addr = addr;
-        addr += isec->getSize();
-      }
-    }
+void Writer::createOutputSections() {
+  // First, create hidden sections
+  headerSection = make<MachHeaderSection>();
+  bindingSection = make<BindingSection>();
+  stringTableSection = make<StringTableSection>();
+  symtabSection = make<SymtabSection>(*stringTableSection);
+  exportSection = make<ExportSection>();
+
+  switch (config->outputType) {
+  case MH_EXECUTE:
+    make<PageZeroSection>();
+    break;
+  case MH_DYLIB:
+    break;
+  default:
+    llvm_unreachable("unhandled output file type");
  }
 
-  addr = alignTo(addr, PageSize);
-  linkEditSeg->addr = addr;
-  linkEditSeg->fileOff = addr - ImageBase;
+  // Then merge input sections into output sections/segments.
+  for (InputSection *isec : inputSections) {
+    getOrCreateOutputSegment(isec->segname)
+        ->getOrCreateOutputSection(isec->name)
+        ->mergeInput(isec);
+  }
 }
 
-// LC_DYLD_INFO_ONLY contains symbol import/export information. Imported
-// symbols are described by a sequence of bind opcodes, which allow for a
-// compact encoding. Exported symbols are described using a trie.
-void Writer::createDyldInfoContents() {
-  uint64_t sectionStart = linkEditSeg->getOffset();
-  raw_svector_ostream os{linkEditSeg->contents};
-
-  if (in.got->getSize() != 0) {
-    // Emit bind opcodes, which tell dyld which dylib symbols to load.
-
-    // Tell dyld to write to the section containing the GOT.
-    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                               in.got->parent->index);
-    encodeULEB128(in.got->addr - in.got->parent->firstSection()->addr, os);
-    for (const DylibSymbol *sym : in.got->getEntries()) {
-      // TODO: Implement compact encoding -- we only need to encode the
-      // differences between consecutive symbol entries.
-      if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
-        os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                   sym->file->ordinal);
-      } else {
-        error("TODO: Support larger dylib symbol ordinals");
-        continue;
-      }
-      os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-         << sym->getName() << '\0'
-         << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
-         << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
-    }
-
-    os << static_cast<uint8_t>(BIND_OPCODE_DONE);
-
-    dyldInfoSeg->bindOff = sectionStart;
-    dyldInfoSeg->bindSize = linkEditSeg->getOffset() - sectionStart;
+void Writer::assignAddresses(OutputSegment *seg) {
+  addr = alignTo(addr, PageSize);
+  fileOff = alignTo(fileOff, PageSize);
+  seg->fileOff = fileOff;
+
+  for (auto &p : seg->getSections()) {
+    OutputSection *section = p.second;
+    addr = alignTo(addr, section->align);
+    // We must align the file offsets too to avoid misaligned writes of
+    // structs.
+    fileOff = alignTo(fileOff, section->align);
+    section->addr = addr;
+    section->fileOff = fileOff;
+    section->finalize();
+
+    addr += section->getSize();
+    fileOff += section->getFileSize();
   }
-
-  // TODO: emit bind opcodes for lazy symbols.
-  // TODO: Implement symbol export trie.
 }
 
 void Writer::openFile() {
   Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
-      FileOutputBuffer::create(config->outputFile, fileSize,
+      FileOutputBuffer::create(config->outputFile, fileOff,
                                FileOutputBuffer::F_executable);
 
   if (!bufferOrErr)
@@ -383,49 +361,53 @@ void Writer::openFile() {
   buffer = std::move(*bufferOrErr);
 }
 
-void Writer::writeHeader() {
-  auto *hdr = reinterpret_cast<mach_header_64 *>(buffer->getBufferStart());
-  hdr->magic = MH_MAGIC_64;
-  hdr->cputype = CPU_TYPE_X86_64;
-  hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
-  hdr->filetype = MH_EXECUTE;
-  hdr->ncmds = loadCommands.size();
-  hdr->sizeofcmds = sizeofCmds;
-  hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
-
-  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
-  for (LoadCommand *lc : loadCommands) {
-    lc->writeTo(p);
-    p += lc->getSize();
+void Writer::writeSections() {
+  uint8_t *buf = buffer->getBufferStart();
+  for (OutputSegment *seg : outputSegments) {
+    for (auto &p : seg->getSections()) {
+      OutputSection *section = p.second;
+      section->writeTo(buf + section->fileOff);
+    }
   }
 }
 
-void Writer::writeSections() {
-  uint8_t *buf = buffer->getBufferStart();
+void Writer::run() {
+  // dyld requires __LINKEDIT segment to always exist (even if empty).
+  OutputSegment *linkEditSegment =
+      getOrCreateOutputSegment(segment_names::linkEdit);
 
-  for (OutputSegment *seg : outputSegments)
-    for (auto &sect : seg->sections)
-      for (InputSection *isec : sect.second)
-        isec->writeTo(buf + isec->addr - ImageBase);
+  scanRelocations();
 
-  memcpy(buf + linkEditSeg->fileOff, linkEditSeg->contents.data(),
-         linkEditSeg->contents.size());
-}
+  // Sort and assign sections to their respective segments. No more sections
+  // or segments may be created after this method runs.
+  createOutputSections();
+  sortOutputSegmentsAndSections();
 
-void Writer::run() {
   createLoadCommands();
-  scanRelocations();
-  assignAddresses();
 
-  // Fill __LINKEDIT contents
-  createDyldInfoContents();
-  fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size();
+  // Ensure that segments (and the sections they contain) are allocated
+  // addresses in ascending order, which dyld requires.
+  //
+  // Note that at this point, __LINKEDIT sections are empty, but we need to
+  // determine addresses of other segments/sections before generating its
+  // contents.
+  for (OutputSegment *seg : outputSegments)
+    if (seg != linkEditSegment)
+      assignAddresses(seg);
+
+  // Fill __LINKEDIT contents.
+  bindingSection->finalizeContents();
+  exportSection->finalizeContents();
+  symtabSection->finalizeContents();
+
+  // Now that __LINKEDIT is filled out, do a proper calculation of its
+  // addresses and offsets.
+  assignAddresses(linkEditSegment);
 
   openFile();
   if (errorCount())
     return;
 
-  writeHeader();
   writeSections();
 
   if (auto e = buffer->commit())
@@ -434,7 +416,4 @@
 
 void macho::writeResult() { Writer().run(); }
 
-void macho::createSyntheticSections() {
-  in.got = make<GotSection>();
-  inputSections.push_back(in.got);
-}
+void macho::createSyntheticSections() { in.got = make<GotSection>(); }
diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h
index accdedca142b9..7f846233107a6 100644
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@@ -9,9 +9,18 @@
 #ifndef LLD_MACHO_WRITER_H
 #define LLD_MACHO_WRITER_H
 
+#include <cstdint>
+
 namespace lld {
 namespace macho {
 
+class LoadCommand {
+public:
+  virtual ~LoadCommand() = default;
+  virtual uint32_t getSize() const = 0;
+  virtual void writeTo(uint8_t *buf) const = 0;
+};
+
 void writeResult();
 
 void createSyntheticSections();
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 1d55cf052c9bc..54c0d7162d6b8 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -414,6 +414,9 @@ List removed unused sections.
 List identical folded sections.
 .It Fl -print-map
 Print a link map to the standard output.
+.It Fl -print-archive-stats Ns = Ns Ar file
+Write archive usage statistics to the specified file.
+Print the numbers of members and fetched members for each archive.
 .It Fl -push-state
 Save the current state of
 .Fl -as-needed ,
diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt
index 80721384c287c..150865245965e 100644
--- a/lld/test/CMakeLists.txt
+++ b/lld/test/CMakeLists.txt
@@ -35,8 +35,9 @@ set(LLD_TEST_DEPS lld)
 if (NOT LLD_BUILT_STANDALONE)
   list(APPEND LLD_TEST_DEPS
     FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres
-    llvm-dis llvm-dwarfdump llvm-lib llvm-mc llvm-nm llvm-objcopy llvm-objdump
-    llvm-pdbutil llvm-readelf llvm-readobj not obj2yaml opt yaml2obj
+    llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy
+    llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj not obj2yaml opt
+    yaml2obj
     )
 endif()
diff --git a/lld/test/COFF/lto-obj-path.ll b/lld/test/COFF/lto-obj-path.ll
index 987e2bd778961..ab0c82e9b1704 100644
--- a/lld/test/COFF/lto-obj-path.ll
+++ b/lld/test/COFF/lto-obj-path.ll
@@ -11,6 +11,18 @@
 ; RUN: llvm-nm %t4.obj 2>&1 | FileCheck %s -check-prefix=SYMBOLS
 ; RUN: llvm-nm %t4.obj 2>&1 | count 1
 
+;; Ensure lld emits an empty combined module if -lto-obj-path is specified.
+; RUN: rm -fr %t.dir/objpath && mkdir -p %t.dir/objpath
+; RUN: lld-link /out:%t.dir/objpath/a.exe -lto-obj-path:%t4.obj \
+; RUN:     -entry:main %t1.obj %t2.obj -lldsavetemps
+; RUN: ls %t.dir/objpath/a.exe.lto.* | count 3
+
+;; Ensure lld does not emit an empty combined module by default.
+; RUN: rm -fr %t.dir/objpath && mkdir -p %t.dir/objpath
+; RUN: lld-link /out:%t.dir/objpath/a.exe \
+; RUN:     -entry:main %t1.obj %t2.obj -lldsavetemps
+; RUN: ls %t.dir/objpath/a.exe.lto.* | count 2
+
 ; CHECK: Format: COFF-x86-64
 
 ; SYMBOLS: @feat.00
diff --git a/lld/test/COFF/pdb-thinlto.ll b/lld/test/COFF/pdb-thinlto.ll
index de0eef2b67da4..4954f841e21dc 100644
--- a/lld/test/COFF/pdb-thinlto.ll
+++ b/lld/test/COFF/pdb-thinlto.ll
@@ -29,10 +29,8 @@ declare void @foo()
 
 ; CHECK: Modules
 ; CHECK: ============================================================
-; CHECK: Mod 0000 | `{{.*}}main.exe.lto.obj`:
-; CHECK: Obj: `{{.*}}main.exe.lto.obj`:
-; CHECK: Mod 0001 | `{{.*}}main.exe.lto.1.obj`:
+; CHECK: Mod 0000 | `{{.*}}main.exe.lto.1.obj`:
 ; CHECK: Obj: `{{.*}}main.exe.lto.1.obj`:
-; CHECK: Mod 0002 | `{{.*}}main.exe.lto.2.obj`:
+; CHECK: Mod 0001 | `{{.*}}main.exe.lto.2.obj`:
 ; CHECK: Obj: `{{.*}}main.exe.lto.2.obj`:
-; CHECK: Mod 0003 | `* Linker *`:
+; CHECK: Mod 0002 | `* Linker *`:
diff --git a/lld/test/ELF/arm-exidx-mapping-symbols.s b/lld/test/ELF/arm-exidx-mapping-symbols.s
new file mode 100644
index 0000000000000..623eba630ed37
--- /dev/null
+++ b/lld/test/ELF/arm-exidx-mapping-symbols.s
@@ -0,0 +1,26 @@
+// REQUIRES: arm
+// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t
+// RUN: ld.lld %t -o %t2
+// RUN: llvm-readelf -s %t2 | FileCheck %s
+// CHECK-NOT: $d.exidx.foo
+
+/// Test that symbols which point to input .ARM.exidx sections are eliminated.
+/// These symbols might be produced, for example, by GNU tools.
+
+ .syntax unified
+ .section .text.foo,"axG",%progbits,foo,comdat
+foo:
+ bx lr
+
+/// GNU as adds mapping symbols "$d" for .ARM.exidx sections it generates.
+/// llvm-mc does not do that, so reproduce that manually.
+ .section .ARM.exidx.text.foo,"ao?",%0x70000001,.text.foo
+$d.exidx.foo:
+ .reloc 0, R_ARM_NONE, __aeabi_unwind_cpp_pr0
+ .long .text.foo(PREL31)
+ .long 0x80b0b0b0
+
+ .section .text.h,"ax"
+ .global __aeabi_unwind_cpp_pr0
+__aeabi_unwind_cpp_pr0:
+ bx lr
diff --git a/lld/test/ELF/arm-exidx-range.s b/lld/test/ELF/arm-exidx-range.s
new file mode 100644
index 0000000000000..69763705276c1
--- /dev/null
+++ b/lld/test/ELF/arm-exidx-range.s
@@ -0,0 +1,35 @@
+// REQUIRES: arm
+// RUN: llvm-mc --arm-add-build-attributes --triple=armv7a-linux-gnueabihf -filetype=obj %s -o %t.o
+// RUN: echo "SECTIONS { \
+// RUN:         . = 0x80000000; \
+// RUN:         .text : { *(.text) } \
+// RUN:         .vectors 0xffff0000 : { *(.vectors) } \
+// RUN:         } " > %t.script
+// RUN: ld.lld --script %t.script %t.o -o %t
+// RUN: llvm-readobj -x .ARM.exidx %t | FileCheck %s
+/// Adapted from a Linux kernel linker script that failed due to an
+/// out-of-range relocation. The .vectors at 0xffff0000 is a common occurrence,
+/// as the vector table can only be placed at either 0 or 0xffff0000 in older
+/// ARM CPUs. In the example the .vectors won't have an exception table, so if
+/// LLD creates one we'll get a relocation-out-of-range error. Check that we
+/// don't synthesise a table entry or place a sentinel out of range.
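Each .ARM.exidx entry is two 32-bit words: a PREL31 (31-bit, sign-extended, place-relative) offset to the function it describes, then either an inline unwind encoding or EXIDX_CANTUNWIND (1). A quick standalone sketch of decoding the first word, which accounts for the hex dump checked below (addresses taken from the script above):

#include <cstdint>
#include <cstdio>

// Decode a PREL31 field: sign-extend the low 31 bits, then make the result
// relative to the address ("place") of the word itself.
static uint32_t decodePrel31(uint32_t word, uint32_t place) {
  int32_t offset = static_cast<int32_t>(word << 1) >> 1;
  return place + offset;
}

int main() {
  // The two entries live at 0x80000000 and 0x80000008.
  std::printf("%#x\n", decodePrel31(0x10, 0x80000000)); // 0x80000010: _start
  std::printf("%#x\n", decodePrel31(0x0c, 0x80000008)); // 0x80000014: sentinel
  // The second word of each entry is 1, i.e. EXIDX_CANTUNWIND.
}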
+ +/// Expect only .ARM.exidx from _start and sentinel +// CHECK: Hex dump of section '.ARM.exidx': +// CHECK-NEXT: 0x80000000 10000000 01000000 0c000000 01000000 +// CHECK-NOT: 0x80000010 + + .text + .global _start + .type _start, %function +_start: + .fnstart + bx lr + .cantunwind + .fnend + + .section .vectors, "ax", %progbits + .global vecs + .type vecs, %function +vecs: + bx lr diff --git a/lld/test/ELF/gdb-index-loclists.s b/lld/test/ELF/gdb-index-loclists.s new file mode 100644 index 0000000000000..e3769b6953725 --- /dev/null +++ b/lld/test/ELF/gdb-index-loclists.s @@ -0,0 +1,37 @@ +# REQUIRES: x86 +## Regression test that we don't crash on DWARF v5 .debug_loclists + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld --gdb-index %t.o -o /dev/null + +.section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .ascii "\214\001" # DW_AT_loclists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 + +.section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_begin0-4 # Length of Unit + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size + .long 0 # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lloclists_table_base0 # DW_AT_loclists_base +.Lcu_end0: + +.section .debug_loclists,"",@progbits + .long .Ldebug_loclist_table_end0-.Ldebug_loclist_table_start0 # Length +.Ldebug_loclist_table_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count +.Lloclists_table_base0: + .byte 0 # DW_LLE_end_of_list +.Ldebug_loclist_table_end0: diff --git a/lld/test/ELF/hexagon-tls-gd-nonpreemptible.s b/lld/test/ELF/hexagon-tls-gd-nonpreemptible.s index ba0eee999c806..ff5e6dbaac710 100644 --- a/lld/test/ELF/hexagon-tls-gd-nonpreemptible.s +++ b/lld/test/ELF/hexagon-tls-gd-nonpreemptible.s @@ -3,6 +3,7 @@ # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o # RUN: ld.lld -shared %t.o -o %t.so # RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=RELOC %s +# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=REL %s # RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex %t.so | FileCheck %s ## Prior to D77021 lld would error "relocation R_HEX_GD_PLT_B22_PCREL cannot refer to absolute symbol". @@ -17,17 +18,28 @@ # RELOC-NEXT: R_HEX_JMP_SLOT __tls_get_addr 0x0 # RELOC-NEXT: } +# REL: R_HEX_B32_PCREL_X _GLOBAL_OFFSET_TABLE_ 0x0 +# REL-NEXT: R_HEX_6_PCREL_X _GLOBAL_OFFSET_TABLE_ 0x4 +# REL-NEXT: R_HEX_GD_GOT_32_6_X a 0x0 +# REL-NEXT: R_HEX_GD_GOT_16_X a 0x0 +# REL-NEXT: R_HEX_GD_PLT_B22_PCREL a 0x0 +# REL-NEXT: R_HEX_GD_PLT_B32_PCREL_X a 0x0 +# REL-NEXT: R_HEX_GD_PLT_B22_PCREL_X a 0x4 + # CHECK: { immext(#{{.*}}) # CHECK-NEXT: r2 = add(pc,##{{.*}}) } # CHECK-NEXT: { immext(#{{.*}}) # CHECK-NEXT: r0 = add(r2,##-{{.*}}) } # CHECK-NEXT: { call {{.*}} } +# CHECK-NEXT: { immext({{.*}}) +# CHECK-NEXT: call {{.*}} } # CHECK-NEXT: { r0 = memw(r0+#0x0) } _start: r2 = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) r0 = add(r2,##a@GDGOT) call a@GDPLT + call ##a@GDPLT r0 = memw(r0+#0) ## a is non-preemptible due to STV_HIDDEN visibility. diff --git a/lld/test/ELF/linkerscript/at8.test b/lld/test/ELF/linkerscript/at8.test index ab0e3c63095d2..c611c2e244040 100644 --- a/lld/test/ELF/linkerscript/at8.test +++ b/lld/test/ELF/linkerscript/at8.test @@ -20,7 +20,7 @@ SECTIONS { # PT_LOAD header. 
# CHECK: Name Type Address Off -# CHECK: .text PROGBITS 0000000008000000 001000 +# CHECK: .text PROGBITS 0000000008000000 000158 # CHECK: .sec1 PROGBITS 0000000020000000 001000 # CHECK: .sec2 PROGBITS 0000000020000008 001008 # CHECK: .sec3 PROGBITS 0000000020000010 001010 diff --git a/lld/test/ELF/linkerscript/empty-sections-expressions.test b/lld/test/ELF/linkerscript/empty-sections-expressions.test index 3f1da48f325a9..ed6c45de6656b 100644 --- a/lld/test/ELF/linkerscript/empty-sections-expressions.test +++ b/lld/test/ELF/linkerscript/empty-sections-expressions.test @@ -2,19 +2,25 @@ # RUN: echo ".text; nop; .data; .byte 0" \ # RUN: | llvm-mc -filetype=obj -triple=x86_64-pc-linux - -o %t.o # RUN: ld.lld -o %t --script %s %t.o -# RUN: llvm-readelf -program-headers %t | FileCheck %s +# RUN: llvm-readelf -S -l %t | FileCheck %s ## Check we do not remove the empty output sections used in LOADADDR/ADDR ## expressions and hence can evaluate the correct addresses. +# CHECK: Name Type Address Off Size +# CHECK-NEXT: NULL 0000000000000000 000000 000000 +# CHECK-NEXT: .empty PROGBITS 0000000000080000 000158 000000 +# CHECK-NEXT: .text PROGBITS 0000000000080000 001000 000001 +# CHECK-NEXT: .data PROGBITS 0000000000080001 001001 000001 + # CHECK: Program Headers: # CHECK-NEXT: Type Offset VirtAddr PhysAddr # CHECK-NEXT: LOAD 0x001000 0x0000000000080000 0x0000000000080000 # CHECK-NEXT: LOAD 0x001001 0x0000000000080001 0x0000000000082000 # CHECK: Section to Segment mapping: -# CHECK: 00 .empty .text -# CHECK-NEXT: 01 .data +# CHECK: 00 .text {{$}} +# CHECK-NEXT: 01 .data {{$}} SECTIONS { . = 0x00080000; diff --git a/lld/test/ELF/linkerscript/input-archive.s b/lld/test/ELF/linkerscript/input-archive.s index f70f3c6fa4113..211c671409bfb 100644 --- a/lld/test/ELF/linkerscript/input-archive.s +++ b/lld/test/ELF/linkerscript/input-archive.s @@ -11,7 +11,7 @@ ## *.a:b.o matches /path/to/input-archive.s.tmp.a:b.o ## *b.o matches /path/to/input-archive.s.tmp.a:b.o # RUN: echo 'SECTIONS { \ -# RUN: .foo : { %t.a:a.o(.data) } \ +# RUN: .foo : { "%t.a:a.o"(.data) } \ # RUN: .bar : { *.a:b.o(.data) } \ # RUN: .qux : { *b.o(.data1) } \ # RUN: }' > %t.script diff --git a/lld/test/ELF/linkerscript/nobits-offset.s b/lld/test/ELF/linkerscript/nobits-offset.s index 051f3f99da3b0..35397ec330ea5 100644 --- a/lld/test/ELF/linkerscript/nobits-offset.s +++ b/lld/test/ELF/linkerscript/nobits-offset.s @@ -12,13 +12,16 @@ ## sh_offset to sh_addr modulo max-page-size, so that p_vaddr=p_offset (mod ## p_align). 
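In other words, the linker need not place the section at a file offset equal to sh_addr; any offset congruent to the address modulo the page size satisfies the loader. A small sketch of that computation (just the congruence the CHECK lines verify, not lld's code):

```cpp
#include <cstdint>
#include <cstdio>

// Smallest offset >= base that satisfies off == addr (mod align);
// `align` must be a power of two.
static uint64_t chooseOffset(uint64_t base, uint64_t addr, uint64_t align) {
  return base + ((addr - base) & (align - 1));
}

int main() {
  // With prior contents ending at offset 0x158 and max-page-size 0x1000,
  // .bss at address 0x400 gets file offset 0x400, matching the LOAD
  // segment (Offset 0x000400, VirtAddr 0x400) checked below.
  std::printf("0x%llx\n",
              (unsigned long long)chooseOffset(0x158, 0x400, 0x1000));
}
```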
-# CHECK: Name Type Address Off Size ES Flg Lk Inf Al -# CHECK: .bss NOBITS 0000000000000400 001400 000001 00 WA 0 0 1024 +# CHECK: Name Type Address Off Size +# CHECK-NEXT: NULL 0000000000000000 000000 000000 +# CHECK-NEXT: .text PROGBITS 0000000000000000 000158 000000 +# CHECK-NEXT: .sec1 NOBITS 0000000000000000 000158 000001 +# CHECK-NEXT: .bss NOBITS 0000000000000400 000400 000001 -# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# CHECK: LOAD 0x001400 0x0000000000000400 0x0000000000000400 0x000000 0x000001 RW 0x1000 +# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: LOAD 0x000400 0x0000000000000400 0x0000000000000400 0x000000 0x000001 RW 0x1000 -# CHECK: 00 .bss +# CHECK: 00 .bss {{$}} .bss .p2align 10 diff --git a/lld/test/ELF/linkerscript/thunk-gen-mips.s b/lld/test/ELF/linkerscript/thunk-gen-mips.s index 97d06d0ee6542..aa74164b15688 100644 --- a/lld/test/ELF/linkerscript/thunk-gen-mips.s +++ b/lld/test/ELF/linkerscript/thunk-gen-mips.s @@ -4,20 +4,20 @@ # SECTIONS command with the first pattern that does not match. # Linking a PIC and non-PIC object files triggers the LA25 thunk generation. -# RUN: echo "SECTIONS { \ +# RUN: echo 'SECTIONS { \ # RUN: .text : { \ # RUN: *(.nomatch) \ -# RUN: %t(.text) \ +# RUN: "%t"(.text) \ # RUN: . = . + 0x100000 ; \ -# RUN: %t1(.text) \ +# RUN: "%t1"(.text) \ # RUN: } \ -# RUN: }" > %t.script +# RUN: }' > %t.script # RUN: ld.lld -o %t.exe --script %t.script %t %t1 # RUN: llvm-objdump -t %t.exe | FileCheck %s # CHECK: SYMBOL TABLE: -# CHECK-ANY: 00000000 .text 00000000 _start -# CHECK-ANY: 0010000c l F .text 00000010 __LA25Thunk_too_far -# CHECK-ANY: 00100020 g F .text 00000024 too_far +# CHECK-DAG: [[#%x, START_ADDR:]] g .text 00000000 _start +# CHECK-DAG: {{0*}}[[#THUNK_ADDR:START_ADDR+0x100000+12]] l F .text 00000010 __LA25Thunk_too_far +# CHECK-DAG: {{0*}}[[#THUNK_ADDR+20]] g F .text 0000000c too_far .ifdef MAIN .global _start diff --git a/lld/test/ELF/linkorder-script.s b/lld/test/ELF/linkorder-script.s new file mode 100644 index 0000000000000..6bf2887abb2a5 --- /dev/null +++ b/lld/test/ELF/linkorder-script.s @@ -0,0 +1,32 @@ +// REQUIRES: x86 +// RUN: llvm-mc --triple=x86_64 -filetype=obj %s -o %t.o +// RUN: echo "SECTIONS { \ +// RUN: . = 0x80000000; \ +// RUN: .linkorder : { *(.linkorder.*) } \ +// RUN: .text : { *(.text) } \ +// RUN: .text.1 0x80000200 : AT(0x1000) { *(.text.1) } \ +// RUN: .text.2 0x80000100 : AT(0x2000) { *(.text.2) } \ +// RUN: } " > %t.script +// RUN: ld.lld --script %t.script %t.o -o %t +// RUN: llvm-readobj -x .linkorder %t | FileCheck %s + +/// When a linker script does not have monotonically increasing addresses +/// the SHF_LINK_ORDER sections should still be in monotonically increasing +/// order. 
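Concretely, the pieces of an output SHF_LINK_ORDER section are sorted by the output address of the section each piece links to, not by input order or by the script's statement order. A minimal model of that rule (assumed semantics and illustrative types, not lld's data structures):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Piece {
  uint64_t linkedToAddr; // output address of the section named in sh_link
  unsigned char payload;
};

int main() {
  // Input order: .linkorder.1 (-> .text.1 at 0x80000200) comes first,
  // but .text.2 is laid out lower (0x80000100).
  std::vector<Piece> pieces = {{0x80000200, 0x01}, {0x80000100, 0x02}};
  std::stable_sort(pieces.begin(), pieces.end(),
                   [](const Piece &a, const Piece &b) {
                     return a.linkedToAddr < b.linkedToAddr;
                   });
  for (const Piece &p : pieces)
    std::printf("%02x ", p.payload); // prints "02 01", as the hex dump expects
}
```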
+ +// CHECK: Hex dump of section '.linkorder': +// CHECK-NEXT: 0x80000000 0201 + +.section .text.1, "ax", %progbits +.global _start +_start: +nop + +.section .text.2, "ax", %progbits +.byte 0 + +.section .linkorder.1, "ao", %progbits, .text.1 +.byte 1 + +.section .linkorder.2, "ao", %progbits, .text.2 +.byte 2 diff --git a/lld/test/ELF/lto/emit-asm.ll b/lld/test/ELF/lto/emit-asm.ll new file mode 100644 index 0000000000000..d0719411a5bad --- /dev/null +++ b/lld/test/ELF/lto/emit-asm.ll @@ -0,0 +1,24 @@ +; REQUIRES: x86 +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld --lto-emit-asm -shared %t.o -o - | FileCheck %s +; RUN: ld.lld --plugin-opt=emit-asm --plugin-opt=lto-partitions=2 -shared %t.o -o %t2.s +; RUN: cat %t2.s %t2.s1 | FileCheck %s + +; RUN: ld.lld --lto-emit-asm --save-temps -shared %t.o -o %t3.s +; RUN: FileCheck --input-file %t3.s %s +; RUN: llvm-dis %t3.s.0.4.opt.bc -o - | FileCheck --check-prefix=OPT %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-DAG: f1: +; OPT-DAG: define void @f1() +define void @f1() { + ret void +} + +; CHECK-DAG: f2: +; OPT-DAG: define void @f2() +define void @f2() { + ret void +} diff --git a/lld/test/ELF/lto/linker-script-symbols-assign.ll b/lld/test/ELF/lto/linker-script-symbols-assign.ll index 079113e1fcfdf..c66bd1dda0158 100644 --- a/lld/test/ELF/lto/linker-script-symbols-assign.ll +++ b/lld/test/ELF/lto/linker-script-symbols-assign.ll @@ -1,13 +1,13 @@ ; REQUIRES: x86 ; RUN: llvm-as %s -o %t.o +; RUN: rm -f %t2.* ; RUN: echo "foo = 1;" > %t.script ; RUN: ld.lld %t.o -o %t2 --script %t.script -save-temps +;; Combined module is not empty, but it will be empty after optimization. +;; Ensure lld still emits empty combined obj in this case. ; RUN: llvm-nm %t2.lto.o | count 0 -; CHECK-NOT: bar -; CHECK-NOT: foo - ; RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=VAL ; VAL: Symbol { ; VAL: Name: foo diff --git a/lld/test/ELF/lto/thinlto-obj-path.ll b/lld/test/ELF/lto/thinlto-obj-path.ll index 787c1c03b6651..5d22360fc282b 100644 --- a/lld/test/ELF/lto/thinlto-obj-path.ll +++ b/lld/test/ELF/lto/thinlto-obj-path.ll @@ -14,6 +14,16 @@ ; RUN: ld.lld -thinlto-index-only -lto-obj-path=%t4.o -shared %t1.o %t2.o -o /dev/null ; RUN: llvm-readobj -h %t4.o | FileCheck %s +;; Ensure lld emits empty combined module if specific obj-path. +; RUN: rm -fr %t.dir/objpath && mkdir -p %t.dir/objpath +; RUN: ld.lld --plugin-opt=obj-path=%t4.o -shared %t1.o %t2.o -o %t.dir/objpath/a.out --save-temps +; RUN: ls %t.dir/objpath/a.out*.lto.* | count 3 + +;; Ensure lld does not emit empty combined module in default. 
+; RUN: rm -fr %t.dir/objpath && mkdir -p %t.dir/objpath +; RUN: ld.lld %t1.o %t2.o -o %t.dir/objpath/a.out --save-temps +; RUN: ls %t.dir/objpath/a.out*.lto.* | count 2 + ; CHECK: Format: elf64-x86-64 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/lld/test/ELF/ppc64-toc-relax2.s b/lld/test/ELF/ppc64-toc-relax2.s new file mode 100644 index 0000000000000..77b4a1c99948e --- /dev/null +++ b/lld/test/ELF/ppc64-toc-relax2.s @@ -0,0 +1,66 @@ +# REQUIRES: ppc +# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o +# RUN: echo 'addis 5, 2, .LC0@toc@ha; ld 5, .LC0@toc@l(5); foo: \ +# RUN: .section .toc,"aw",@progbits; .LC0: .tc foo[TC], foo' \ +# RUN: | llvm-mc -filetype=obj -triple=powerpc64le - -o %t1.o +# RUN: ld.lld %t.o %t1.o -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK-LABEL: <_start>: +.globl _start +_start: +## Perform toc-indirect to toc-relative relaxation even if there are unrelated instructions in between. +# CHECK-NEXT: addis 3, 2, -2 +# CHECK-NEXT: li 9, 0 +# CHECK-NEXT: addi 3, 3, 32752 +# CHECK-NEXT: lwa 3, 0(3) + addis 3, 2, .LC1@toc@ha # R_PPC64_TOC16_HA + li 9, 0 + ld 3, .LC1@toc@l(3) # R_PPC64_TOC16_LO_DS + lwa 3, 0(3) + +## R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS can interleave. +# CHECK-NEXT: addis 3, 2, -2 +# CHECK-NEXT: addis 4, 2, -2 +# CHECK-NEXT: addi 3, 3, 32752 +# CHECK-NEXT: addi 4, 4, 32756 + addis 3, 2, .LC1@toc@ha + addis 4, 2, .LC2@toc@ha + ld 3, .LC1@toc@l(3) + ld 4, .LC2@toc@l(4) + +## We choose to be conservative: the presence of R_PPC64_TOC16_LO +## suppresses relaxation for the symbol. +## R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS pairs are not relaxed as well. +# CHECK-NEXT: nop +# CHECK-NEXT: addi 3, 2, -32768 +# CHECK-NEXT: li 9, 0 +# CHECK-NEXT: nop +# CHECK-NEXT: ld 4, -32768(2) + addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA + addi 3, 3, .LC0@toc@l # R_PPC64_TOC16_LO + li 9, 0 + addis 4, 2, .LC0@toc@ha + ld 4, .LC0@toc@l(4) + +# CHECK-COUNT-3: blr +AES_encrypt: + blr +AES_decrypt: + blr +BN_free: + blr + +## %t1.o has relaxable relocation pairs referencing its .toc which is different +## from %t.o(.toc). The suppression in %t.o does not affect %t1.o even if +## the relocation addends are the same. +# CHECK-NEXT: addis 5, 2, -1 +# CHECK-NEXT: addi 5, 5, -32768 + +.section .toc,"aw",@progbits +.LC0: + .tc AES_encrypt[TC], AES_encrypt +.LC1: + .tc AES_decrypt[TC], AES_decrypt +.LC2: + .tc BN_free[TC], BN_free diff --git a/lld/test/ELF/print-archive-stats.s b/lld/test/ELF/print-archive-stats.s new file mode 100644 index 0000000000000..3f5c4820f0c6a --- /dev/null +++ b/lld/test/ELF/print-archive-stats.s @@ -0,0 +1,38 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo '.globl weak; weak:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tweak.o +# RUN: echo '.global foo; foo:' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o +# RUN: echo '.global bar; bar:' | llvm-mc -filetype=obj -triple=x86_64 - -o %t2.o +# RUN: echo '.global baz; baz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o +# RUN: rm -f %tweak.a && llvm-ar rc %tweak.a %tweak.o +# RUN: rm -f %t1.a && llvm-ar rc %t1.a %t1.o %t2.o %t3.o + +# RUN: ld.lld %t.o %tweak.a %t1.a --print-archive-stats=%t.txt -o /dev/null +# RUN: FileCheck --input-file=%t.txt -DT=%t %s --match-full-lines --strict-whitespace + +## Fetches 0 member from %tweak.a and 2 members from %t1.a +# CHECK:members fetched archive +# CHECK-NEXT:1 0 [[T]]weak.a +# CHECK-NEXT:3 2 [[T]]1.a + +## - means stdout. 
+# RUN: ld.lld %t.o %tweak.a %t1.a --print-archive-stats=- -o /dev/null | diff %t.txt - + +## The second %t1.a has 0 fetched member. +# RUN: ld.lld %t.o %tweak.a %t1.a %t1.a --print-archive-stats=- -o /dev/null | \ +# RUN: FileCheck --check-prefix=CHECK2 %s +# CHECK2: members fetched archive +# CHECK2-NEXT: 1 0 {{.*}}weak.a +# CHECK2-NEXT: 3 2 {{.*}}1.a +# CHECK2-NEXT: 3 0 {{.*}}1.a + +# RUN: not ld.lld -shared %t.o --print-archive-stats=/ -o /dev/null 2>&1 | FileCheck --check-prefix=ERR %s +# ERR: error: --print-archive-stats=: cannot open /: {{.*}} + +.globl _start +.weak weak +_start: + call foo + call bar + call weak diff --git a/lld/test/ELF/reproduce-linkerscript.s b/lld/test/ELF/reproduce-linkerscript.s index 2a2033562c53e..7cc070c3737f3 100644 --- a/lld/test/ELF/reproduce-linkerscript.s +++ b/lld/test/ELF/reproduce-linkerscript.s @@ -7,8 +7,7 @@ # RUN: echo "INCLUDE \"%t.dir/build/bar.script\"" >> %t.dir/build/foo.script # RUN: echo "/* empty */" > %t.dir/build/bar.script # RUN: cd %t.dir -# RUN: ld.lld build/foo.script --verbose --reproduce repro.tar -# RUN: tar tf repro.tar +# RUN: ld.lld build/foo.script -o /dev/null --reproduce repro.tar # RUN: tar tf repro.tar | FileCheck -DPATH='%:t.dir' %s # CHECK: [[PATH]]/build/foo.script diff --git a/lld/test/ELF/segments.s b/lld/test/ELF/segments.s index 5165fc3c48589..9b88f48ac0902 100644 --- a/lld/test/ELF/segments.s +++ b/lld/test/ELF/segments.s @@ -3,8 +3,8 @@ # RUN: ld.lld %t -o %t1 # RUN: llvm-readobj --program-headers %t1 | FileCheck --check-prefix=ROSEGMENT %s -# RUN: ld.lld --omagic --no-omagic %t -o %t1 -# RUN: llvm-readobj --program-headers %t1 | FileCheck --check-prefix=ROSEGMENT %s +# RUN: ld.lld --no-rosegment --rosegment %t -o - | cmp - %t1 +# RUN: ld.lld --omagic --no-omagic %t -o - | cmp - %t1 # ROSEGMENT: ProgramHeader { # ROSEGMENT: Type: PT_LOAD diff --git a/lld/test/MachO/Inputs/goodbye-dylib.yaml b/lld/test/MachO/Inputs/goodbye-dylib.yaml deleted file mode 100644 index 55797f7cc22af..0000000000000 --- a/lld/test/MachO/Inputs/goodbye-dylib.yaml +++ /dev/null @@ -1,175 +0,0 @@ -## This yaml file was originally generated from linking the following source -## input with ld64: -## -## .section __TEXT,__cstring -## .globl _goodbye_world -## -## _goodbye_world: -## .asciz "Goodbye world!\n" -## -## When lld can produce dylibs, we will use that instead for our test setup. 
- ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x01000007 - cpusubtype: 0x00000003 - filetype: 0x00000006 - ncmds: 11 - sizeofcmds: 624 - flags: 0x00100085 - reserved: 0x00000000 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 232 - segname: __TEXT - vmaddr: 0 - vmsize: 4096 - fileoff: 0 - filesize: 4096 - maxprot: 5 - initprot: 5 - nsects: 2 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0000000000000FF0 - size: 0 - offset: 0x00000FF0 - align: 0 - reloff: 0x00000000 - nreloc: 0 - flags: 0x80000400 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - content: '' - - sectname: __cstring - segname: __TEXT - addr: 0x0000000000000FF0 - size: 16 - offset: 0x00000FF0 - align: 0 - reloff: 0x00000000 - nreloc: 0 - flags: 0x00000002 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - content: 476F6F6462796520776F726C64210A00 - - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __LINKEDIT - vmaddr: 4096 - vmsize: 4096 - fileoff: 4096 - filesize: 72 - maxprot: 1 - initprot: 1 - nsects: 0 - flags: 0 - - cmd: LC_ID_DYLIB - cmdsize: 64 - dylib: - name: 24 - timestamp: 1 - current_version: 0 - compatibility_version: 0 - PayloadString: '@executable_path/libgoodbye.dylib' - ZeroPadBytes: 7 - - cmd: LC_DYLD_INFO_ONLY - cmdsize: 48 - rebase_off: 0 - rebase_size: 0 - bind_off: 0 - bind_size: 0 - weak_bind_off: 0 - weak_bind_size: 0 - lazy_bind_off: 0 - lazy_bind_size: 0 - export_off: 4096 - export_size: 24 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 4128 - nsyms: 1 - stroff: 4144 - strsize: 24 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 0 - iextdefsym: 0 - nextdefsym: 1 - iundefsym: 1 - nundefsym: 0 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 - - cmd: LC_UUID - cmdsize: 24 - uuid: EA09CDDC-A3EA-3EB9-8C4F-334077FE6E5A - - cmd: LC_BUILD_VERSION - cmdsize: 32 - platform: 1 - minos: 659200 - sdk: 659200 - ntools: 1 - Tools: - - tool: 3 - version: 34734080 - - cmd: LC_SOURCE_VERSION - cmdsize: 16 - version: 0 - - cmd: LC_FUNCTION_STARTS - cmdsize: 16 - dataoff: 4120 - datasize: 8 - - cmd: LC_DATA_IN_CODE - cmdsize: 16 - dataoff: 4128 - datasize: 0 -LinkEditData: - ExportTrie: - TerminalSize: 0 - NodeOffset: 0 - Name: '' - Flags: 0x0000000000000000 - Address: 0x0000000000000000 - Other: 0x0000000000000000 - ImportName: '' - Children: - - TerminalSize: 3 - NodeOffset: 18 - Name: _goodbye_world - Flags: 0x0000000000000000 - Address: 0x0000000000000FF0 - Other: 0x0000000000000000 - ImportName: '' - NameList: - - n_strx: 2 - n_type: 0x0F - n_sect: 2 - n_desc: 0 - n_value: 4080 - StringTable: - - ' ' - - _goodbye_world - - '' - - '' - - '' - - '' - - '' - - '' - - '' -... diff --git a/lld/test/MachO/Inputs/hello-dylib.yaml b/lld/test/MachO/Inputs/hello-dylib.yaml deleted file mode 100644 index fde922856efa8..0000000000000 --- a/lld/test/MachO/Inputs/hello-dylib.yaml +++ /dev/null @@ -1,169 +0,0 @@ -## This yaml file was originally generated from linking the following source -## input with ld64: -## -## .section __TEXT,__cstring -## .globl _hello_world -## -## _hello_world: -## .asciz "Hello world!\n" -## -## When lld can produce dylibs, we will use that instead for our test setup. 
- ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x01000007 - cpusubtype: 0x00000003 - filetype: 0x00000006 - ncmds: 11 - sizeofcmds: 616 - flags: 0x00100085 - reserved: 0x00000000 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 232 - segname: __TEXT - vmaddr: 0 - vmsize: 4096 - fileoff: 0 - filesize: 4096 - maxprot: 5 - initprot: 5 - nsects: 2 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0000000000000FF2 - size: 0 - offset: 0x00000FF2 - align: 0 - reloff: 0x00000000 - nreloc: 0 - flags: 0x80000400 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - content: '' - - sectname: __cstring - segname: __TEXT - addr: 0x0000000000000FF2 - size: 14 - offset: 0x00000FF2 - align: 0 - reloff: 0x00000000 - nreloc: 0 - flags: 0x00000002 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - content: 48656C6C6F20776F726C64210A00 - - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __LINKEDIT - vmaddr: 4096 - vmsize: 4096 - fileoff: 4096 - filesize: 64 - maxprot: 1 - initprot: 1 - nsects: 0 - flags: 0 - - cmd: LC_ID_DYLIB - cmdsize: 56 - dylib: - name: 24 - timestamp: 1 - current_version: 0 - compatibility_version: 0 - PayloadString: '@executable_path/libhello.dylib' - ZeroPadBytes: 1 - - cmd: LC_DYLD_INFO_ONLY - cmdsize: 48 - rebase_off: 0 - rebase_size: 0 - bind_off: 0 - bind_size: 0 - weak_bind_off: 0 - weak_bind_size: 0 - lazy_bind_off: 0 - lazy_bind_size: 0 - export_off: 4096 - export_size: 24 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 4128 - nsyms: 1 - stroff: 4144 - strsize: 16 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 0 - iextdefsym: 0 - nextdefsym: 1 - iundefsym: 1 - nundefsym: 0 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 - - cmd: LC_UUID - cmdsize: 24 - uuid: 4826226E-9210-3984-A388-D5BD6D6DB368 - - cmd: LC_BUILD_VERSION - cmdsize: 32 - platform: 1 - minos: 659200 - sdk: 659200 - ntools: 1 - Tools: - - tool: 3 - version: 34734080 - - cmd: LC_SOURCE_VERSION - cmdsize: 16 - version: 0 - - cmd: LC_FUNCTION_STARTS - cmdsize: 16 - dataoff: 4120 - datasize: 8 - - cmd: LC_DATA_IN_CODE - cmdsize: 16 - dataoff: 4128 - datasize: 0 -LinkEditData: - ExportTrie: - TerminalSize: 0 - NodeOffset: 0 - Name: '' - Flags: 0x0000000000000000 - Address: 0x0000000000000000 - Other: 0x0000000000000000 - ImportName: '' - Children: - - TerminalSize: 3 - NodeOffset: 16 - Name: _hello_world - Flags: 0x0000000000000000 - Address: 0x0000000000000FF2 - Other: 0x0000000000000000 - ImportName: '' - NameList: - - n_strx: 2 - n_type: 0x0F - n_sect: 2 - n_desc: 0 - n_value: 4082 - StringTable: - - ' ' - - _hello_world - - '' -... 
diff --git a/lld/test/MachO/Inputs/libfunction.s b/lld/test/MachO/Inputs/libfunction.s new file mode 100644 index 0000000000000..fe0b3879a41ab --- /dev/null +++ b/lld/test/MachO/Inputs/libfunction.s @@ -0,0 +1,6 @@ +.section __TEXT,__text +.globl _some_function + +_some_function: + mov $1, %rax + ret diff --git a/lld/test/MachO/Inputs/libgoodbye.s b/lld/test/MachO/Inputs/libgoodbye.s new file mode 100644 index 0000000000000..205f877fbf01f --- /dev/null +++ b/lld/test/MachO/Inputs/libgoodbye.s @@ -0,0 +1,5 @@ +.section __TEXT,__cstring +.globl _goodbye_world + +_goodbye_world: +.asciz "Goodbye world!\n" diff --git a/lld/test/MachO/Inputs/libhello.s b/lld/test/MachO/Inputs/libhello.s new file mode 100644 index 0000000000000..9fc880c85cb5a --- /dev/null +++ b/lld/test/MachO/Inputs/libhello.s @@ -0,0 +1,8 @@ +.section __TEXT,__cstring +.globl _hello_world, _hello_its_me + +_hello_world: +.asciz "Hello world!\n" + +_hello_its_me: +.asciz "Hello, it's me\n" diff --git a/lld/test/MachO/alignment-too-large.yaml b/lld/test/MachO/alignment-too-large.yaml index 18d133f558690..79950524bcf83 100644 --- a/lld/test/MachO/alignment-too-large.yaml +++ b/lld/test/MachO/alignment-too-large.yaml @@ -1,7 +1,7 @@ # RUN: yaml2obj %s -o %t.o # RUN: not lld -flavor darwinnew -o %t %t.o 2>&1 | FileCheck %s # -# CHECK: alignment 32 of section __text is too large +# CHECK: error: alignment 32 of section __text is too large --- !mach-o FileHeader: magic: 0xFEEDFACF diff --git a/lld/test/MachO/arch.s b/lld/test/MachO/arch.s index 313654937c801..e119eb910d947 100644 --- a/lld/test/MachO/arch.s +++ b/lld/test/MachO/arch.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-darwin %s -o %t.o # RUN: lld -flavor darwinnew -arch x86_64 -o /dev/null %t.o # RUN: not lld -flavor darwinnew -arch i386 -o /dev/null %t.o 2>&1 | FileCheck %s -# CHECK: missing or unsupported -arch i386 +# CHECK: error: missing or unsupported -arch i386 .text .global _main diff --git a/lld/test/MachO/duplicate-symbol.s b/lld/test/MachO/duplicate-symbol.s index 47f597f3ec024..d08f34fc0de01 100644 --- a/lld/test/MachO/duplicate-symbol.s +++ b/lld/test/MachO/duplicate-symbol.s @@ -3,7 +3,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t-dup.o # RUN: not lld -flavor darwinnew -o /dev/null %t-dup.o %t.o 2>&1 | FileCheck %s -# CHECK: duplicate symbol: _main +# CHECK: error: duplicate symbol: _main .text .global _main diff --git a/lld/test/MachO/dylib.s b/lld/test/MachO/dylib.s new file mode 100644 index 0000000000000..507a7de874237 --- /dev/null +++ b/lld/test/MachO/dylib.s @@ -0,0 +1,35 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o + +# RUN: lld -flavor darwinnew -dylib -install_name @executable_path/libfoo.dylib \ +# RUN: %t.o -o %t.dylib +# RUN: llvm-objdump --macho --dylib-id %t.dylib | FileCheck %s +# CHECK: @executable_path/libfoo.dylib + +## If we are building a dylib, we shouldn't error out even if we are passed +## a flag for a missing entry symbol (since dylibs don't have entry symbols). +## Also check that we come up with the right install name if one isn't +## specified. +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.defaultInstallName.dylib -e missing_entry +# RUN: obj2yaml %t.defaultInstallName.dylib | FileCheck %s -DOUTPUT=%t.defaultInstallName.dylib --check-prefix=DEFAULT-INSTALL-NAME +# DEFAULT-INSTALL-NAME: [[OUTPUT]] + +## Check for the absence of load commands / segments that should not be in a +## dylib. 
+# RUN: llvm-objdump --macho --all-headers %t.dylib | FileCheck %s --check-prefix=NCHECK +# NCHECK-NOT: cmd LC_LOAD_DYLINKER +# NCHECK-NOT: cmd LC_MAIN +# NCHECK-NOT: segname __PAGEZERO + +# RUN: llvm-objdump --syms --exports-trie %t.dylib | \ +# RUN: FileCheck %s --check-prefix=EXPORTS +# EXPORTS-LABEL: SYMBOL TABLE: +# EXPORTS: [[#%x, HELLO_WORLD_ADDR:]] {{.*}} _hello_world +# EXPORTS-LABEL: Exports trie: +# EXPORTS: 0x{{0*}}[[#%X, HELLO_WORLD_ADDR]] _hello_world + +.section __TEXT,__cstring +.globl _hello_world + +_hello_world: +.asciz "Hello world!\n" diff --git a/lld/test/MachO/dylink.s b/lld/test/MachO/dylink.s index 6ec7ed05a40fb..26302013035d7 100644 --- a/lld/test/MachO/dylink.s +++ b/lld/test/MachO/dylink.s @@ -1,7 +1,13 @@ # REQUIRES: x86 # RUN: mkdir -p %t -# RUN: yaml2obj %p/Inputs/hello-dylib.yaml -o %t/libhello.dylib -# RUN: yaml2obj %p/Inputs/goodbye-dylib.yaml -o %t/libgoodbye.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \ +# RUN: -o %t/libgoodbye.o +# RUN: lld -flavor darwinnew -dylib -install_name \ +# RUN: @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib +# RUN: lld -flavor darwinnew -dylib -install_name \ +# RUN: @executable_path/libgoodbye.dylib %t/libgoodbye.o -o %t/libgoodbye.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/dylink.o # RUN: lld -flavor darwinnew -o %t/dylink -Z -L%t -lhello -lgoodbye %t/dylink.o # RUN: llvm-objdump --bind -d %t/dylink | FileCheck %s @@ -9,12 +15,16 @@ # CHECK: movq [[#%u, HELLO_OFF:]](%rip), %rsi # CHECK-NEXT: [[#%x, HELLO_RIP:]]: +# CHECK: movq [[#%u, HELLO_ITS_ME_OFF:]](%rip), %rsi +# CHECK-NEXT: [[#%x, HELLO_ITS_ME_RIP:]]: + # CHECK: movq [[#%u, GOODBYE_OFF:]](%rip), %rsi # CHECK-NEXT: [[#%x, GOODBYE_RIP:]]: # CHECK-LABEL: Bind table: -# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world -# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world +# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_RIP + HELLO_OFF]] pointer 0 libhello _hello_world +# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, HELLO_ITS_ME_RIP + HELLO_ITS_ME_OFF]] pointer 0 libhello _hello_its_me +# CHECK-DAG: __DATA_CONST __got 0x{{0*}}[[#%x, GOODBYE_RIP + GOODBYE_OFF]] pointer 0 libgoodbye _goodbye_world .section __TEXT,__text .globl _main @@ -26,6 +36,12 @@ _main: mov $13, %rdx # length of str syscall + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + movq _hello_its_me@GOTPCREL(%rip), %rsi + mov $15, %rdx # length of str + syscall + movl $0x2000004, %eax # write() syscall mov $1, %rdi # stdout movq _goodbye_world@GOTPCREL(%rip), %rsi diff --git a/lld/test/MachO/entry-symbol.s b/lld/test/MachO/entry-symbol.s index 1890f407f1adc..31ad84047a5c5 100644 --- a/lld/test/MachO/entry-symbol.s +++ b/lld/test/MachO/entry-symbol.s @@ -2,10 +2,10 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o /dev/null %t.o -e _not_main # RUN: not lld -flavor darwinnew -o /dev/null %t.o -e _missing 2>&1 | FileCheck %s -# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s --check-prefix=DEFAULT_ENTRY +# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s --check-prefix=DEFAULT-ENTRY -# CHECK: undefined symbol: _missing -# DEFAULT_ENTRY: undefined symbol: _main +# CHECK: error: undefined symbol: 
_missing +# DEFAULT-ENTRY: error: undefined symbol: _main .text .global _not_main diff --git a/lld/test/MachO/export-trie.s b/lld/test/MachO/export-trie.s new file mode 100644 index 0000000000000..fc1f6d110dba7 --- /dev/null +++ b/lld/test/MachO/export-trie.s @@ -0,0 +1,44 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib + +# RUN: llvm-objdump --syms --exports-trie %t.dylib | \ +# RUN: FileCheck %s --check-prefix=EXPORTS +# EXPORTS-LABEL: SYMBOL TABLE: +# EXPORTS-DAG: [[#%x, HELLO_ADDR:]] {{.*}} _hello +# EXPORTS-DAG: [[#%x, HELLO_WORLD_ADDR:]] {{.*}} _hello_world +# EXPORTS-DAG: [[#%x, HELLO_ITS_ME_ADDR:]] {{.*}} _hello_its_me +# EXPORTS-DAG: [[#%x, HELLO_ITS_YOU_ADDR:]] {{.*}} _hello_its_you +# EXPORTS-LABEL: Exports trie: +# EXPORTS-DAG: 0x{{0*}}[[#%X, HELLO_ADDR]] _hello +# EXPORTS-DAG: 0x{{0*}}[[#%X, HELLO_WORLD_ADDR]] _hello_world +# EXPORTS-DAG: 0x{{0*}}[[#%x, HELLO_ITS_ME_ADDR:]] _hello_its_me +# EXPORTS-DAG: 0x{{0*}}[[#%x, HELLO_ITS_YOU_ADDR:]] _hello_its_you + +## Check that we are sharing prefixes in the trie. +# RUN: obj2yaml %t.dylib | FileCheck %s +# CHECK-LABEL: ExportTrie: +# CHECK: Name: '' +# CHECK: Name: _hello +# CHECK: Name: _ +# CHECK: Name: world +# CHECK: Name: its_ +# CHECK: Name: me +# CHECK: Name: you + +.section __TEXT,__cstring +.globl _hello, _hello_world, _hello_its_me, _hello_its_you + +## Test for when an entire symbol name is a prefix of another. +_hello: +.asciz "Hello!\n" + +_hello_world: +.asciz "Hello world!\n" + +.data +_hello_its_me: +.asciz "Hello, it's me\n" + +_hello_its_you: +.asciz "Hello, it's you\n" diff --git a/lld/test/MachO/fat-arch.s b/lld/test/MachO/fat-arch.s new file mode 100644 index 0000000000000..7cdd800728ea2 --- /dev/null +++ b/lld/test/MachO/fat-arch.s @@ -0,0 +1,16 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=i386-apple-darwin %s -o %t.i386.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.x86_64.o +# RUN: llvm-lipo %t.i386.o %t.x86_64.o -create -o %t.fat.o +# RUN: lld -flavor darwinnew -arch x86_64 -o /dev/null %t.fat.o + +# RUN: llvm-lipo %t.i386.o -create -o %t.noarch.o +# RUN: not lld -flavor darwinnew -arch x86_64 -o /dev/null %t.noarch.o 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noarch.o +# CHECK: error: unable to find matching architecture in [[FILE]] + +.text +.global _main +_main: + mov $0, %eax + ret diff --git a/lld/test/MachO/invalid-executable.s b/lld/test/MachO/invalid-executable.s index 9ad9f51e41700..d8d7accf49c6d 100644 --- a/lld/test/MachO/invalid-executable.s +++ b/lld/test/MachO/invalid-executable.s @@ -1,8 +1,8 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o -# RUN: not lld -flavor darwinnew -o /dev/null %t 2>&1 | FileCheck %s -# CHECK: unhandled file type +# RUN: not lld -flavor darwinnew -o /dev/null %t 2>&1 | FileCheck %s -DFILE=%t +# CHECK: error: [[FILE]]: unhandled file type .text .global _main diff --git a/lld/test/MachO/invalid-fat-narch.s b/lld/test/MachO/invalid-fat-narch.s new file mode 100644 index 0000000000000..265b7b8fa41c3 --- /dev/null +++ b/lld/test/MachO/invalid-fat-narch.s @@ -0,0 +1,12 @@ +# REQUIRES: x86 +# RUN: yaml2obj %s -o %t.o +# RUN: not lld -flavor darwinnew -arch x86_64 -o /dev/null %t.o 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.o +# CHECK: error: [[FILE]]: fat_arch struct extends beyond end of file + +!fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: +Slices: 
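The error fires because the fat header promises two fat_arch entries while the file ends right after the header. A hedged sketch of the bounds check being exercised, using the standard on-disk sizes (the function name is illustrative, not lld's):

```cpp
#include <cstdint>
#include <cstdio>

// fat_header is two 32-bit words; each fat_arch is five 32-bit words.
static bool fatArchTableFits(uint64_t fileSize, uint32_t nfatArch) {
  const uint64_t kFatHeaderSize = 8;
  const uint64_t kFatArchSize = 20;
  return kFatHeaderSize + uint64_t(nfatArch) * kFatArchSize <= fileSize;
}

int main() {
  // The YAML above declares nfat_arch = 2 but provides no fat_arch
  // structs, so an 8-byte file would need 8 + 2 * 20 = 48 bytes.
  std::printf("%d\n", fatArchTableFits(8, 2)); // prints 0
}
```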
diff --git a/lld/test/MachO/invalid-fat-offset.s b/lld/test/MachO/invalid-fat-offset.s new file mode 100644 index 0000000000000..fc662e3f56c71 --- /dev/null +++ b/lld/test/MachO/invalid-fat-offset.s @@ -0,0 +1,22 @@ +# REQUIRES: x86 +# RUN: yaml2obj %s -o %t.o +# RUN: not lld -flavor darwinnew -arch x86_64 -o /dev/null %t.o 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.o +# CHECK: error: [[FILE]]: slice extends beyond end of file + +!fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x01000007 + cpusubtype: 0x00000003 + offset: 0x0000000000001000 + size: 0 + align: 12 + - cputype: 0x00000007 + cpusubtype: 0x00000003 + offset: 0x000000000000B000 + size: 0 + align: 12 +Slices: diff --git a/lld/test/MachO/load-commands.s b/lld/test/MachO/load-commands.s index 298cb76cc55a4..c9f5d9b5c2186 100644 --- a/lld/test/MachO/load-commands.s +++ b/lld/test/MachO/load-commands.s @@ -1,14 +1,19 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o -# RUN: obj2yaml %t | FileCheck %s -# Check for the presence of a couple of load commands that are essential for -# a working binary. +## Check for the presence of load commands that are essential for a working +## executable. +# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s +# CHECK-DAG: cmd LC_DYLD_INFO_ONLY +# CHECK-DAG: cmd LC_SYMTAB +# CHECK-DAG: cmd LC_DYSYMTAB +# CHECK-DAG: cmd LC_MAIN +# CHECK-DAG: cmd LC_LOAD_DYLINKER -# CHECK-DAG: cmd: LC_DYLD_INFO_ONLY -# CHECK-DAG: cmd: LC_SYMTAB -# CHECK-DAG: cmd: LC_DYSYMTAB +## Check for the absence of load commands that should not be in an executable. +# RUN: llvm-objdump --macho --all-headers %t | FileCheck %s --check-prefix=NCHECK +# NCHECK-NOT: cmd: LC_ID_DYLIB .text .global _main diff --git a/lld/test/MachO/missing-dylib.s b/lld/test/MachO/missing-dylib.s index d66ff52564010..ad7e51130c402 100644 --- a/lld/test/MachO/missing-dylib.s +++ b/lld/test/MachO/missing-dylib.s @@ -2,4 +2,4 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not lld -flavor darwinnew -Z -o %t -lmissing %t.o 2>&1 | FileCheck %s -# CHECK: library not found for -lmissing +# CHECK: error: library not found for -lmissing diff --git a/lld/test/MachO/no-exports-dylib.s b/lld/test/MachO/no-exports-dylib.s new file mode 100644 index 0000000000000..896c31ef3c2fe --- /dev/null +++ b/lld/test/MachO/no-exports-dylib.s @@ -0,0 +1,6 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -dylib %t.o -o %t.dylib + +# RUN: obj2yaml %t.dylib | FileCheck %s +# CHECK: export_size: 0 diff --git a/lld/test/MachO/no-id-dylink.s b/lld/test/MachO/no-id-dylink.s index 2fcd10da384cb..7353b408b9b10 100644 --- a/lld/test/MachO/no-id-dylink.s +++ b/lld/test/MachO/no-id-dylink.s @@ -3,7 +3,7 @@ # RUN: yaml2obj %p/Inputs/no-id-dylib.yaml -o %t/libnoid.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/no-id-dylink.o # RUN: not lld -flavor darwinnew -o %t/no-id-dylink -Z -L%t -lnoid %t/no-id-dylink.o 2>&1 | FileCheck %s -# CHECK: dylib {{.*}}libnoid.dylib missing LC_ID_DYLIB load command +# CHECK: error: dylib {{.*}}libnoid.dylib missing LC_ID_DYLIB load command .text .globl _main diff --git a/lld/test/MachO/no-such-file.s b/lld/test/MachO/no-such-file.s index 840b8f9e8a470..0122c6105fba8 100644 --- a/lld/test/MachO/no-such-file.s +++ b/lld/test/MachO/no-such-file.s @@ -1,4 +1,4 @@ # REQUIRES: x86 # RUN: not lld -flavor darwinnew -o /dev/null 
%t-no-such-file.o 2>&1 | FileCheck %s -# CHECK: cannot open {{.*}}no-such-file.o +# CHECK: error: cannot open {{.*}}no-such-file.o diff --git a/lld/test/MachO/relocations.s b/lld/test/MachO/relocations.s index cf38f96d0177b..7f41a6c93283f 100644 --- a/lld/test/MachO/relocations.s +++ b/lld/test/MachO/relocations.s @@ -1,19 +1,34 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: lld -flavor darwinnew -o %t %t.o -# RUN: llvm-objdump -d %t | FileCheck %s +# RUN: llvm-objdump --section-headers --syms -d %t | FileCheck %s -# CHECK: leaq 17(%rip), %rsi +# CHECK-LABEL: Sections: +# CHECK: __cstring {{[0-9a-z]+}} [[#%x, CSTRING_ADDR:]] + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK: [[#%x, F_ADDR:]] {{.*}} _f + +# CHECK-LABEL: <_main>: +## Test X86_64_RELOC_BRANCH +# CHECK: callq 0x[[#%x, F_ADDR]] <_f> +## Test X86_64_RELOC_SIGNED +# CHECK: leaq [[#%u, STR_OFF:]](%rip), %rsi +# CHECK-NEXT: [[#%x, CSTRING_ADDR - STR_OFF]] .section __TEXT,__text -.globl _main +.globl _main, _f _main: + callq _f + mov $0, %rax + ret + +_f: movl $0x2000004, %eax # write() syscall mov $1, %rdi # stdout leaq str(%rip), %rsi mov $13, %rdx # length of str syscall - mov $0, %rax ret .section __TEXT,__cstring diff --git a/lld/test/MachO/section-merge.s b/lld/test/MachO/section-merge.s new file mode 100644 index 0000000000000..bd3563718aa98 --- /dev/null +++ b/lld/test/MachO/section-merge.s @@ -0,0 +1,35 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \ +# RUN: -o %t/libhello.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libgoodbye.s \ +# RUN: -o %t/libgoodbye.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libfunction.s \ +# RUN: -o %t/libfunction.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s \ +# RUN: -o %t/main.o +# RUN: lld -flavor darwinnew -o %t/output %t/libfunction.o %t/libgoodbye.o %t/libhello.o %t/main.o + +# RUN: llvm-objdump --syms %t/output | FileCheck %s +# CHECK: SYMBOL TABLE: +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _goodbye_world +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _hello_its_me +# CHECK-DAG: {{[0-9a-z]+}} g O __TEXT,__cstring _hello_world +# CHECK-DAG: {{[0-9a-z]+}} g F __TEXT,__text _main +# CHECK-DAG: {{[0-9a-z]+}} g F __TEXT,__text _some_function + +# RUN: llvm-objdump -d %t/output | FileCheck %s --check-prefix DATA +# DATA: Disassembly of section __TEXT,__text: +# DATA: {{0*}}[[#%x,BASE:]] <_some_function>: +# DATA-NEXT: [[#BASE]]: 48 c7 c0 01 00 00 00 movq $1, %rax +# DATA-NEXT: [[#BASE + 0x7]]: c3 retq +# DATA: {{0*}}[[#BASE + 0x8]] <_main>: +# DATA-NEXT: [[#BASE + 0x8]]: 48 c7 c0 00 00 00 00 movq $0, %rax +# DATA-NEXT: [[#BASE + 0xf]]: c3 retq + +.section __TEXT,__text +.global _main + +_main: + mov $0, %rax + ret diff --git a/lld/test/MachO/segments.s b/lld/test/MachO/segments.s index 69e3d9f030b0a..15e6c7968f853 100644 --- a/lld/test/MachO/segments.s +++ b/lld/test/MachO/segments.s @@ -3,13 +3,46 @@ # RUN: lld -flavor darwinnew -o %t %t.o # RUN: llvm-readobj --macho-segment %t | FileCheck %s -# These segments must always be present. -# CHECK-DAG: Name: __PAGEZERO -# CHECK-DAG: Name: __LINKEDIT -# CHECK-DAG: Name: __TEXT +## These two segments must always be present at the start of an executable. 
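The Size values checked next fall straight out of the Mach-O load command layout. As a hedged cross-check, assuming the usual struct sizes (a segment_command_64 is 72 bytes and each trailing section_64 adds 80):

```cpp
#include <cstdio>

int main() {
  const unsigned kSegmentCommand64 = 72;
  const unsigned kSection64 = 80;
  // __PAGEZERO carries no sections; __TEXT carries one (__text).
  std::printf("__PAGEZERO cmdsize: %u\n", kSegmentCommand64); // 72
  std::printf("__TEXT cmdsize: %u\n",
              kSegmentCommand64 + kSection64); // 152
}
```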
+# CHECK-NOT: Segment { +# CHECK: Segment { +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK: Name: __PAGEZERO +# CHECK: Size: 72 +# CHECK: vmaddr: +# CHECK: vmsize: +# CHECK: fileoff: 0 +# CHECK: filesize: 0 +## The kernel won't execute a binary with the wrong protections for __PAGEZERO. +# CHECK: maxprot: --- +# CHECK: initprot: --- +# CHECK: nsects: 0 +# CHECK: flags: 0x0 +# CHECK: } +# CHECK: Segment { +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK: Name: __TEXT +# CHECK: Size: 152 +# CHECK: vmaddr: +# CHECK: vmsize: +## dyld3 assumes that the __TEXT segment starts from the file header +# CHECK: fileoff: 0 +# CHECK: filesize: +# CHECK: maxprot: rwx +# CHECK: initprot: r-x +# CHECK: nsects: 1 +# CHECK: flags: 0x0 +# CHECK: } -# Check that we handle max-length names correctly. -# CHECK-DAG: Name: maxlen_16ch_name +## Check that we handle max-length names correctly. +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: maxlen_16ch_name + +## This segment must always be present at the end of an executable. +# CHECK: Name: __LINKEDIT +# CHECK: maxprot: rwx +# CHECK: initprot: r-- +# CHECK-NOT: Cmd: LC_SEGMENT_64 .text .global _main diff --git a/lld/test/MachO/silent-ignore.test b/lld/test/MachO/silent-ignore.test index e863aac0b6251..de95cff01572f 100644 --- a/lld/test/MachO/silent-ignore.test +++ b/lld/test/MachO/silent-ignore.test @@ -5,4 +5,4 @@ RUN: -no_deduplicate \ RUN: -lto_library /lib/foo \ RUN: -macosx_version_min 0 RUN: not lld -flavor darwinnew -v --not-an-ignored-argument 2>&1 | FileCheck %s -CHECK: unknown argument: --not-an-ignored-argument +CHECK: error: unknown argument: --not-an-ignored-argument diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s new file mode 100644 index 0000000000000..44a016912bd6e --- /dev/null +++ b/lld/test/MachO/symtab.s @@ -0,0 +1,54 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj -symbols %t | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: bar +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data +# CHECK-NEXT: RefType: +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: +# CHECK-NEXT: } +# CHECK-NEXT: ] + +.data +.global foo +foo: + .asciz "Hello world!\n" + +.text +.global bar +.global _main + +_main: + mov $0, %rax + ret + +bar: + mov $2, %rax + ret diff --git a/lld/test/MachO/text-segment.s b/lld/test/MachO/text-segment.s deleted file mode 100644 index a3c7edbc61bff..0000000000000 --- a/lld/test/MachO/text-segment.s +++ /dev/null @@ -1,15 +0,0 @@ -# REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: lld -flavor darwinnew -o %t %t.o -# RUN: llvm-readobj --macho-segment %t | FileCheck %s - -# CHECK: Name: __TEXT -# CHECK-NOT: } -# dyld3 assumes that the __TEXT segment starts from the file header -# CHECK: fileoff: 0 - -.text -.global _main -_main: - mov $0, %rax - ret diff --git a/lld/test/MachO/x86-64-reloc-signed.s 
b/lld/test/MachO/x86-64-reloc-signed.s new file mode 100644 index 0000000000000..9ff91567d7641 --- /dev/null +++ b/lld/test/MachO/x86-64-reloc-signed.s @@ -0,0 +1,37 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK: <_main>: +# CHECK-NEXT: movl {{.*}} # 2000 <_s> +# CHECK-NEXT: callq {{.*}} +# CHECK-NEXT: movl {{.*}} # 2002 <_s+0x2> +# CHECK-NEXT: callq {{.*}} +# CHECK-NEXT: movb {{.*}} # 2000 <_s> +# CHECK-NEXT: callq {{.*}} + +.section __TEXT,__text +.globl _main +_main: + movl $0x434241, _s(%rip) # X86_64_RELOC_SIGNED_4 + callq _f + movl $0x44, _s+2(%rip) # X86_64_RELOC_SIGNED_2 + callq _f + movb $0x45, _s(%rip) # X86_64_RELOC_SIGNED_1 + callq _f + xorq %rax, %rax + ret + +_f: + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + leaq _s(%rip), %rsi + mov $3, %rdx # length + syscall + ret + +.section __DATA,__data +.globl _s +_s: + .space 5 diff --git a/lld/test/Unit/lit.site.cfg.py.in b/lld/test/Unit/lit.site.cfg.py.in index cc12117ad0bfe..d96e20f11d0a6 100644 --- a/lld/test/Unit/lit.site.cfg.py.in +++ b/lld/test/Unit/lit.site.cfg.py.in @@ -11,7 +11,7 @@ config.lld_src_root = "@LLD_SOURCE_DIR@" config.lld_libs_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@" config.lld_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@" config.target_triple = "@TARGET_TRIPLE@" -config.python_executable = "@PYTHON_EXECUTABLE@" +config.python_executable = "@Python3_EXECUTABLE@" # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/lld/test/lit.site.cfg.py.in b/lld/test/lit.site.cfg.py.in index 02840f8d6a303..4aa2fcda73bb4 100644 --- a/lld/test/lit.site.cfg.py.in +++ b/lld/test/lit.site.cfg.py.in @@ -13,7 +13,7 @@ config.lld_obj_root = "@LLD_BINARY_DIR@" config.lld_libs_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@" config.lld_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@" config.target_triple = "@TARGET_TRIPLE@" -config.python_executable = "@PYTHON_EXECUTABLE@" +config.python_executable = "@Python3_EXECUTABLE@" config.have_zlib = @HAVE_LIBZ@ config.sizeof_void_p = @CMAKE_SIZEOF_VOID_P@ diff --git a/lld/test/mach-o/Inputs/linker-as-ld.yaml b/lld/test/mach-o/Inputs/linker-as-ld.yaml deleted file mode 100644 index 0463154fcf289..0000000000000 --- a/lld/test/mach-o/Inputs/linker-as-ld.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- !mach-o -arch: x86_64 -file-type: MH_DYLIB -install-name: /usr/lib/libSystem.B.dylib -exports: - - name: dyld_stub_binder diff --git a/lld/test/mach-o/linker-as-ld.yaml b/lld/test/mach-o/linker-as-ld.yaml deleted file mode 100644 index 21afdb2ea02eb..0000000000000 --- a/lld/test/mach-o/linker-as-ld.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# REQUIRES: system-linker-mach-o -# -# RUN: rm -rf %T/ld && ln -s `which lld` %T/ld \ -# RUN: && %T/ld -arch x86_64 -macosx_version_min 10.8 %s \ -# RUN: %p/Inputs/linker-as-ld.yaml -o %t \ -# RUN: && llvm-nm %t | FileCheck %s -# -# Test linker run as "ld" on darwin works as darwin linker. -# - ---- !mach-o -arch: x86_64 -file-type: MH_OBJECT -flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] -has-UUID: false -OS: unknown -sections: - - segment: __TEXT - section: __text - type: S_REGULAR - attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ] - address: 0x0000000000000000 - content: [ 0xC3 ] -global-symbols: - - name: _main - type: N_SECT - scope: [ N_EXT ] - sect: 1 - value: 0x0000000000000000 -... 
- -# CHECK: T _main diff --git a/lld/test/wasm/globals.s b/lld/test/wasm/globals.s new file mode 100644 index 0000000000000..ec8d247779de1 --- /dev/null +++ b/lld/test/wasm/globals.s @@ -0,0 +1,53 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld %t.o -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +.globl _start +.globl read_global +.globl write_global + +.globaltype foo_global, i32 +.globaltype bar_global, f32 + +read_global: + .functype read_global () -> (i32) + global.get foo_global + end_function + +write_global: + .functype write_global (i32) -> () + local.get 0 + global.set foo_global + f32.const 1.0 + global.set bar_global + end_function + +_start: + .functype _start () -> () + i32.const 1 + call write_global + end_function + +foo_global: +bar_global: + +# CHECK: - Type: GLOBAL +# CHECK-NEXT: Globals: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Type: I32 +# CHECK-NEXT: Mutable: true +# CHECK-NEXT: InitExpr: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 66560 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Type: I32 +# CHECK-NEXT: Mutable: true +# CHECK-NEXT: InitExpr: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Type: F32 +# CHECK-NEXT: Mutable: true +# CHECK-NEXT: InitExpr: +# CHECK-NEXT: Opcode: F32_CONST +# CHECK-NEXT: Value: 0 diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index 58a291d1e9210..8a8f8d04bbda6 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -96,17 +96,9 @@ static bool isPETarget(std::vector &v) { } static Flavor parseProgname(StringRef progname) { -#if __APPLE__ - // Use Darwin driver for "ld" on Darwin. - if (progname == "ld") - return Darwin; -#endif - -#if LLVM_ON_UNIX - // Use GNU driver for "ld" on other Unix-like system. + // Use GNU driver for "ld" by default. if (progname == "ld") return Gnu; -#endif // Progname may be something like "lld-gnu". Parse it. 
SmallVector v; diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index 4f1bce61d9367..30e6bdf92ac93 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -100,6 +100,11 @@ void writeU32(raw_ostream &os, uint32_t number, const Twine &msg) { support::endian::write(os, number, support::little); } +void writeU64(raw_ostream &os, uint64_t number, const Twine &msg) { + debugWrite(os.tell(), msg + "[0x" + utohexstr(number) + "]"); + support::endian::write(os, number, support::little); +} + void writeValueType(raw_ostream &os, ValType type, const Twine &msg) { writeU8(os, static_cast(type), msg + "[type: " + toString(type) + "]"); @@ -141,6 +146,12 @@ void writeInitExpr(raw_ostream &os, const WasmInitExpr &initExpr) { case WASM_OPCODE_I64_CONST: writeSleb128(os, initExpr.Value.Int64, "literal (i64)"); break; + case WASM_OPCODE_F32_CONST: + writeU32(os, initExpr.Value.Float32, "literal (f32)"); + break; + case WASM_OPCODE_F64_CONST: + writeU64(os, initExpr.Value.Float64, "literal (f64)"); + break; case WASM_OPCODE_GLOBAL_GET: writeUleb128(os, initExpr.Value.Global, "literal (global index)"); break; diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index 42ccd36c96076..6e1668ea4c425 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -15,6 +15,7 @@ #include "lldb/API/SBBreakpointName.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBCommandReturnObject.h" #include "lldb/API/SBCommunication.h" #include "lldb/API/SBCompileUnit.h" diff --git a/lldb/bindings/interface/SBCommandInterpreter.i b/lldb/bindings/interface/SBCommandInterpreter.i index ad2e8f1662cd9..498084ae3ab1f 100644 --- a/lldb/bindings/interface/SBCommandInterpreter.i +++ b/lldb/bindings/interface/SBCommandInterpreter.i @@ -8,70 +8,6 @@ namespace lldb { -%feature("docstring", -"SBCommandInterpreterRunOptions controls how the RunCommandInterpreter runs the code it is fed. -A default SBCommandInterpreterRunOptions object has: - StopOnContinue: false - StopOnError: false - StopOnCrash: false - EchoCommands: true - PrintResults: true - AddToHistory: true - -") SBCommandInterpreterRunOptions; -class SBCommandInterpreterRunOptions -{ -friend class SBDebugger; -public: - SBCommandInterpreterRunOptions(); - ~SBCommandInterpreterRunOptions(); - - bool - GetStopOnContinue () const; - - void - SetStopOnContinue (bool); - - bool - GetStopOnError () const; - - void - SetStopOnError (bool); - - bool - GetStopOnCrash () const; - - void - SetStopOnCrash (bool); - - bool - GetEchoCommands () const; - - void - SetEchoCommands (bool); - - bool - GetPrintResults () const; - - void - SetPrintResults (bool); - - bool - GetAddToHistory () const; - - void - SetAddToHistory (bool); -private: - lldb_private::CommandInterpreterRunOptions * - get () const; - - lldb_private::CommandInterpreterRunOptions & - ref () const; - - // This is set in the constructor and will always be valid. - mutable std::unique_ptr m_opaque_up; -}; - %feature("docstring", "SBCommandInterpreter handles/interprets commands for lldb. You get the command interpreter from the SBDebugger instance. 
For example (from test/ diff --git a/lldb/bindings/interface/SBCommandInterpreterRunOptions.i b/lldb/bindings/interface/SBCommandInterpreterRunOptions.i new file mode 100644 index 0000000000000..f9ccbbd24dbea --- /dev/null +++ b/lldb/bindings/interface/SBCommandInterpreterRunOptions.i @@ -0,0 +1,75 @@ +//===-- SWIG Interface for SBCommandInterpreter -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +namespace lldb { + +%feature("docstring", +"SBCommandInterpreterRunOptions controls how the RunCommandInterpreter runs the code it is fed. +A default SBCommandInterpreterRunOptions object has: + StopOnContinue: false + StopOnError: false + StopOnCrash: false + EchoCommands: true + PrintResults: true + AddToHistory: true + +") SBCommandInterpreterRunOptions; +class SBCommandInterpreterRunOptions +{ +friend class SBDebugger; +public: + SBCommandInterpreterRunOptions(); + ~SBCommandInterpreterRunOptions(); + + bool + GetStopOnContinue () const; + + void + SetStopOnContinue (bool); + + bool + GetStopOnError () const; + + void + SetStopOnError (bool); + + bool + GetStopOnCrash () const; + + void + SetStopOnCrash (bool); + + bool + GetEchoCommands () const; + + void + SetEchoCommands (bool); + + bool + GetPrintResults () const; + + void + SetPrintResults (bool); + + bool + GetAddToHistory () const; + + void + SetAddToHistory (bool); +private: + lldb_private::CommandInterpreterRunOptions * + get () const; + + lldb_private::CommandInterpreterRunOptions & + ref () const; + + // This is set in the constructor and will always be valid. + mutable std::unique_ptr m_opaque_up; +}; + +} // namespace lldb diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 025a3e33c024a..2df7a05b4f481 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -22,6 +22,7 @@ %include "./interface/SBBreakpointName.i" %include "./interface/SBBroadcaster.i" %include "./interface/SBCommandInterpreter.i" +%include "./interface/SBCommandInterpreterRunOptions.i" %include "./interface/SBCommandReturnObject.i" %include "./interface/SBCommunication.i" %include "./interface/SBCompileUnit.i" diff --git a/lldb/cmake/modules/FindPythonInterpAndLibs.cmake b/lldb/cmake/modules/FindPythonInterpAndLibs.cmake index daf51ba54ad29..aae82a68bcfdf 100644 --- a/lldb/cmake/modules/FindPythonInterpAndLibs.cmake +++ b/lldb/cmake/modules/FindPythonInterpAndLibs.cmake @@ -9,19 +9,50 @@ if(PYTHON_LIBRARIES AND PYTHON_INCLUDE_DIRS AND PYTHON_EXECUTABLE AND SWIG_EXECU else() find_package(SWIG 2.0) if (SWIG_FOUND) - if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") + if(NOT CMAKE_VERSION VERSION_LESS 3.12) # Use PYTHON_HOME as a hint to find Python 3. set(Python3_ROOT_DIR "${PYTHON_HOME}") find_package(Python3 COMPONENTS Interpreter Development) - if (Python3_FOUND AND Python3_Interpreter_FOUND) + if(Python3_FOUND AND Python3_Interpreter_FOUND) set(PYTHON_LIBRARIES ${Python3_LIBRARIES}) set(PYTHON_INCLUDE_DIRS ${Python3_INCLUDE_DIRS}) set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) + + # The install name for the Python 3 framework in Xcode is relative to + # the framework's location and not the dylib itself. 
+ # + # @rpath/Python3.framework/Versions/3.x/Python3 + # + # This means that we need to compute the path to the Python3.framework + # and use that as the RPATH instead of the usual dylib's directory. + # + # The check below shouldn't match Homebrew's Python framework as it is + # called Python.framework instead of Python3.framework. + if (APPLE AND Python3_LIBRARIES MATCHES "Python3.framework") + string(FIND "${Python3_LIBRARIES}" "Python3.framework" python_framework_pos) + string(SUBSTRING "${Python3_LIBRARIES}" "0" ${python_framework_pos} PYTHON_RPATH) + endif() + mark_as_advanced( PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS PYTHON_EXECUTABLE + PYTHON_RPATH SWIG_EXECUTABLE) + elseif(NOT CMAKE_SYSTEM_NAME STREQUAL Windows) + # Use PYTHON_HOME as a hint to find Python 2. + set(Python2_ROOT_DIR "${PYTHON_HOME}") + find_package(Python2 COMPONENTS Interpreter Development) + if(Python2_FOUND AND Python2_Interpreter_FOUND) + set(PYTHON_LIBRARIES ${Python2_LIBRARIES}) + set(PYTHON_INCLUDE_DIRS ${Python2_INCLUDE_DIRS}) + set(PYTHON_EXECUTABLE ${Python2_EXECUTABLE}) + mark_as_advanced( + PYTHON_LIBRARIES + PYTHON_INCLUDE_DIRS + PYTHON_EXECUTABLE + SWIG_EXECUTABLE) + endif() endif() else() find_package(PythonInterp) @@ -49,6 +80,7 @@ else() message(STATUS "SWIG 2 or later is required for Python support in LLDB but could not be found") endif() + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PythonInterpAndLibs FOUND_VAR diff --git a/lldb/include/lldb/API/SBCommandInterpreter.h b/lldb/include/lldb/API/SBCommandInterpreter.h index e07eeb58bf6a1..a70e060bec996 100644 --- a/lldb/include/lldb/API/SBCommandInterpreter.h +++ b/lldb/include/lldb/API/SBCommandInterpreter.h @@ -16,52 +16,6 @@ namespace lldb { -class LLDB_API SBCommandInterpreterRunOptions { - friend class SBDebugger; - friend class SBCommandInterpreter; - -public: - SBCommandInterpreterRunOptions(); - ~SBCommandInterpreterRunOptions(); - - bool GetStopOnContinue() const; - - void SetStopOnContinue(bool); - - bool GetStopOnError() const; - - void SetStopOnError(bool); - - bool GetStopOnCrash() const; - - void SetStopOnCrash(bool); - - bool GetEchoCommands() const; - - void SetEchoCommands(bool); - - bool GetEchoCommentCommands() const; - - void SetEchoCommentCommands(bool echo); - - bool GetPrintResults() const; - - void SetPrintResults(bool); - - bool GetAddToHistory() const; - - void SetAddToHistory(bool); - -private: - lldb_private::CommandInterpreterRunOptions *get() const; - - lldb_private::CommandInterpreterRunOptions &ref() const; - - // This is set in the constructor and will always be valid. - mutable std::unique_ptr - m_opaque_up; -}; - class SBCommandInterpreter { public: enum { diff --git a/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h new file mode 100644 index 0000000000000..82d6feedc02e7 --- /dev/null +++ b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h @@ -0,0 +1,102 @@ +//===-- SBCommandInterpreterRunOptions.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBCOMMANDINTERPRETERRUNOPTIONS_H +#define LLDB_API_SBCOMMANDINTERPRETERRUNOPTIONS_H + +#include + +#include "lldb/API/SBDefines.h" + +namespace lldb_private { +class CommandInterpreterRunOptions; +class CommandInterpreterRunResult; +} // namespace lldb_private + +namespace lldb { + +class LLDB_API SBCommandInterpreterRunOptions { + friend class SBDebugger; + friend class SBCommandInterpreter; + +public: + SBCommandInterpreterRunOptions(); + ~SBCommandInterpreterRunOptions(); + + bool GetStopOnContinue() const; + + void SetStopOnContinue(bool); + + bool GetStopOnError() const; + + void SetStopOnError(bool); + + bool GetStopOnCrash() const; + + void SetStopOnCrash(bool); + + bool GetEchoCommands() const; + + void SetEchoCommands(bool); + + bool GetEchoCommentCommands() const; + + void SetEchoCommentCommands(bool echo); + + bool GetPrintResults() const; + + void SetPrintResults(bool); + + bool GetAddToHistory() const; + + void SetAddToHistory(bool); + + bool GetAutoHandleEvents() const; + + void SetAutoHandleEvents(bool); + + bool GetSpawnThread() const; + + void SetSpawnThread(bool); + +private: + lldb_private::CommandInterpreterRunOptions *get() const; + + lldb_private::CommandInterpreterRunOptions &ref() const; + + // This is set in the constructor and will always be valid. + mutable std::unique_ptr + m_opaque_up; +}; + +class LLDB_API SBCommandInterpreterRunResult { + friend class SBDebugger; + friend class SBCommandInterpreter; + +public: + SBCommandInterpreterRunResult(); + SBCommandInterpreterRunResult(const SBCommandInterpreterRunResult &rhs); + ~SBCommandInterpreterRunResult(); + + SBCommandInterpreterRunResult & + operator=(const SBCommandInterpreterRunResult &rhs); + + int GetNumberOfErrors() const; + lldb::CommandInterpreterResult GetResult() const; + +private: + SBCommandInterpreterRunResult( + const lldb_private::CommandInterpreterRunResult &rhs); + + // This is set in the constructor and will always be valid. + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBCOMMANDINTERPRETERRUNOPTIONS_H diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 21fe77fa4f153..b3bfa230139c9 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -290,13 +290,50 @@ class LLDB_API SBDebugger { SBTypeSynthetic GetSyntheticForType(SBTypeNameSpecifier); + /// Run the command interpreter. + /// + /// \param[in] auto_handle_events + /// If true, automatically handle resulting events. This takes precedence + /// and overrides the corresponding option in + /// SBCommandInterpreterRunOptions. + /// + /// \param[in] spawn_thread + /// If true, start a new thread for IO handling. This takes precedence + /// and overrides the corresponding option in + /// SBCommandInterpreterRunOptions. void RunCommandInterpreter(bool auto_handle_events, bool spawn_thread); + /// Run the command interpreter. + /// + /// \param[in] auto_handle_events + /// If true, automatically handle resulting events. This takes precedence + /// and overrides the corresponding option in + /// SBCommandInterpreterRunOptions. + /// + /// \param[in] spawn_thread + /// If true, start a new thread for IO handling. This takes precedence + /// and overrides the corresponding option in + /// SBCommandInterpreterRunOptions. 
+ /// + /// \param[in] options + /// Parameter collection of type SBCommandInterpreterRunOptions. + /// + /// \param[out] num_errors + /// The number of errors. + /// + /// \param[out] quit_requested + /// Whether a quit was requested. + /// + /// \param[out] stopped_for_crash + /// Whether the interpreter stopped for a crash. void RunCommandInterpreter(bool auto_handle_events, bool spawn_thread, SBCommandInterpreterRunOptions &options, int &num_errors, bool &quit_requested, bool &stopped_for_crash); + SBCommandInterpreterRunResult + RunCommandInterpreter(const SBCommandInterpreterRunOptions &options); + SBError RunREPL(lldb::LanguageType language, const char *repl_options); private: diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index 0ddf594e5cb50..a5b639c6dc73a 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -28,6 +28,7 @@ class LLDB_API SBBroadcaster; class LLDB_API SBCommand; class LLDB_API SBCommandInterpreter; class LLDB_API SBCommandInterpreterRunOptions; +class LLDB_API SBCommandInterpreterRunResult; class LLDB_API SBCommandPluginInterface; class LLDB_API SBCommandReturnObject; class LLDB_API SBCommunication; diff --git a/lldb/include/lldb/DataFormatters/StringPrinter.h b/lldb/include/lldb/DataFormatters/StringPrinter.h index 5842cde893d89..17c645f8637a1 100644 --- a/lldb/include/lldb/DataFormatters/StringPrinter.h +++ b/lldb/include/lldb/DataFormatters/StringPrinter.h @@ -24,6 +24,8 @@ class StringPrinter { enum class GetPrintableElementType { ASCII, UTF8 }; + enum class EscapeStyle { CXX, Swift }; + class DumpToStreamOptions { public: DumpToStreamOptions() = default; @@ -68,9 +70,9 @@ class StringPrinter { bool GetIgnoreMaxLength() const { return m_ignore_max_length; } - void SetLanguage(lldb::LanguageType l) { m_language_type = l; } + void SetEscapeStyle(EscapeStyle style) { m_escape_style = style; } - lldb::LanguageType GetLanguage() const { return m_language_type; } + EscapeStyle GetEscapeStyle() const { return m_escape_style; } private: /// The used output stream. @@ -93,12 +95,8 @@ class StringPrinter { /// True iff a zero bytes ('\0') should terminate the memory region that /// is being dumped. bool m_zero_is_terminator = true; - /// The language that the generated string literal is supposed to be valid - /// for. This changes for example what and how certain characters are - /// escaped. - /// For example, printing the a string containing only a quote (") char - /// with eLanguageTypeC would escape the quote character. - lldb::LanguageType m_language_type = lldb::eLanguageTypeUnknown; + /// The language-specific style for escaping special characters. 
+ EscapeStyle m_escape_style = EscapeStyle::CXX; }; class ReadStringAndDumpToStreamOptions : public DumpToStreamOptions { @@ -147,71 +145,6 @@ class StringPrinter { bool m_is_truncated = false; }; - // I can't use a std::unique_ptr for this because the Deleter is a template - // argument there - // and I want the same type to represent both pointers I want to free and - // pointers I don't need to free - which is what this class essentially is - // It's very specialized to the needs of this file, and not suggested for - // general use - struct StringPrinterBufferPointer { - public: - typedef std::function Deleter; - - StringPrinterBufferPointer(std::nullptr_t ptr) - : m_data(nullptr), m_size(0), m_deleter() {} - - StringPrinterBufferPointer(const uint8_t *bytes, size_t size, - Deleter deleter = nullptr) - : m_data(bytes), m_size(size), m_deleter(deleter) {} - - StringPrinterBufferPointer(const char *bytes, size_t size, - Deleter deleter = nullptr) - : m_data(reinterpret_cast(bytes)), m_size(size), - m_deleter(deleter) {} - - StringPrinterBufferPointer(StringPrinterBufferPointer &&rhs) - : m_data(rhs.m_data), m_size(rhs.m_size), m_deleter(rhs.m_deleter) { - rhs.m_data = nullptr; - } - - ~StringPrinterBufferPointer() { - if (m_data && m_deleter) - m_deleter(m_data); - m_data = nullptr; - } - - const uint8_t *GetBytes() const { return m_data; } - - size_t GetSize() const { return m_size; } - - StringPrinterBufferPointer & - operator=(StringPrinterBufferPointer &&rhs) { - if (m_data && m_deleter) - m_deleter(m_data); - m_data = rhs.m_data; - m_size = rhs.m_size; - m_deleter = rhs.m_deleter; - rhs.m_data = nullptr; - return *this; - } - - private: - DISALLOW_COPY_AND_ASSIGN(StringPrinterBufferPointer); - - const uint8_t *m_data; - size_t m_size; - Deleter m_deleter; - }; - - typedef std::function - EscapingHelper; - typedef std::function - EscapingHelperGenerator; - - static EscapingHelper - GetDefaultEscapingHelper(GetPrintableElementType elem_type); - template static bool ReadStringAndDumpToStream(const ReadStringAndDumpToStreamOptions &options); diff --git a/lldb/include/lldb/Expression/Expression.h b/lldb/include/lldb/Expression/Expression.h index 1e4453de04ff7..aaac889e6ed2a 100644 --- a/lldb/include/lldb/Expression/Expression.h +++ b/lldb/include/lldb/Expression/Expression.h @@ -51,7 +51,9 @@ class Expression { /// Return the language that should be used when parsing. To use the /// default, return eLanguageTypeUnknown. - virtual lldb::LanguageType Language() { return lldb::eLanguageTypeUnknown; } + virtual lldb::LanguageType Language() const { + return lldb::eLanguageTypeUnknown; + } /// Return the Materializer that the parser should use when registering /// external values. diff --git a/lldb/include/lldb/Expression/UserExpression.h b/lldb/include/lldb/Expression/UserExpression.h index 8f3a505addead..8236c417f73a0 100644 --- a/lldb/include/lldb/Expression/UserExpression.h +++ b/lldb/include/lldb/Expression/UserExpression.h @@ -194,7 +194,7 @@ class UserExpression : public Expression { /// Return the language that should be used when parsing. To use the /// default, return eLanguageTypeUnknown. - lldb::LanguageType Language() override { return m_language; } + lldb::LanguageType Language() const override { return m_language; } /// Return the desired result type of the function, or eResultTypeAny if /// indifferent. 
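Annotation on the StringPrinter change above: the per-language escaping hook is replaced by a fixed EscapeStyle carried on the dump options. A minimal caller-side sketch, assuming only the option names introduced by this patch; the `stream`, `process_sp`, and `addr` setup is illustrative, not part of the change:

// Hypothetical caller; `stream`, `process_sp`, and `addr` are assumed to exist.
using namespace lldb_private::formatters;
StringPrinter::ReadStringAndDumpToStreamOptions options;
options.SetStream(&stream);
options.SetProcessSP(process_sp);
options.SetLocation(addr);
options.SetQuote('"');
options.SetEscapeStyle(StringPrinter::EscapeStyle::CXX);
StringPrinter::ReadStringAndDumpToStream<
    StringPrinter::StringElementType::UTF8>(options);

The same options object drives both the memory read and the escaping, so a formatter picks its escape style once instead of routing through a Language plugin.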
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h index 6a2ffc09cdd7d..a0a9bcb499690 100644 --- a/lldb/include/lldb/Interpreter/CommandInterpreter.h +++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h @@ -26,6 +26,32 @@ #include namespace lldb_private { +class CommandInterpreter; + +class CommandInterpreterRunResult { +public: + CommandInterpreterRunResult() + : m_num_errors(0), m_result(lldb::eCommandInterpreterResultSuccess) {} + + uint32_t GetNumErrors() const { return m_num_errors; } + + lldb::CommandInterpreterResult GetResult() const { return m_result; } + + bool IsResult(lldb::CommandInterpreterResult result) { + return m_result == result; + } + +protected: + friend CommandInterpreter; + + void IncrementNumberOfErrors() { m_num_errors++; } + + void SetResult(lldb::CommandInterpreterResult result) { m_result = result; } + +private: + int m_num_errors; + lldb::CommandInterpreterResult m_result; +}; class CommandInterpreterRunOptions { public: @@ -144,6 +170,20 @@ class CommandInterpreterRunOptions { m_add_to_history = add_to_history ? eLazyBoolYes : eLazyBoolNo; } + bool GetAutoHandleEvents() const { + return DefaultToYes(m_auto_handle_events); + } + + void SetAutoHandleEvents(bool auto_handle_events) { + m_auto_handle_events = auto_handle_events ? eLazyBoolYes : eLazyBoolNo; + } + + bool GetSpawnThread() const { return DefaultToNo(m_spawn_thread); } + + void SetSpawnThread(bool spawn_thread) { + m_spawn_thread = spawn_thread ? eLazyBoolYes : eLazyBoolNo; + } + LazyBool m_stop_on_continue; LazyBool m_stop_on_error; LazyBool m_stop_on_crash; @@ -152,6 +192,8 @@ class CommandInterpreterRunOptions { LazyBool m_print_results; LazyBool m_print_errors; LazyBool m_add_to_history; + LazyBool m_auto_handle_events; + LazyBool m_spawn_thread; private: static bool DefaultToYes(LazyBool flag) { @@ -426,8 +468,8 @@ class CommandInterpreter : public Broadcaster, bool IsActive(); - void RunCommandInterpreter(bool auto_handle_events, bool spawn_thread, - CommandInterpreterRunOptions &options); + CommandInterpreterRunResult + RunCommandInterpreter(CommandInterpreterRunOptions &options); void GetLLDBCommandsFromIOHandler(const char *prompt, IOHandlerDelegate &delegate, @@ -474,16 +516,10 @@ class CommandInterpreter : public Broadcaster, bool GetStopCmdSourceOnError() const; - uint32_t GetNumErrors() const { return m_num_errors; } - - bool GetQuitRequested() const { return m_quit_requested; } - lldb::IOHandlerSP GetIOHandler(bool force_create = false, CommandInterpreterRunOptions *options = nullptr); - bool GetStoppedForCrash() const { return m_stopped_for_crash; } - bool GetSpaceReplPrompts() const; protected: @@ -574,9 +610,7 @@ class CommandInterpreter : public Broadcaster, // the user has been told uint32_t m_command_source_depth; std::vector m_command_source_flags; - uint32_t m_num_errors; - bool m_quit_requested; - bool m_stopped_for_crash; + CommandInterpreterRunResult m_result; // The exit code the user has requested when calling the 'quit' command. // No value means the user hasn't set a custom exit code so far. diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index bc2b6b8f212e6..86c8af665d32a 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -136,12 +136,16 @@ class SymbolFile : public PluginInterface { /// \p comp_unit. Recursively also descends into the referenced external /// modules of any encountered compilation unit. 
/// + /// This function can be used to traverse Clang -gmodules debug + /// information, which is stored in DWARF files separate from the + /// object files. + /// /// \param comp_unit /// When this SymbolFile consists of multiple auxilliary /// SymbolFiles, for example, a Darwin debug map that references /// multiple .o files, comp_unit helps choose the auxilliary /// file. In most other cases comp_unit's symbol file is - /// identiacal with *this. + /// identical with *this. /// /// \param[in] lambda /// The lambda that should be applied to every function. The lambda can diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index 118b4dcb79ab1..e6fb51e5bcf40 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -180,10 +180,6 @@ class Language : public PluginInterface { GetPossibleFormattersMatches(ValueObject &valobj, lldb::DynamicValueType use_dynamic); - virtual lldb_private::formatters::StringPrinter::EscapingHelper - GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType); - virtual std::unique_ptr GetTypeScavenger(); virtual const char *GetLanguageSpecificTypeLookupHelp(); diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 872e5301d9847..1b130cd02c3d4 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -435,9 +435,7 @@ class Platform : public PluginInterface { return lldb_private::ConstString(); } - virtual llvm::StringRef GetSDKPath(lldb_private::XcodeSDK sdk) { - return {}; - } + virtual llvm::StringRef GetSDKPath(lldb_private::XcodeSDK sdk) { return {}; } const std::string &GetRemoteURL() const { return m_remote_url; } diff --git a/lldb/include/lldb/Utility/RegisterValue.h b/lldb/include/lldb/Utility/RegisterValue.h index eeb3ce52a82bf..494f8be5391c0 100644 --- a/lldb/include/lldb/Utility/RegisterValue.h +++ b/lldb/include/lldb/Utility/RegisterValue.h @@ -26,7 +26,8 @@ struct RegisterInfo; class RegisterValue { public: - enum { kMaxRegisterByteSize = 64u }; + // big enough to support up to 256 byte AArch64 SVE + enum { kMaxRegisterByteSize = 256u }; enum Type { eTypeInvalid, @@ -261,7 +262,7 @@ class RegisterValue { struct { uint8_t bytes[kMaxRegisterByteSize]; // This must be big enough to hold any // register for any supported target. - uint8_t length; + uint16_t length; lldb::ByteOrder byte_order; } buffer; }; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 543e454c27c8b..e5032a6cd97fa 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1081,6 +1081,20 @@ enum TypeSummaryCapping { eTypeSummaryCapped = true, eTypeSummaryUncapped = false }; + +/// The result from a command interpreter run. +enum CommandInterpreterResult { + /// Command interpreter finished successfully. + eCommandInterpreterResultSuccess, + /// Stopped because the corresponding option was set and the inferior + /// crashed. + eCommandInterpreterResultInferiorCrash, + /// Stopped because the corresponding option was set and a command returned + /// an error. + eCommandInterpreterResultCommandError, + /// Stopped because quit was requested. 
+ eCommandInterpreterResultQuitRequested, +}; } // namespace lldb #endif // LLDB_LLDB_ENUMERATIONS_H diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 3b06fa07d0482..790628d2b0fde 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -239,7 +239,7 @@ def continue_to_exit(self, exitCode=0): def attach(self, program=None, pid=None, waitFor=None, trace=None, initCommands=None, preRunCommands=None, stopCommands=None, - exitCommands=None, attachCommands=None): + exitCommands=None, attachCommands=None, coreFile=None): '''Build the default Makefile target, create the VSCode debug adaptor, and attach to the process. ''' @@ -257,7 +257,7 @@ def cleanup(): program=program, pid=pid, waitFor=waitFor, trace=trace, initCommands=initCommands, preRunCommands=preRunCommands, stopCommands=stopCommands, exitCommands=exitCommands, - attachCommands=attachCommands) + attachCommands=attachCommands, coreFile=coreFile) if not (response and response['success']): self.assertTrue(response['success'], 'attach failed (%s)' % (response['message'])) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 805de43f25f37..643a313dca1cb 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -450,7 +450,7 @@ def replay_packets(self, replay_file_path): def request_attach(self, program=None, pid=None, waitFor=None, trace=None, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, - attachCommands=None): + attachCommands=None, coreFile=None): args_dict = {} if pid is not None: args_dict['pid'] = pid @@ -471,6 +471,8 @@ def request_attach(self, program=None, pid=None, waitFor=None, trace=None, args_dict['exitCommands'] = exitCommands if attachCommands: args_dict['attachCommands'] = attachCommands + if coreFile: + args_dict['coreFile'] = coreFile command_dict = { 'command': 'attach', 'type': 'request', diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index f8ed1b37f4fa0..b0ada3ef81451 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -29,6 +29,7 @@ add_lldb_library(liblldb SHARED ${option_framework} SBBreakpointOptionCommon.cpp SBBroadcaster.cpp SBCommandInterpreter.cpp + SBCommandInterpreterRunOptions.cpp SBCommandReturnObject.cpp SBCommunication.cpp SBCompileUnit.cpp @@ -120,6 +121,10 @@ if(LLDB_ENABLE_PYTHON AND (BUILD_SHARED_LIBS OR LLVM_LINK_LLVM_DYLIB) AND UNIX A set_property(TARGET liblldb APPEND PROPERTY INSTALL_RPATH "\$ORIGIN/../../../../lib${LLVM_LIBDIR_SUFFIX}") endif() +if(PYTHON_RPATH) + set_property(TARGET liblldb APPEND PROPERTY INSTALL_RPATH "${PYTHON_RPATH}") +endif() + if (MSVC) set_source_files_properties(SBReproducer.cpp PROPERTIES COMPILE_FLAGS /bigobj) endif() diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp index 14d738b276808..d06b33c760fcd 100644 --- a/lldb/source/API/SBCommandInterpreter.cpp +++ b/lldb/source/API/SBCommandInterpreter.cpp @@ -17,6 +17,7 @@ #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBCommandReturnObject.h" #include 
"lldb/API/SBEvent.h" #include "lldb/API/SBExecutionContext.h" @@ -31,122 +32,6 @@ using namespace lldb; using namespace lldb_private; -SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunOptions); - - m_opaque_up.reset(new CommandInterpreterRunOptions()); -} - -SBCommandInterpreterRunOptions::~SBCommandInterpreterRunOptions() = default; - -bool SBCommandInterpreterRunOptions::GetStopOnContinue() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnContinue); - - return m_opaque_up->GetStopOnContinue(); -} - -void SBCommandInterpreterRunOptions::SetStopOnContinue(bool stop_on_continue) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue, - (bool), stop_on_continue); - - m_opaque_up->SetStopOnContinue(stop_on_continue); -} - -bool SBCommandInterpreterRunOptions::GetStopOnError() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnError); - - return m_opaque_up->GetStopOnError(); -} - -void SBCommandInterpreterRunOptions::SetStopOnError(bool stop_on_error) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError, - (bool), stop_on_error); - - m_opaque_up->SetStopOnError(stop_on_error); -} - -bool SBCommandInterpreterRunOptions::GetStopOnCrash() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnCrash); - - return m_opaque_up->GetStopOnCrash(); -} - -void SBCommandInterpreterRunOptions::SetStopOnCrash(bool stop_on_crash) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash, - (bool), stop_on_crash); - - m_opaque_up->SetStopOnCrash(stop_on_crash); -} - -bool SBCommandInterpreterRunOptions::GetEchoCommands() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetEchoCommands); - - return m_opaque_up->GetEchoCommands(); -} - -void SBCommandInterpreterRunOptions::SetEchoCommands(bool echo_commands) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands, - (bool), echo_commands); - - m_opaque_up->SetEchoCommands(echo_commands); -} - -bool SBCommandInterpreterRunOptions::GetEchoCommentCommands() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetEchoCommentCommands); - - return m_opaque_up->GetEchoCommentCommands(); -} - -void SBCommandInterpreterRunOptions::SetEchoCommentCommands(bool echo) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, - SetEchoCommentCommands, (bool), echo); - - m_opaque_up->SetEchoCommentCommands(echo); -} - -bool SBCommandInterpreterRunOptions::GetPrintResults() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetPrintResults); - - return m_opaque_up->GetPrintResults(); -} - -void SBCommandInterpreterRunOptions::SetPrintResults(bool print_results) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults, - (bool), print_results); - - m_opaque_up->SetPrintResults(print_results); -} - -bool SBCommandInterpreterRunOptions::GetAddToHistory() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetAddToHistory); - - return m_opaque_up->GetAddToHistory(); -} - -void SBCommandInterpreterRunOptions::SetAddToHistory(bool add_to_history) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory, - (bool), add_to_history); - - m_opaque_up->SetAddToHistory(add_to_history); -} - -lldb_private::CommandInterpreterRunOptions * 
-SBCommandInterpreterRunOptions::get() const { - return m_opaque_up.get(); -} - -lldb_private::CommandInterpreterRunOptions & -SBCommandInterpreterRunOptions::ref() const { - return *m_opaque_up; -} - class CommandPluginInterfaceImplementation : public CommandObjectParsed { public: CommandPluginInterfaceImplementation(CommandInterpreter &interpreter, @@ -861,37 +746,7 @@ void SBCommand::SetFlags(uint32_t flags) { namespace lldb_private { namespace repro { -template <> -void RegisterMethods(Registry &R) { - LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunOptions, ()); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetStopOnContinue, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, - SetStopOnContinue, (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetStopOnError, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError, - (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetStopOnCrash, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash, - (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetEchoCommands, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands, - (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetEchoCommentCommands, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, - SetEchoCommentCommands, (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetPrintResults, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults, - (bool)); - LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, - GetAddToHistory, ()); - LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory, - (bool)); +template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreter, (lldb_private::CommandInterpreter *)); LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreter, @@ -1000,6 +855,5 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(uint32_t, SBCommand, GetFlags, ()); LLDB_REGISTER_METHOD(void, SBCommand, SetFlags, (uint32_t)); } - } } diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp new file mode 100644 index 0000000000000..7f880dc4605b2 --- /dev/null +++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp @@ -0,0 +1,272 @@ +//===-- SBCommandInterpreterRunOptions.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/lldb-types.h" + +#include "SBReproducerPrivate.h" + +#include "lldb/API/SBCommandInterpreterRunOptions.h" +#include "lldb/Interpreter/CommandInterpreter.h" + +#include + +using namespace lldb; +using namespace lldb_private; + +SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions() { + LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunOptions); + + m_opaque_up.reset(new CommandInterpreterRunOptions()); +} + +SBCommandInterpreterRunOptions::~SBCommandInterpreterRunOptions() = default; + +bool SBCommandInterpreterRunOptions::GetStopOnContinue() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetStopOnContinue); + + return m_opaque_up->GetStopOnContinue(); +} + +void SBCommandInterpreterRunOptions::SetStopOnContinue(bool stop_on_continue) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue, + (bool), stop_on_continue); + + m_opaque_up->SetStopOnContinue(stop_on_continue); +} + +bool SBCommandInterpreterRunOptions::GetStopOnError() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetStopOnError); + + return m_opaque_up->GetStopOnError(); +} + +void SBCommandInterpreterRunOptions::SetStopOnError(bool stop_on_error) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError, + (bool), stop_on_error); + + m_opaque_up->SetStopOnError(stop_on_error); +} + +bool SBCommandInterpreterRunOptions::GetStopOnCrash() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetStopOnCrash); + + return m_opaque_up->GetStopOnCrash(); +} + +void SBCommandInterpreterRunOptions::SetStopOnCrash(bool stop_on_crash) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash, + (bool), stop_on_crash); + + m_opaque_up->SetStopOnCrash(stop_on_crash); +} + +bool SBCommandInterpreterRunOptions::GetEchoCommands() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetEchoCommands); + + return m_opaque_up->GetEchoCommands(); +} + +void SBCommandInterpreterRunOptions::SetEchoCommands(bool echo_commands) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands, + (bool), echo_commands); + + m_opaque_up->SetEchoCommands(echo_commands); +} + +bool SBCommandInterpreterRunOptions::GetEchoCommentCommands() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetEchoCommentCommands); + + return m_opaque_up->GetEchoCommentCommands(); +} + +void SBCommandInterpreterRunOptions::SetEchoCommentCommands(bool echo) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, + SetEchoCommentCommands, (bool), echo); + + m_opaque_up->SetEchoCommentCommands(echo); +} + +bool SBCommandInterpreterRunOptions::GetPrintResults() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetPrintResults); + + return m_opaque_up->GetPrintResults(); +} + +void SBCommandInterpreterRunOptions::SetPrintResults(bool print_results) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults, + (bool), print_results); + + m_opaque_up->SetPrintResults(print_results); +} + +bool SBCommandInterpreterRunOptions::GetAddToHistory() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetAddToHistory); + + return m_opaque_up->GetAddToHistory(); +} + +void 
SBCommandInterpreterRunOptions::SetAddToHistory(bool add_to_history) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory, + (bool), add_to_history); + + m_opaque_up->SetAddToHistory(add_to_history); +} + +bool SBCommandInterpreterRunOptions::GetAutoHandleEvents() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetAutoHandleEvents); + + return m_opaque_up->GetAutoHandleEvents(); +} + +void SBCommandInterpreterRunOptions::SetAutoHandleEvents( + bool auto_handle_events) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAutoHandleEvents, + (bool), auto_handle_events); + + m_opaque_up->SetAutoHandleEvents(auto_handle_events); +} + +bool SBCommandInterpreterRunOptions::GetSpawnThread() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, + GetSpawnThread); + + return m_opaque_up->GetSpawnThread(); +} + +void SBCommandInterpreterRunOptions::SetSpawnThread(bool spawn_thread) { + LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetSpawnThread, + (bool), spawn_thread); + + m_opaque_up->SetSpawnThread(spawn_thread); +} + +lldb_private::CommandInterpreterRunOptions * +SBCommandInterpreterRunOptions::get() const { + return m_opaque_up.get(); +} + +lldb_private::CommandInterpreterRunOptions & +SBCommandInterpreterRunOptions::ref() const { + return *m_opaque_up; +} + +SBCommandInterpreterRunResult::SBCommandInterpreterRunResult() + : m_opaque_up(new CommandInterpreterRunResult()) + +{ + LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunResult); +} + +SBCommandInterpreterRunResult::SBCommandInterpreterRunResult( + const SBCommandInterpreterRunResult &rhs) + : m_opaque_up(new CommandInterpreterRunResult()) { + LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunResult, + (const lldb::SBCommandInterpreterRunResult &), rhs); + + *m_opaque_up = *rhs.m_opaque_up; +} + +SBCommandInterpreterRunResult::SBCommandInterpreterRunResult( + const CommandInterpreterRunResult &rhs) + : m_opaque_up() { + m_opaque_up.reset(new CommandInterpreterRunResult(rhs)); +} + +SBCommandInterpreterRunResult::~SBCommandInterpreterRunResult() = default; + +SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=( + const SBCommandInterpreterRunResult &rhs) { + LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult &, + SBCommandInterpreterRunResult, + operator=,(const lldb::SBCommandInterpreterRunResult &), + rhs); + + if (this == &rhs) + return *this; + *m_opaque_up = *rhs.m_opaque_up; + return LLDB_RECORD_RESULT(*this); +} + +int SBCommandInterpreterRunResult::GetNumberOfErrors() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(int, SBCommandInterpreterRunResult, + GetNumberOfErrors); + + return m_opaque_up->GetNumErrors(); +} + +lldb::CommandInterpreterResult +SBCommandInterpreterRunResult::GetResult() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::CommandInterpreterResult, + SBCommandInterpreterRunResult, GetResult); + + return m_opaque_up->GetResult(); +} + +namespace lldb_private { +namespace repro { + +template <> void RegisterMethods(Registry &R) { + LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunOptions, ()); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetStopOnContinue, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetStopOnError, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, 
SBCommandInterpreterRunOptions, + GetStopOnCrash, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetEchoCommands, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetEchoCommentCommands, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, + SetEchoCommentCommands, (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetPrintResults, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetAddToHistory, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory, + (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetAutoHandleEvents, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, + SetAutoHandleEvents, (bool)); + LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, + GetSpawnThread, ()); + LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetSpawnThread, + (bool)); + LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunResult, ()); + LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunResult, + (const lldb::SBCommandInterpreterRunResult &)); + LLDB_REGISTER_METHOD(lldb::SBCommandInterpreterRunResult &, + SBCommandInterpreterRunResult, + operator=,(const lldb::SBCommandInterpreterRunResult &)); + LLDB_REGISTER_METHOD_CONST(int, SBCommandInterpreterRunResult, + GetNumberOfErrors, ()); + LLDB_REGISTER_METHOD_CONST(lldb::CommandInterpreterResult, + SBCommandInterpreterRunResult, GetResult, ()); +} + +} // namespace repro +} // namespace lldb_private diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index bd5e256de2ac5..978d0befa5ff5 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -15,6 +15,7 @@ #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBCommandReturnObject.h" #include "lldb/API/SBError.h" #include "lldb/API/SBEvent.h" @@ -1166,9 +1167,9 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events, if (m_opaque_sp) { CommandInterpreterRunOptions options; - - m_opaque_sp->GetCommandInterpreter().RunCommandInterpreter( - auto_handle_events, spawn_thread, options); + options.SetAutoHandleEvents(auto_handle_events); + options.SetSpawnThread(spawn_thread); + m_opaque_sp->GetCommandInterpreter().RunCommandInterpreter(options); } } @@ -1186,15 +1187,35 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events, quit_requested, stopped_for_crash); if (m_opaque_sp) { + options.SetAutoHandleEvents(auto_handle_events); + options.SetSpawnThread(spawn_thread); CommandInterpreter &interp = m_opaque_sp->GetCommandInterpreter(); - interp.RunCommandInterpreter(auto_handle_events, spawn_thread, - options.ref()); - num_errors = interp.GetNumErrors(); - quit_requested = interp.GetQuitRequested(); - stopped_for_crash = interp.GetStoppedForCrash(); + CommandInterpreterRunResult result = + interp.RunCommandInterpreter(options.ref()); + num_errors = result.GetNumErrors(); + quit_requested = + result.IsResult(lldb::eCommandInterpreterResultQuitRequested); + stopped_for_crash = + result.IsResult(lldb::eCommandInterpreterResultInferiorCrash); } } +SBCommandInterpreterRunResult 
SBDebugger::RunCommandInterpreter( + const SBCommandInterpreterRunOptions &options) { + LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult, SBDebugger, + RunCommandInterpreter, + (const lldb::SBCommandInterpreterRunOptions &), options); + + if (!m_opaque_sp) + return LLDB_RECORD_RESULT(SBCommandInterpreterRunResult()); + + CommandInterpreter &interp = m_opaque_sp->GetCommandInterpreter(); + CommandInterpreterRunResult result = + interp.RunCommandInterpreter(options.ref()); + + return LLDB_RECORD_RESULT(SBCommandInterpreterRunResult(result)); +} + SBError SBDebugger::RunREPL(lldb::LanguageType language, const char *repl_options) { LLDB_RECORD_METHOD(lldb::SBError, SBDebugger, RunREPL, @@ -1821,6 +1842,9 @@ template <> void RegisterMethods(Registry &R) { (lldb::SBTypeNameSpecifier)); LLDB_REGISTER_METHOD(bool, SBDebugger, EnableLog, (const char *, const char **)); + LLDB_REGISTER_METHOD(lldb::SBCommandInterpreterRunResult, SBDebugger, + RunCommandInterpreter, + (const lldb::SBCommandInterpreterRunOptions &)); } } // namespace repro diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index 56c586b6d2396..329c1b55d16db 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -15,6 +15,7 @@ #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBData.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBDeclaration.h" @@ -40,6 +41,7 @@ SBRegistry::SBRegistry() { RegisterMethods(R); RegisterMethods(R); RegisterMethods(R); + RegisterMethods(R); RegisterMethods(R); RegisterMethods(R); RegisterMethods(R); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index decdd9c53de2b..8db0eef311636 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -271,15 +271,13 @@ class CommandObjectTargetCreate : public CommandObjectParsed { FileSpec remote_file(m_remote_file.GetOptionValue().GetCurrentValue()); if (core_file) { - if (!FileSystem::Instance().Exists(core_file)) { - result.AppendErrorWithFormat("core file '%s' doesn't exist", - core_file.GetPath().c_str()); - result.SetStatus(eReturnStatusFailed); - return false; - } - if (!FileSystem::Instance().Readable(core_file)) { - result.AppendErrorWithFormat("core file '%s' is not readable", - core_file.GetPath().c_str()); + auto file = FileSystem::Instance().Open( + core_file, lldb_private::File::eOpenOptionRead); + + if (!file) { + result.AppendErrorWithFormatv("Cannot open '{0}': {1}.", + core_file.GetPath(), + llvm::toString(file.takeError())); result.SetStatus(eReturnStatusFailed); return false; } @@ -288,18 +286,13 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (argc == 1 || core_file || remote_file) { FileSpec symfile(m_symbol_file.GetOptionValue().GetCurrentValue()); if (symfile) { - if (FileSystem::Instance().Exists(symfile)) { - if (!FileSystem::Instance().Readable(symfile)) { - result.AppendErrorWithFormat("symbol file '%s' is not readable", - symfile.GetPath().c_str()); - result.SetStatus(eReturnStatusFailed); - return false; - } - } else { - char symfile_path[PATH_MAX]; - symfile.GetPath(symfile_path, sizeof(symfile_path)); - result.AppendErrorWithFormat("invalid symbol file path '%s'", - symfile_path); + auto file = FileSystem::Instance().Open( + symfile, lldb_private::File::eOpenOptionRead); + + if (!file) { + 
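// Annotation (not part of the patch): the new SBDebugger::RunCommandInterpreter
// overload above folds the old num_errors/quit_requested/stopped_for_crash
// out-parameters into a single SBCommandInterpreterRunResult. A hedged
// driver-side sketch, assuming an already-initialized `debugger` and <cstdio>:
//
//   SBCommandInterpreterRunOptions options;
//   options.SetAutoHandleEvents(true);
//   options.SetSpawnThread(false);
//   SBCommandInterpreterRunResult result =
//       debugger.RunCommandInterpreter(options);
//   if (result.GetResult() == lldb::eCommandInterpreterResultCommandError)
//     fprintf(stderr, "interpreter saw %d error(s)\n",
//             result.GetNumberOfErrors());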
result.AppendErrorWithFormatv("Cannot open '{0}': {1}.", + symfile.GetPath(), + llvm::toString(file.takeError())); result.SetStatus(eReturnStatusFailed); return false; } @@ -401,48 +394,34 @@ class CommandObjectTargetCreate : public CommandObjectParsed { if (module_sp) module_sp->SetPlatformFileSpec(remote_file); } + if (core_file) { - char core_path[PATH_MAX]; - core_file.GetPath(core_path, sizeof(core_path)); - if (FileSystem::Instance().Exists(core_file)) { - if (!FileSystem::Instance().Readable(core_file)) { - result.AppendMessageWithFormat( - "Core file '%s' is not readable.\n", core_path); - result.SetStatus(eReturnStatusFailed); - return false; - } - FileSpec core_file_dir; - core_file_dir.GetDirectory() = core_file.GetDirectory(); - target_sp->AppendExecutableSearchPaths(core_file_dir); + FileSpec core_file_dir; + core_file_dir.GetDirectory() = core_file.GetDirectory(); + target_sp->AppendExecutableSearchPaths(core_file_dir); - ProcessSP process_sp(target_sp->CreateProcess( - GetDebugger().GetListener(), llvm::StringRef(), &core_file)); + ProcessSP process_sp(target_sp->CreateProcess( + GetDebugger().GetListener(), llvm::StringRef(), &core_file)); - if (process_sp) { - // Seems weird that we Launch a core file, but that is what we - // do! - error = process_sp->LoadCore(); + if (process_sp) { + // Seems weird that we Launch a core file, but that is what we + // do! + error = process_sp->LoadCore(); - if (error.Fail()) { - result.AppendError( - error.AsCString("can't find plug-in for core file")); - result.SetStatus(eReturnStatusFailed); - return false; - } else { - result.AppendMessageWithFormat( - "Core file '%s' (%s) was loaded.\n", core_path, - target_sp->GetArchitecture().GetArchitectureName()); - result.SetStatus(eReturnStatusSuccessFinishNoResult); - } - } else { - result.AppendErrorWithFormat( - "Unable to find process plug-in for core file '%s'\n", - core_path); + if (error.Fail()) { + result.AppendError( + error.AsCString("can't find plug-in for core file")); result.SetStatus(eReturnStatusFailed); + return false; + } else { + result.AppendMessageWithFormatv("Core file '{0}' ({1}) was loaded.\n", core_file.GetPath(), + target_sp->GetArchitecture().GetArchitectureName()); + result.SetStatus(eReturnStatusSuccessFinishNoResult); } } else { - result.AppendErrorWithFormat("Core file '%s' does not exist\n", - core_path); + result.AppendErrorWithFormatv( + "Unable to find process plug-in for core file '{0}'\n", + core_file.GetPath()); result.SetStatus(eReturnStatusFailed); } } else { diff --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp index 4515b67b2adfd..53dbc8d76a994 100644 --- a/lldb/source/DataFormatters/StringPrinter.cpp +++ b/lldb/source/DataFormatters/StringPrinter.cpp @@ -24,15 +24,48 @@ using namespace lldb; using namespace lldb_private; using namespace lldb_private::formatters; +using GetPrintableElementType = StringPrinter::GetPrintableElementType; +using StringElementType = StringPrinter::StringElementType; + +/// DecodedCharBuffer stores the decoded contents of a single character. It +/// avoids managing memory on the heap by copying decoded bytes into an in-line +/// buffer. 
+struct DecodedCharBuffer { + static constexpr unsigned MaxLength = 16; + +public: + DecodedCharBuffer(std::nullptr_t) {} + + DecodedCharBuffer(const uint8_t *bytes, size_t size) : m_size(size) { + if (size > MaxLength) + llvm_unreachable("unsupported length"); + memcpy(m_data, bytes, size); + } + + DecodedCharBuffer(const char *bytes, size_t size) + : DecodedCharBuffer(reinterpret_cast(bytes), size) {} + + const uint8_t *GetBytes() const { return m_data; } + + size_t GetSize() const { return m_size; } + +private: + size_t m_size = 0; + uint8_t m_data[MaxLength] = {0}; +}; + +using EscapingHelper = + std::function; // we define this for all values of type but only implement it for those we // care about that's good because we get linker errors for any unsupported type -template -static StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next); +template +static DecodedCharBuffer +GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style); -// mimic isprint() for Unicode codepoints -static bool isprint(char32_t codepoint) { +// Mimic isprint() for Unicode codepoints. +static bool isprint32(char32_t codepoint) { if (codepoint <= 0x1F || codepoint == 0x7F) // C0 { return false; @@ -59,57 +92,73 @@ static bool isprint(char32_t codepoint) { return true; } -template <> -StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, - uint8_t *buffer_end, - uint8_t *&next) { - StringPrinter::StringPrinterBufferPointer retval = {nullptr}; - - switch (*buffer) { +DecodedCharBuffer attemptASCIIEscape(char32_t c, + StringPrinter::EscapeStyle escape_style) { + const bool is_swift_escape_style = + escape_style == StringPrinter::EscapeStyle::Swift; + switch (c) { case 0: - retval = {"\\0", 2}; - break; + return {"\\0", 2}; case '\a': - retval = {"\\a", 2}; - break; + return {"\\a", 2}; case '\b': - retval = {"\\b", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\b", 2}; case '\f': - retval = {"\\f", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\f", 2}; case '\n': - retval = {"\\n", 2}; - break; + return {"\\n", 2}; case '\r': - retval = {"\\r", 2}; - break; + return {"\\r", 2}; case '\t': - retval = {"\\t", 2}; - break; + return {"\\t", 2}; case '\v': - retval = {"\\v", 2}; - break; + if (is_swift_escape_style) + return nullptr; + return {"\\v", 2}; case '\"': - retval = {"\\\"", 2}; - break; + return {"\\\"", 2}; + case '\'': + if (is_swift_escape_style) + return {"\\'", 2}; + return nullptr; case '\\': - retval = {"\\\\", 2}; - break; - default: - if (isprint(*buffer)) - retval = {buffer, 1}; - else { - uint8_t *data = new uint8_t[5]; - sprintf((char *)data, "\\x%02x", *buffer); - retval = {data, 4, [](const uint8_t *c) { delete[] c; }}; - break; - } + return {"\\\\", 2}; } + return nullptr; +} +template <> +DecodedCharBuffer GetPrintableImpl( + uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style) { + // The ASCII helper always advances 1 byte at a time. next = buffer + 1; - return retval; + + DecodedCharBuffer retval = attemptASCIIEscape(*buffer, escape_style); + if (retval.GetSize()) + return retval; + if (isprint(*buffer)) + return {buffer, 1}; + + unsigned escaped_len; + constexpr unsigned max_buffer_size = 7; + uint8_t data[max_buffer_size]; + switch (escape_style) { + case StringPrinter::EscapeStyle::CXX: + // Prints 4 characters, then a \0 terminator. 
+ escaped_len = sprintf((char *)data, "\\x%02x", *buffer); + break; + case StringPrinter::EscapeStyle::Swift: + // Prints up to 6 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\u{%x}", *buffer); + break; + } + lldbassert(escaped_len > 0 && "unknown string escape style"); + return {data, escaped_len}; } static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) { @@ -125,28 +174,25 @@ static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1, } template <> -StringPrinter::StringPrinterBufferPointer -GetPrintableImpl(uint8_t *buffer, - uint8_t *buffer_end, - uint8_t *&next) { - StringPrinter::StringPrinterBufferPointer retval{nullptr}; - +DecodedCharBuffer GetPrintableImpl( + uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style) { const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); // If the utf8 encoded length is invalid, or if there aren't enough bytes to // print, this is some kind of corrupted string. if (utf8_encoded_len == 0 || utf8_encoded_len > 4) - return retval; + return nullptr; if ((buffer_end - buffer) < utf8_encoded_len) // There's no room in the buffer for the utf8 sequence. - return retval; + return nullptr; char32_t codepoint = 0; switch (utf8_encoded_len) { case 1: // this is just an ASCII byte - ask ASCII - return GetPrintableImpl( - buffer, buffer_end, next); + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); case 2: codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer, (unsigned char)*(buffer + 1)); @@ -163,105 +209,84 @@ GetPrintableImpl(uint8_t *buffer, break; } - if (codepoint) { - switch (codepoint) { - case 0: - retval = {"\\0", 2}; - break; - case '\a': - retval = {"\\a", 2}; - break; - case '\b': - retval = {"\\b", 2}; - break; - case '\f': - retval = {"\\f", 2}; - break; - case '\n': - retval = {"\\n", 2}; - break; - case '\r': - retval = {"\\r", 2}; - break; - case '\t': - retval = {"\\t", 2}; - break; - case '\v': - retval = {"\\v", 2}; - break; - case '\"': - retval = {"\\\"", 2}; - break; - case '\\': - retval = {"\\\\", 2}; - break; - default: - if (isprint(codepoint)) - retval = {buffer, utf8_encoded_len}; - else { - uint8_t *data = new uint8_t[11]; - sprintf((char *)data, "\\U%08x", (unsigned)codepoint); - retval = {data, 10, [](const uint8_t *c) { delete[] c; }}; - break; - } - } + // We couldn't figure out how to print this codepoint. + if (!codepoint) + return nullptr; - next = buffer + utf8_encoded_len; + // The UTF8 helper always advances by the utf8 encoded length. + next = buffer + utf8_encoded_len; + DecodedCharBuffer retval = attemptASCIIEscape(codepoint, escape_style); + if (retval.GetSize()) return retval; + if (isprint32(codepoint)) + return {buffer, utf8_encoded_len}; + + unsigned escaped_len; + constexpr unsigned max_buffer_size = 13; + uint8_t data[max_buffer_size]; + switch (escape_style) { + case StringPrinter::EscapeStyle::CXX: + // Prints 10 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\U%08x", (unsigned)codepoint); + break; + case StringPrinter::EscapeStyle::Swift: + // Prints up to 12 characters, then a \0 terminator. + escaped_len = sprintf((char *)data, "\\u{%x}", (unsigned)codepoint); + break; } - - // We couldn't figure out how to print this string. 
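// Annotation (not part of the patch): the sprintf formats above give the two
// escape styles different spellings for the same non-printable input. A few
// illustrative values, derived from the formats rather than from any test:
//   EscapeStyle::CXX,   byte 0x7f        -> "\x7f"       (ASCII helper)
//   EscapeStyle::Swift, byte 0x7f        -> "\u{7f}"
//   EscapeStyle::CXX,   codepoint U+200E -> "\U0000200e" (UTF8 helper)
//   EscapeStyle::Swift, codepoint U+200E -> "\u{200e}"
// Printable characters pass through unescaped in both styles.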
- return retval; + lldbassert(escaped_len > 0 && "unknown string escape style"); + return {data, escaped_len}; } // Given a sequence of bytes, this function returns: a sequence of bytes to // actually print out + a length the following unscanned position of the buffer // is in next -static StringPrinter::StringPrinterBufferPointer -GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, - uint8_t *buffer_end, uint8_t *&next) { +static DecodedCharBuffer GetPrintable(StringElementType type, uint8_t *buffer, + uint8_t *buffer_end, uint8_t *&next, + StringPrinter::EscapeStyle escape_style) { if (!buffer || buffer >= buffer_end) return {nullptr}; switch (type) { - case StringPrinter::StringElementType::ASCII: - return GetPrintableImpl( - buffer, buffer_end, next); - case StringPrinter::StringElementType::UTF8: - return GetPrintableImpl( - buffer, buffer_end, next); + case StringElementType::ASCII: + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); + case StringElementType::UTF8: + return GetPrintableImpl(buffer, buffer_end, next, + escape_style); default: return {nullptr}; } } -StringPrinter::EscapingHelper -StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) { +static EscapingHelper +GetDefaultEscapingHelper(GetPrintableElementType elem_type, + StringPrinter::EscapeStyle escape_style) { switch (elem_type) { case GetPrintableElementType::UTF8: - return [](uint8_t *buffer, uint8_t *buffer_end, - uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { - return GetPrintable(StringPrinter::StringElementType::UTF8, buffer, - buffer_end, next); - }; case GetPrintableElementType::ASCII: - return [](uint8_t *buffer, uint8_t *buffer_end, - uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer { - return GetPrintable(StringPrinter::StringElementType::ASCII, buffer, - buffer_end, next); + return [escape_style, elem_type](uint8_t *buffer, uint8_t *buffer_end, + uint8_t *&next) -> DecodedCharBuffer { + return GetPrintable(elem_type == GetPrintableElementType::UTF8 + ? StringElementType::UTF8 + : StringElementType::ASCII, + buffer, buffer_end, next, escape_style); }; } llvm_unreachable("bad element type"); } -// use this call if you already have an LLDB-side buffer for the data +/// Read a string encoded in accordance with \tparam SourceDataType from a +/// host-side LLDB buffer, then pretty-print it to a stream using \p style. 
template -static bool DumpUTFBufferToStream( +static bool DumpEncodedBufferToStream( + GetPrintableElementType style, llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, const SourceDataType *, llvm::UTF8 **, llvm::UTF8 *, llvm::ConversionFlags), const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) { + assert(dump_options.GetStream() && "need a Stream to print the string to"); Stream &stream(*dump_options.GetStream()); if (dump_options.GetPrefixToken() != nullptr) stream.Printf("%s", dump_options.GetPrefixToken()); @@ -321,18 +346,10 @@ static bool DumpUTFBufferToStream( } const bool escape_non_printables = dump_options.GetEscapeNonPrintables(); - lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; - if (escape_non_printables) { - if (Language *language = Language::FindPlugin(dump_options.GetLanguage())) - escaping_callback = language->GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType:: - UTF8); - else - escaping_callback = - lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( - lldb_private::formatters::StringPrinter:: - GetPrintableElementType::UTF8); - } + EscapingHelper escaping_callback; + if (escape_non_printables) + escaping_callback = + GetDefaultEscapingHelper(style, dump_options.GetEscapeStyle()); // since we tend to accept partial data (and even partially malformed data) // we might end up with no NULL terminator before the end_ptr hence we need @@ -394,144 +411,58 @@ lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions:: SetQuote(options.GetQuote()); SetEscapeNonPrintables(options.GetEscapeNonPrintables()); SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator()); - SetLanguage(options.GetLanguage()); + SetEscapeStyle(options.GetEscapeStyle()); } namespace lldb_private { namespace formatters { -template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::ASCII>( - const ReadStringAndDumpToStreamOptions &options) { - assert(options.GetStream() && "need a Stream to print the string to"); - Status my_error; - - ProcessSP process_sp(options.GetProcessSP()); - - if (process_sp.get() == nullptr || options.GetLocation() == 0) - return false; - - size_t size; - const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); - bool is_truncated = false; - - if (options.GetSourceSize() == 0) - size = max_size; - else if (!options.GetIgnoreMaxLength()) { - size = options.GetSourceSize(); - if (size > max_size) { - size = max_size; - is_truncated = true; - } - } else - size = options.GetSourceSize(); - - lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0)); - - process_sp->ReadCStringFromMemory( - options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error); - - if (my_error.Fail()) - return false; - - const char *prefix_token = options.GetPrefixToken(); - char quote = options.GetQuote(); - - if (prefix_token != nullptr) - options.GetStream()->Printf("%s%c", prefix_token, quote); - else if (quote != 0) - options.GetStream()->Printf("%c", quote); - - uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize(); - - const bool escape_non_printables = options.GetEscapeNonPrintables(); - lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback; - if (escape_non_printables) { - if (Language *language = Language::FindPlugin(options.GetLanguage())) - escaping_callback = language->GetStringPrinterEscapingHelper( - 
lldb_private::formatters::StringPrinter::GetPrintableElementType:: - ASCII); - else - escaping_callback = - lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType:: - ASCII); - } - - // since we tend to accept partial data (and even partially malformed data) - // we might end up with no NULL terminator before the end_ptr hence we need - // to take a slower route and ensure we stay within boundaries - for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) { - if (escape_non_printables) { - uint8_t *next_data = nullptr; - auto printable = escaping_callback(data, data_end, next_data); - auto printable_bytes = printable.GetBytes(); - auto printable_size = printable.GetSize(); - - // We failed to figure out how to print this string. - if (!printable_bytes || !next_data) - return false; - - for (unsigned c = 0; c < printable_size; c++) - options.GetStream()->Printf("%c", *(printable_bytes + c)); - data = (uint8_t *)next_data; - } else { - options.GetStream()->Printf("%c", *data); - data++; - } - } - - const char *suffix_token = options.GetSuffixToken(); - - if (suffix_token != nullptr) - options.GetStream()->Printf("%c%s", quote, suffix_token); - else if (quote != 0) - options.GetStream()->Printf("%c", quote); - - if (is_truncated) - options.GetStream()->Printf("..."); - - return true; -} - template -static bool ReadUTFBufferAndDumpToStream( +static bool ReadEncodedBufferAndDumpToStream( + StringElementType elem_type, const StringPrinter::ReadStringAndDumpToStreamOptions &options, llvm::ConversionResult (*ConvertFunction)(const SourceDataType **, const SourceDataType *, llvm::UTF8 **, llvm::UTF8 *, llvm::ConversionFlags)) { assert(options.GetStream() && "need a Stream to print the string to"); + if (!options.GetStream()) + return false; if (options.GetLocation() == 0 || options.GetLocation() == LLDB_INVALID_ADDRESS) return false; lldb::ProcessSP process_sp(options.GetProcessSP()); - if (!process_sp) return false; - const int type_width = sizeof(SourceDataType); - const int origin_encoding = 8 * type_width; + constexpr int type_width = sizeof(SourceDataType); + constexpr int origin_encoding = 8 * type_width; if (origin_encoding != 8 && origin_encoding != 16 && origin_encoding != 32) return false; - // if not UTF8, I need a conversion function to return proper UTF8 + // If not UTF8 or ASCII, conversion to UTF8 is necessary. if (origin_encoding != 8 && !ConvertFunction) return false; - if (!options.GetStream()) - return false; - - uint32_t sourceSize; bool needs_zero_terminator = options.GetNeedsZeroTermination(); bool is_truncated = false; const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary(); - if (options.HasSourceSize()) { + uint32_t sourceSize; + if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) { + // FIXME: The NSString formatter sets HasSourceSize(true) when the size is + // actually unknown, as well as SetBinaryZeroIsTerminator(false). IIUC the + // C++ formatter also sets SetBinaryZeroIsTerminator(false) when it doesn't + // mean to. I don't see how this makes sense: we should fix the formatters. + // + // Until then, the behavior that's expected for ASCII strings with unknown + // lengths is to read up to the max size and then null-terminate. Do that. 
+ sourceSize = max_size; + needs_zero_terminator = true; + } else if (options.HasSourceSize()) { sourceSize = options.GetSourceSize(); if (!options.GetIgnoreMaxLength()) { if (sourceSize > max_size) { @@ -545,7 +476,6 @@ static bool ReadUTFBufferAndDumpToStream( } const int bufferSPSize = sourceSize * type_width; - lldb::DataBufferSP buffer_sp(new DataBufferHeap(bufferSPSize, 0)); // Check if we got bytes. We never get any bytes if we have an empty @@ -557,14 +487,15 @@ static bool ReadUTFBufferAndDumpToStream( Status error; char *buffer = reinterpret_cast(buffer_sp->GetBytes()); - if (needs_zero_terminator) + if (elem_type == StringElementType::ASCII) + process_sp->ReadCStringFromMemory(options.GetLocation(), buffer, + bufferSPSize, error); + else if (needs_zero_terminator) process_sp->ReadStringFromMemory(options.GetLocation(), buffer, bufferSPSize, error, type_width); else - process_sp->ReadMemoryFromInferior(options.GetLocation(), - (char *)buffer_sp->GetBytes(), + process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer, bufferSPSize, error); - if (error.Fail()) { options.GetStream()->Printf("unable to read data"); return true; @@ -577,67 +508,79 @@ static bool ReadUTFBufferAndDumpToStream( dump_options.SetData(data); dump_options.SetSourceSize(sourceSize); dump_options.SetIsTruncated(is_truncated); + dump_options.SetNeedsZeroTermination(needs_zero_terminator); + if (needs_zero_terminator) + dump_options.SetBinaryZeroIsTerminator(true); - return DumpUTFBufferToStream(ConvertFunction, dump_options); + GetPrintableElementType print_style = (elem_type == StringElementType::ASCII) + ? GetPrintableElementType::ASCII + : GetPrintableElementType::UTF8; + return DumpEncodedBufferToStream(print_style, ConvertFunction, dump_options); } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF8>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { - return ReadUTFBufferAndDumpToStream(options, nullptr); + return ReadEncodedBufferAndDumpToStream(StringElementType::UTF8, + options, nullptr); } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF16>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { - return ReadUTFBufferAndDumpToStream(options, - llvm::ConvertUTF16toUTF8); + return ReadEncodedBufferAndDumpToStream( + StringElementType::UTF16, options, llvm::ConvertUTF16toUTF8); } template <> -bool StringPrinter::ReadStringAndDumpToStream< - StringPrinter::StringElementType::UTF32>( +bool StringPrinter::ReadStringAndDumpToStream( const ReadStringAndDumpToStreamOptions &options) { - return ReadUTFBufferAndDumpToStream(options, - llvm::ConvertUTF32toUTF8); + return ReadEncodedBufferAndDumpToStream( + StringElementType::UTF32, options, llvm::ConvertUTF32toUTF8); } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::UTF8>( - const ReadBufferAndDumpToStreamOptions &options) { - assert(options.GetStream() && "need a Stream to print the string to"); - - return DumpUTFBufferToStream(nullptr, options); +bool StringPrinter::ReadStringAndDumpToStream( + const ReadStringAndDumpToStreamOptions &options) { + return ReadEncodedBufferAndDumpToStream(StringElementType::ASCII, + options, nullptr); } template <> -bool StringPrinter::ReadBufferAndDumpToStream< - StringPrinter::StringElementType::ASCII>( +bool StringPrinter::ReadBufferAndDumpToStream( const 
-  // treat ASCII the same as UTF8
-  // FIXME: can we optimize ASCII some more?
-  return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
+  return DumpEncodedBufferToStream<llvm::UTF8>(GetPrintableElementType::UTF8,
+                                               nullptr, options);
 }
 
 template <>
-bool StringPrinter::ReadBufferAndDumpToStream<
-    StringPrinter::StringElementType::UTF16>(
+bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>(
     const ReadBufferAndDumpToStreamOptions &options) {
-  assert(options.GetStream() && "need a Stream to print the string to");
-
-  return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options);
+  return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
+                                   llvm::ConvertUTF16toUTF8, options);
 }
 
 template <>
-bool StringPrinter::ReadBufferAndDumpToStream<
-    StringPrinter::StringElementType::UTF32>(
+bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>(
     const ReadBufferAndDumpToStreamOptions &options) {
-  assert(options.GetStream() && "need a Stream to print the string to");
+  return DumpEncodedBufferToStream(GetPrintableElementType::UTF8,
+                                   llvm::ConvertUTF32toUTF8, options);
+}
 
-  return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options);
+template <>
+bool StringPrinter::ReadBufferAndDumpToStream<StringElementType::ASCII>(
+    const ReadBufferAndDumpToStreamOptions &options) {
+  // Treat ASCII the same as UTF8.
+  //
+  // FIXME: This is probably not the right thing to do (well, it's debatable).
+  // If an ASCII-encoded string happens to contain a sequence of invalid bytes
+  // that forms a valid UTF8 character, we'll print out that character. This is
+  // good if you're playing fast and loose with encodings (probably good for
+  // std::string users), but maybe not so good if you care about your string
+  // formatter respecting the semantics of your selected string encoding. In
+  // the latter case you'd want to see the character byte sequence ('\x..'), not
+  // the UTF8 character itself.
+  return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
 }
 
 } // namespace formatters
diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp
index ddb2d975d5541..6f61a4bd26bf0 100644
--- a/lldb/source/Expression/IRInterpreter.cpp
+++ b/lldb/source/Expression/IRInterpreter.cpp
@@ -1371,7 +1371,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
         // Find the address of the callee function
 
         lldb_private::Scalar I;
-        const llvm::Value *val = call_inst->getCalledValue();
+        const llvm::Value *val = call_inst->getCalledOperand();
 
         if (!frame.EvaluateValue(I, val, module)) {
           error.SetErrorToGenericError();
diff --git a/lldb/source/Host/common/Host.cpp b/lldb/source/Host/common/Host.cpp
index 8a6af3881a0fd..a5705c92afecf 100644
--- a/lldb/source/Host/common/Host.cpp
+++ b/lldb/source/Host/common/Host.cpp
@@ -521,7 +521,7 @@ Status Host::RunShellCommand(const Args &args, const FileSpec &working_dir,
     }
   }
 
-  FileSpec output_file_spec(output_file_path.c_str());
+  FileSpec output_file_spec(output_file_path.str());
 
   // Set up file descriptors.
launch_info.AppendSuppressFileAction(STDIN_FILENO, true, false); if (output_file_spec) diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp index 821574e8d822f..047cb0e4c2bf5 100644 --- a/lldb/source/Host/common/TCPSocket.cpp +++ b/lldb/source/Host/common/TCPSocket.cpp @@ -18,6 +18,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #if LLDB_ENABLE_POSIX @@ -42,6 +43,16 @@ typedef const void *set_socket_option_arg_type; using namespace lldb; using namespace lldb_private; +static Status GetLastSocketError() { + std::error_code EC; +#ifdef _WIN32 + EC = llvm::mapWindowsError(WSAGetLastError()); +#else + EC = std::error_code(errno, std::generic_category()); +#endif + return EC; +} + namespace { const int kType = SOCK_STREAM; } @@ -192,10 +203,8 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) { for (SocketAddress &address : addresses) { int fd = Socket::CreateSocket(address.GetFamily(), kType, IPPROTO_TCP, m_child_processes_inherit, error); - if (error.Fail()) { - error.Clear(); + if (error.Fail()) continue; - } // enable local address reuse int option_value = 1; @@ -216,6 +225,7 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) { err = ::listen(fd, backlog); if (-1 == err) { + error = GetLastSocketError(); CLOSE_SOCKET(fd); continue; } @@ -228,9 +238,11 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) { m_listen_sockets[fd] = address; } - if (m_listen_sockets.size() == 0) - error.SetErrorString("Failed to connect port"); - return error; + if (m_listen_sockets.empty()) { + assert(error.Fail()); + return error; + } + return Status(); } void TCPSocket::CloseListenSockets() { diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 65b3cf535bfc4..2cc3d47406b7f 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -116,8 +116,7 @@ CommandInterpreter::CommandInterpreter(Debugger &debugger, m_skip_lldbinit_files(false), m_skip_app_init_files(false), m_command_io_handler_sp(), m_comment_char('#'), m_batch_command_mode(false), m_truncation_warning(eNoTruncation), - m_command_source_depth(0), m_num_errors(0), m_quit_requested(false), - m_stopped_for_crash(false) { + m_command_source_depth(0), m_result() { SetEventName(eBroadcastBitThreadShouldExit, "thread-should-exit"); SetEventName(eBroadcastBitResetPrompt, "reset-prompt"); SetEventName(eBroadcastBitQuitCommandReceived, "quit"); @@ -2816,23 +2815,26 @@ void CommandInterpreter::IOHandlerInputComplete(IOHandler &io_handler, break; case eReturnStatusFailed: - m_num_errors++; - if (io_handler.GetFlags().Test(eHandleCommandFlagStopOnError)) + m_result.IncrementNumberOfErrors(); + if (io_handler.GetFlags().Test(eHandleCommandFlagStopOnError)) { + m_result.SetResult(lldb::eCommandInterpreterResultCommandError); io_handler.SetIsDone(true); + } break; case eReturnStatusQuit: - m_quit_requested = true; + m_result.SetResult(lldb::eCommandInterpreterResultQuitRequested); io_handler.SetIsDone(true); break; } // Finally, if we're going to stop on crash, check that here: - if (!m_quit_requested && result.GetDidChangeProcessState() && + if (m_result.IsResult(lldb::eCommandInterpreterResultSuccess) && + result.GetDidChangeProcessState() && io_handler.GetFlags().Test(eHandleCommandFlagStopOnCrash) && DidProcessStopAbnormally()) { io_handler.SetIsDone(true); - 
m_stopped_for_crash = true; + m_result.SetResult(lldb::eCommandInterpreterResultInferiorCrash); } } @@ -2950,26 +2952,27 @@ CommandInterpreter::GetIOHandler(bool force_create, return m_command_io_handler_sp; } -void CommandInterpreter::RunCommandInterpreter( - bool auto_handle_events, bool spawn_thread, +CommandInterpreterRunResult CommandInterpreter::RunCommandInterpreter( CommandInterpreterRunOptions &options) { // Always re-create the command interpreter when we run it in case any file // handles have changed. bool force_create = true; m_debugger.RunIOHandlerAsync(GetIOHandler(force_create, &options)); - m_stopped_for_crash = false; + m_result = CommandInterpreterRunResult(); - if (auto_handle_events) + if (options.GetAutoHandleEvents()) m_debugger.StartEventHandlerThread(); - if (spawn_thread) { + if (options.GetSpawnThread()) { m_debugger.StartIOHandlerThread(); } else { m_debugger.RunIOHandlers(); - if (auto_handle_events) + if (options.GetAutoHandleEvents()) m_debugger.StopEventHandlerThread(); } + + return m_result; } CommandObject * diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index d3ab51118c240..b92f00ec2b632 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -465,7 +465,7 @@ class ObjcObjectChecker : public Instrumenter { } static llvm::Function *GetCalledFunction(llvm::CallInst *inst) { - return GetFunction(inst->getCalledValue()); + return GetFunction(inst->getCalledOperand()); } bool InspectInstruction(llvm::Instruction &i) override { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index 92294ff9c7271..8819b8a773f59 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -1395,7 +1395,7 @@ bool IRForTarget::RemoveCXAAtExit(BasicBlock &basic_block) { if (func && func->getName() == "__cxa_atexit") remove = true; - llvm::Value *val = call->getCalledValue(); + llvm::Value *val = call->getCalledOperand(); if (val && val->getName() == "__cxa_atexit") remove = true; diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp index f584539094261..dcf41444e48f7 100644 --- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp +++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp @@ -417,20 +417,12 @@ bool EmulateInstructionARM64::EvaluateInstruction(uint32_t evaluate_options) { if (opcode_data == nullptr) return false; - // printf ("opcode template for 0x%8.8x: %s\n", opcode, opcode_data->name); const bool auto_advance_pc = evaluate_options & eEmulateInstructionOptionAutoAdvancePC; m_ignore_conditions = evaluate_options & eEmulateInstructionOptionIgnoreConditions; bool success = false; - // if (m_opcode_cpsr == 0 || m_ignore_conditions == false) - // { - // m_opcode_cpsr = ReadRegisterUnsigned (eRegisterKindLLDB, - // gpr_cpsr_arm64, - // 0, - // &success); - // } // Only return false if we are unable to read the CPSR if we care about // conditions @@ -785,10 +777,6 @@ bool EmulateInstructionARM64::EmulateLDPSTP(const uint32_t opcode) { RegisterValue data_Rt; RegisterValue data_Rt2; - - // if (vector) - // CheckFPEnabled(false); - RegisterInfo reg_info_base; RegisterInfo reg_info_Rt; RegisterInfo reg_info_Rt2; 
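A note on the CommandInterpreter change above: the old out-of-band flags
(m_num_errors, m_quit_requested, m_stopped_for_crash) are folded into a single
CommandInterpreterRunResult, and RunCommandInterpreter now takes its
auto-handle-events and spawn-thread switches from CommandInterpreterRunOptions
and returns that result object. A caller would consume it roughly as below
(sketch only; the setter names are assumed to mirror the getters used in this
patch, and HandleCrash is a hypothetical callback):

    CommandInterpreterRunOptions options;
    options.SetAutoHandleEvents(true); // assumed counterpart of GetAutoHandleEvents()
    options.SetSpawnThread(false);     // assumed counterpart of GetSpawnThread()

    CommandInterpreterRunResult result =
        interpreter.RunCommandInterpreter(options);

    // The result replaces the old flags: eCommandInterpreterResultInferiorCrash
    // stands in for m_stopped_for_crash, eCommandInterpreterResultQuitRequested
    // for m_quit_requested.
    if (result.IsResult(lldb::eCommandInterpreterResultInferiorCrash))
      HandleCrash(); // hypothetical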
diff --git a/lldb/source/Plugins/Language/ObjC/CFBasicHash.cpp b/lldb/source/Plugins/Language/ObjC/CFBasicHash.cpp
new file mode 100644
index 0000000000000..42cda0146f2e0
--- /dev/null
+++ b/lldb/source/Plugins/Language/ObjC/CFBasicHash.cpp
@@ -0,0 +1,112 @@
+#include "CFBasicHash.h"
+
+#include "lldb/Utility/Endian.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+bool CFBasicHash::IsValid() const {
+  if (m_address != LLDB_INVALID_ADDRESS) {
+    if (m_ptr_size == 4 && m_ht_32)
+      return true;
+    else if (m_ptr_size == 8 && m_ht_64)
+      return true;
+    else
+      return false;
+  }
+  return false;
+}
+
+bool CFBasicHash::Update(addr_t addr, ExecutionContextRef exe_ctx_rf) {
+  if (addr == LLDB_INVALID_ADDRESS || !addr)
+    return false;
+
+  m_address = addr;
+  m_exe_ctx_ref = exe_ctx_rf;
+  m_ptr_size =
+      m_exe_ctx_ref.GetTargetSP()->GetArchitecture().GetAddressByteSize();
+  m_byte_order = m_exe_ctx_ref.GetTargetSP()->GetArchitecture().GetByteOrder();
+
+  // Only 32-bit and 64-bit architectures are supported.
+  if (m_ptr_size == 4)
+    return UpdateFor(m_ht_32);
+  else if (m_ptr_size == 8)
+    return UpdateFor(m_ht_64);
+  return false;
+}
+
+template <typename T>
+bool CFBasicHash::UpdateFor(std::unique_ptr<__CFBasicHash<T>> &m_ht) {
+  if (m_byte_order != endian::InlHostByteOrder())
+    return false;
+
+  Status error;
+  Target *target = m_exe_ctx_ref.GetTargetSP().get();
+  addr_t addr = m_address.GetLoadAddress(target);
+  size_t size = sizeof(typename __CFBasicHash<T>::RuntimeBase) +
+                sizeof(typename __CFBasicHash<T>::Bits);
+
+  m_ht = std::make_unique<__CFBasicHash<T>>();
+  m_exe_ctx_ref.GetProcessSP()->ReadMemory(addr, m_ht.get(),
+                                           size, error);
+  if (error.Fail())
+    return false;
+
+  m_mutable = !(m_ht->base.cfinfoa & (1 << 6));
+  m_multi = m_ht->bits.counts_offset;
+  m_type = static_cast<HashType>(m_ht->bits.keys_offset);
+  addr_t ptr_offset = addr + size;
+  size_t ptr_count = GetPointerCount();
+  size = ptr_count * sizeof(T);
+
+  m_exe_ctx_ref.GetProcessSP()->ReadMemory(ptr_offset, m_ht->pointers, size,
+                                           error);
+
+  if (error.Fail()) {
+    m_ht = nullptr;
+    return false;
+  }
+
+  return true;
+}
+
+size_t CFBasicHash::GetCount() const {
+  if (!IsValid())
+    return 0;
+
+  if (!m_multi)
+    return (m_ptr_size == 4) ? m_ht_32->bits.used_buckets
+                             : m_ht_64->bits.used_buckets;
+
+  // FIXME: Add support for multi
+  return 0;
+}
+
+size_t CFBasicHash::GetPointerCount() const {
+  if (!IsValid())
+    return 0;
+
+  if (m_multi)
+    return 3; // Bits::counts_offset;
+  return (m_type == HashType::dict) + 1;
+}
+
+addr_t CFBasicHash::GetKeyPointer() const {
+  if (!IsValid())
+    return LLDB_INVALID_ADDRESS;
+
+  if (m_ptr_size == 4)
+    return m_ht_32->pointers[m_ht_32->bits.keys_offset];
+
+  return m_ht_64->pointers[m_ht_64->bits.keys_offset];
+}
+
+addr_t CFBasicHash::GetValuePointer() const {
+  if (!IsValid())
+    return LLDB_INVALID_ADDRESS;
+
+  if (m_ptr_size == 4)
+    return m_ht_32->pointers[0];
+
+  return m_ht_64->pointers[0];
+}
diff --git a/lldb/source/Plugins/Language/ObjC/CFBasicHash.h b/lldb/source/Plugins/Language/ObjC/CFBasicHash.h
new file mode 100644
index 0000000000000..94c2a1ac8a5fa
--- /dev/null
+++ b/lldb/source/Plugins/Language/ObjC/CFBasicHash.h
@@ -0,0 +1,77 @@
+//===-- CFBasicHash.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_CFBASICHASH_H
+#define LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_CFBASICHASH_H
+
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Target.h"
+
+namespace lldb_private {
+
+class CFBasicHash {
+public:
+  enum class HashType { set = 0, dict };
+
+  CFBasicHash() = default;
+  ~CFBasicHash() = default;
+
+  bool Update(lldb::addr_t addr, ExecutionContextRef exe_ctx_rf);
+
+  bool IsValid() const;
+
+  bool IsMutable() const { return m_mutable; };
+  bool IsMultiVariant() const { return m_multi; }
+  HashType GetType() const { return m_type; }
+
+  size_t GetCount() const;
+  lldb::addr_t GetKeyPointer() const;
+  lldb::addr_t GetValuePointer() const;
+
+private:
+  template <typename T> struct __CFBasicHash {
+    struct RuntimeBase {
+      T cfisa;
+      T cfinfoa;
+    } base;
+
+    struct Bits {
+      uint16_t __reserved0;
+      uint16_t __reserved1 : 2;
+      uint16_t keys_offset : 1;
+      uint16_t counts_offset : 2;
+      uint16_t counts_width : 2;
+      uint16_t __reserved2 : 9;
+      uint32_t used_buckets;        // number of used buckets
+      uint64_t deleted : 16;        // number of elements deleted
+      uint64_t num_buckets_idx : 8; // index to number of buckets
+      uint64_t __reserved3 : 40;
+      uint64_t __reserved4;
+    } bits;
+
+    T pointers[3];
+  };
+
+  template <typename T>
+  bool UpdateFor(std::unique_ptr<__CFBasicHash<T>> &m_ht);
+
+  size_t GetPointerCount() const;
+
+private:
+  uint32_t m_ptr_size = UINT32_MAX;
+  lldb::ByteOrder m_byte_order = lldb::eByteOrderInvalid;
+  Address m_address = LLDB_INVALID_ADDRESS;
+  std::unique_ptr<__CFBasicHash<uint32_t>> m_ht_32 = nullptr;
+  std::unique_ptr<__CFBasicHash<uint64_t>> m_ht_64 = nullptr;
+  ExecutionContextRef m_exe_ctx_ref;
+  bool m_mutable = true;
+  bool m_multi = false;
+  HashType m_type;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_CFBASICHASH_H
diff --git a/lldb/source/Plugins/Language/ObjC/CMakeLists.txt b/lldb/source/Plugins/Language/ObjC/CMakeLists.txt
index 7b220e4c0c21d..c998d5455de38 100644
--- a/lldb/source/Plugins/Language/ObjC/CMakeLists.txt
+++ b/lldb/source/Plugins/Language/ObjC/CMakeLists.txt
@@ -11,6 +11,7 @@ endif ()
 add_lldb_library(lldbPluginObjCLanguage PLUGIN
   ObjCLanguage.cpp
   CF.cpp
+  CFBasicHash.cpp
   Cocoa.cpp
   CoreMedia.cpp
   NSArray.cpp
diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
index 8c889927b9362..e1c789ce26d8d 100644
--- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
@@ -529,7 +529,7 @@ lldb_private::formatters::NSArrayMSyntheticFrontEndBase::GetIndexOfChildWithName
 template <typename D32, typename D64>
 lldb_private::formatters::
   GenericNSArrayMSyntheticFrontEnd<D32, D64>::
-  ~GenericNSArrayMSyntheticFrontEnd() {
+    ~GenericNSArrayMSyntheticFrontEnd() {
   delete m_data_32;
   m_data_32 = nullptr;
   delete m_data_64;
@@ -596,7 +596,7 @@ lldb_private::formatters::GenericNSArrayISyntheticFrontEnd::
 
 template <typename D32, typename D64, bool Inline>
 lldb_private::formatters::GenericNSArrayISyntheticFrontEnd<D32, D64, Inline>::
-  ~GenericNSArrayISyntheticFrontEnd() {
+    ~GenericNSArrayISyntheticFrontEnd() {
   delete m_data_32;
   m_data_32 = nullptr;
   delete m_data_64;
diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
index e76096ba78812..998c72e429e79 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
@@ -10,6 +10,7 @@
 
 #include "clang/AST/DeclCXX.h"
 
+#include "CFBasicHash.h"
#include "NSDictionary.h" #include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h" @@ -140,6 +141,37 @@ class NSDictionaryISyntheticFrontEnd : public SyntheticChildrenFrontEnd { std::vector m_children; }; +class NSCFDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd { +public: + NSCFDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); + + size_t CalculateNumChildren() override; + + lldb::ValueObjectSP GetChildAtIndex(size_t idx) override; + + bool Update() override; + + bool MightHaveChildren() override; + + size_t GetIndexOfChildWithName(ConstString name) override; + +private: + struct DictionaryItemDescriptor { + lldb::addr_t key_ptr; + lldb::addr_t val_ptr; + lldb::ValueObjectSP valobj_sp; + }; + + ExecutionContextRef m_exe_ctx_ref; + uint8_t m_ptr_size; + lldb::ByteOrder m_order; + + CFBasicHash m_hashtable; + + CompilerType m_pair_type; + std::vector m_children; +}; + class NSDictionary1SyntheticFrontEnd : public SyntheticChildrenFrontEnd { public: NSDictionary1SyntheticFrontEnd(lldb::ValueObjectSP valobj_sp); @@ -377,6 +409,7 @@ bool lldb_private::formatters::NSDictionarySummaryProvider( static const ConstString g_Dictionary1("__NSSingleEntryDictionaryI"); static const ConstString g_Dictionary0("__NSDictionary0"); static const ConstString g_DictionaryCF("__NSCFDictionary"); + static const ConstString g_DictionaryCFRef("CFDictionaryRef"); if (class_name.IsEmpty()) return false; @@ -388,8 +421,7 @@ bool lldb_private::formatters::NSDictionarySummaryProvider( if (error.Fail()) return false; value &= (is_64bit ? ~0xFC00000000000000UL : ~0xFC000000U); - } else if (class_name == g_DictionaryM || class_name == g_DictionaryMLegacy || - class_name == g_DictionaryCF) { + } else if (class_name == g_DictionaryM || class_name == g_DictionaryMLegacy) { AppleObjCRuntime *apple_runtime = llvm::dyn_cast_or_null(runtime); Status error; @@ -407,8 +439,13 @@ bool lldb_private::formatters::NSDictionarySummaryProvider( value = 1; } else if (class_name == g_Dictionary0) { value = 0; - } - else { + } else if (class_name == g_DictionaryCF || class_name == g_DictionaryCFRef) { + ExecutionContext exe_ctx(process_sp); + CFBasicHash cfbh; + if (!cfbh.Update(valobj_addr, exe_ctx)) + return false; + value = cfbh.GetCount(); + } else { auto &map(NSDictionary_Additionals::GetAdditionalSummaries()); for (auto &candidate : map) { if (candidate.first && candidate.first->Match(class_name)) @@ -466,6 +503,8 @@ lldb_private::formatters::NSDictionarySyntheticFrontEndCreator( static const ConstString g_DictionaryImmutable("__NSDictionaryM_Immutable"); static const ConstString g_DictionaryMLegacy("__NSDictionaryM_Legacy"); static const ConstString g_Dictionary0("__NSDictionary0"); + static const ConstString g_DictionaryCF("__NSCFDictionary"); + static const ConstString g_DictionaryCFRef("CFDictionaryRef"); if (class_name.IsEmpty()) return nullptr; @@ -484,6 +523,8 @@ lldb_private::formatters::NSDictionarySyntheticFrontEndCreator( return (new Foundation1100::NSDictionaryMSyntheticFrontEnd(valobj_sp)); } else if (class_name == g_Dictionary1) { return (new NSDictionary1SyntheticFrontEnd(valobj_sp)); + } else if (class_name == g_DictionaryCF || class_name == g_DictionaryCFRef) { + return (new NSCFDictionarySyntheticFrontEnd(valobj_sp)); } else { auto &map(NSDictionary_Additionals::GetAdditionalSynthetics()); for (auto &candidate : map) { @@ -641,6 +682,140 @@ lldb_private::formatters::NSDictionaryISyntheticFrontEnd::GetChildAtIndex( return dict_item.valobj_sp; } 
+lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::
+    NSCFDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
+    : SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_ptr_size(8),
+      m_order(lldb::eByteOrderInvalid), m_hashtable(), m_pair_type() {}
+
+size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::
+    GetIndexOfChildWithName(ConstString name) {
+  const char *item_name = name.GetCString();
+  const uint32_t idx = ExtractIndexFromString(item_name);
+  if (idx < UINT32_MAX && idx >= CalculateNumChildren())
+    return UINT32_MAX;
+  return idx;
+}
+
+size_t lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::
+    CalculateNumChildren() {
+  if (!m_hashtable.IsValid())
+    return 0;
+  return m_hashtable.GetCount();
+}
+
+bool lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::Update() {
+  m_children.clear();
+  ValueObjectSP valobj_sp = m_backend.GetSP();
+  m_ptr_size = 0;
+  if (!valobj_sp)
+    return false;
+  m_exe_ctx_ref = valobj_sp->GetExecutionContextRef();
+
+  lldb::ProcessSP process_sp(valobj_sp->GetProcessSP());
+  if (!process_sp)
+    return false;
+  m_ptr_size = process_sp->GetAddressByteSize();
+  m_order = process_sp->GetByteOrder();
+  return m_hashtable.Update(valobj_sp->GetValueAsUnsigned(0), m_exe_ctx_ref);
+}
+
+bool lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::
+    MightHaveChildren() {
+  return true;
+}
+
+lldb::ValueObjectSP
+lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::GetChildAtIndex(
+    size_t idx) {
+  lldb::addr_t m_keys_ptr = m_hashtable.GetKeyPointer();
+  lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer();
+
+  const uint32_t num_children = CalculateNumChildren();
+
+  if (idx >= num_children)
+    return lldb::ValueObjectSP();
+
+  if (m_children.empty()) {
+    ProcessSP process_sp = m_exe_ctx_ref.GetProcessSP();
+    if (!process_sp)
+      return lldb::ValueObjectSP();
+
+    Status error;
+    lldb::addr_t key_at_idx = 0, val_at_idx = 0;
+
+    uint32_t tries = 0;
+    uint32_t test_idx = 0;
+
+    // Iterate over inferior memory, reading key/value pointers by shifting
+    // each cursor by test_idx * m_ptr_size. Return an empty ValueObject if a
+    // read fails; otherwise, continue until the number of tries matches the
+    // number of children.
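+    // (test_idx counts buckets visited, tries counts live entries found; the
+    // two diverge whenever a bucket holds a null key or value pointer.)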
+    while (tries < num_children) {
+      key_at_idx = m_keys_ptr + (test_idx * m_ptr_size);
+      val_at_idx = m_values_ptr + (test_idx * m_ptr_size);
+
+      key_at_idx = process_sp->ReadPointerFromMemory(key_at_idx, error);
+      if (error.Fail())
+        return lldb::ValueObjectSP();
+      val_at_idx = process_sp->ReadPointerFromMemory(val_at_idx, error);
+      if (error.Fail())
+        return lldb::ValueObjectSP();
+
+      test_idx++;
+
+      if (!key_at_idx || !val_at_idx)
+        continue;
+      tries++;
+
+      DictionaryItemDescriptor descriptor = {key_at_idx, val_at_idx,
+                                             lldb::ValueObjectSP()};
+
+      m_children.push_back(descriptor);
+    }
+  }
+
+  if (idx >= m_children.size()) // should never happen
+    return lldb::ValueObjectSP();
+
+  DictionaryItemDescriptor &dict_item = m_children[idx];
+  if (!dict_item.valobj_sp) {
+    if (!m_pair_type.IsValid()) {
+      TargetSP target_sp(m_backend.GetTargetSP());
+      if (!target_sp)
+        return ValueObjectSP();
+      m_pair_type = GetLLDBNSPairType(target_sp);
+    }
+    if (!m_pair_type.IsValid())
+      return ValueObjectSP();
+
+    DataBufferSP buffer_sp(new DataBufferHeap(2 * m_ptr_size, 0));
+
+    switch (m_ptr_size) {
+    case 0: // architecture has no clue - fail
+      return lldb::ValueObjectSP();
+    case 4: {
+      uint32_t *data_ptr = reinterpret_cast<uint32_t *>(buffer_sp->GetBytes());
+      *data_ptr = dict_item.key_ptr;
+      *(data_ptr + 1) = dict_item.val_ptr;
+    } break;
+    case 8: {
+      uint64_t *data_ptr = reinterpret_cast<uint64_t *>(buffer_sp->GetBytes());
+      *data_ptr = dict_item.key_ptr;
+      *(data_ptr + 1) = dict_item.val_ptr;
+    } break;
+    default:
+      lldbassert(false && "pointer size is not 4 nor 8");
+    }
+
+    StreamString idx_name;
+    idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx);
+    DataExtractor data(buffer_sp, m_order, m_ptr_size);
+    dict_item.valobj_sp = CreateValueObjectFromData(idx_name.GetString(), data,
+                                                    m_exe_ctx_ref, m_pair_type);
+  }
+  return dict_item.valobj_sp;
+}
+
 lldb_private::formatters::NSDictionary1SyntheticFrontEnd::
     NSDictionary1SyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
     : SyntheticChildrenFrontEnd(*valobj_sp.get()), m_pair(nullptr) {}
@@ -725,7 +900,7 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::
 
 template <typename D32, typename D64>
 lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<D32, D64>::
-  ~GenericNSDictionaryMSyntheticFrontEnd() {
+    ~GenericNSDictionaryMSyntheticFrontEnd() {
   delete m_data_32;
   m_data_32 = nullptr;
   delete m_data_64;
@@ -733,8 +908,8 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::
 }
 
 template <typename D32, typename D64>
-size_t
-lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<D32, D64>::
-    GetIndexOfChildWithName(ConstString name) {
+size_t lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<
+    D32, D64>::GetIndexOfChildWithName(ConstString name) {
   const char *item_name = name.GetCString();
   uint32_t idx = ExtractIndexFromString(item_name);
   if (idx < UINT32_MAX && idx >= CalculateNumChildren())
@@ -783,7 +958,7 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::
   }
   if (error.Fail())
     return false;
-  return false;
+  return true;
 }
 
 template <typename D32, typename D64>
@@ -795,9 +970,8 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::
 
 template <typename D32, typename D64>
 lldb::ValueObjectSP
-lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<D32, D64>::
-    GetChildAtIndex(
-        size_t idx) {
+lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd<
+    D32, D64>::GetChildAtIndex(size_t idx) {
   lldb::addr_t m_keys_ptr;
   lldb::addr_t m_values_ptr;
   if (m_data_32) {
@@ -885,7 +1059,6 @@ lldb_private::formatters::GenericNSDictionaryMSyntheticFrontEnd::
   return dict_item.valobj_sp;
 }
 
-
 lldb_private::formatters::Foundation1100::
   NSDictionaryMSyntheticFrontEnd::
     NSDictionaryMSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.h b/lldb/source/Plugins/Language/ObjC/NSDictionary.h
index cc338e2f58e82..57dacd6759d29 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.h
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.h
@@ -1,5 +1,4 @@
-//===-- NSDictionary.h ---------------------------------------------------*- C++
-//-*-===//
+//===-- NSDictionary.h ------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
index 59597bed85b5f..543a1c5978e0e 100644
--- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "NSSet.h"
+#include "CFBasicHash.h"
 
 #include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h"
 #include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
@@ -79,6 +80,36 @@ class NSSetISyntheticFrontEnd : public SyntheticChildrenFrontEnd {
   std::vector<SetItemDescriptor> m_children;
 };
 
+class NSCFSetSyntheticFrontEnd : public SyntheticChildrenFrontEnd {
+public:
+  NSCFSetSyntheticFrontEnd(lldb::ValueObjectSP valobj_sp);
+
+  size_t CalculateNumChildren() override;
+
+  lldb::ValueObjectSP GetChildAtIndex(size_t idx) override;
+
+  bool Update() override;
+
+  bool MightHaveChildren() override;
+
+  size_t GetIndexOfChildWithName(ConstString name) override;
+
+private:
+  struct SetItemDescriptor {
+    lldb::addr_t item_ptr;
+    lldb::ValueObjectSP valobj_sp;
+  };
+
+  ExecutionContextRef m_exe_ctx_ref;
+  uint8_t m_ptr_size;
+  lldb::ByteOrder m_order;
+
+  CFBasicHash m_hashtable;
+
+  CompilerType m_pair_type;
+  std::vector<SetItemDescriptor> m_children;
+};
+
 template <typename D32, typename D64>
 class GenericNSSetMSyntheticFrontEnd : public SyntheticChildrenFrontEnd {
 public:
@@ -245,21 +276,24 @@ bool lldb_private::formatters::NSSetSummaryProvider(
 
   uint64_t value = 0;
 
-  ConstString class_name_cs = descriptor->GetClassName();
-  const char *class_name = class_name_cs.GetCString();
+  ConstString class_name(descriptor->GetClassName());
 
-  if (!class_name || !*class_name)
+  static const ConstString g_SetI("__NSSetI");
+  static const ConstString g_OrderedSetI("__NSOrderedSetI");
+  static const ConstString g_SetM("__NSSetM");
+  static const ConstString g_SetCF("__NSCFSet");
+
+  if (class_name.IsEmpty())
     return false;
 
-  if (!strcmp(class_name, "__NSSetI") ||
-      !strcmp(class_name, "__NSOrderedSetI")) {
+  if (class_name == g_SetI || class_name == g_OrderedSetI) {
     Status error;
     value = process_sp->ReadUnsignedIntegerFromMemory(valobj_addr + ptr_size,
                                                       ptr_size, 0, error);
     if (error.Fail())
      return false;
    value &= (is_64bit ? ~0xFC00000000000000UL : ~0xFC000000U);
-  } else if (!strcmp(class_name, "__NSSetM")) {
+  } else if (class_name == g_SetM) {
     AppleObjCRuntime *apple_runtime =
         llvm::dyn_cast_or_null<AppleObjCRuntime>(runtime);
     Status error;
@@ -272,9 +306,15 @@ bool lldb_private::formatters::NSSetSummaryProvider(
     }
     if (error.Fail())
       return false;
+  } else if (class_name == g_SetCF) {
+    ExecutionContext exe_ctx(process_sp);
+    CFBasicHash cfbh;
+    if (!cfbh.Update(valobj_addr, exe_ctx))
+      return false;
+    value = cfbh.GetCount();
   } else {
     auto &map(NSSet_Additionals::GetAdditionalSummaries());
-    auto iter = map.find(class_name_cs), end = map.end();
+    auto iter = map.find(class_name), end = map.end();
     if (iter != end)
       return iter->second(valobj, stream, options);
     else
@@ -321,16 +361,19 @@ lldb_private::formatters::NSSetSyntheticFrontEndCreator(
   if (!descriptor || !descriptor->IsValid())
     return nullptr;
 
-  ConstString class_name_cs = descriptor->GetClassName();
-  const char *class_name = class_name_cs.GetCString();
+  ConstString class_name = descriptor->GetClassName();
 
-  if (!class_name || !*class_name)
+  static const ConstString g_SetI("__NSSetI");
+  static const ConstString g_OrderedSetI("__NSOrderedSetI");
+  static const ConstString g_SetM("__NSSetM");
+  static const ConstString g_SetCF("__NSCFSet");
+
+  if (class_name.IsEmpty())
     return nullptr;
 
-  if (!strcmp(class_name, "__NSSetI") ||
-      !strcmp(class_name, "__NSOrderedSetI")) {
+  if (class_name == g_SetI || class_name == g_OrderedSetI) {
     return (new NSSetISyntheticFrontEnd(valobj_sp));
-  } else if (!strcmp(class_name, "__NSSetM")) {
+  } else if (class_name == g_SetM) {
     AppleObjCRuntime *apple_runtime =
         llvm::dyn_cast_or_null<AppleObjCRuntime>(runtime);
     if (apple_runtime) {
@@ -343,9 +386,11 @@ lldb_private::formatters::NSSetSyntheticFrontEndCreator(
     } else {
       return (new Foundation1300::NSSetMSyntheticFrontEnd(valobj_sp));
     }
+  } else if (class_name == g_SetCF) {
+    return (new NSCFSetSyntheticFrontEnd(valobj_sp));
   } else {
     auto &map(NSSet_Additionals::GetAdditionalSynthetics());
-    auto iter = map.find(class_name_cs), end = map.end();
+    auto iter = map.find(class_name), end = map.end();
     if (iter != end)
       return iter->second(synth, valobj_sp);
     return nullptr;
@@ -475,16 +520,18 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex(size_t idx) {
     auto ptr_size = process_sp->GetAddressByteSize();
     DataBufferHeap buffer(ptr_size, 0);
     switch (ptr_size) {
-    case 0: // architecture has no clue?? - fail
+    case 0: // architecture has no clue - fail
       return lldb::ValueObjectSP();
     case 4:
-      *((uint32_t *)buffer.GetBytes()) = (uint32_t)set_item.item_ptr;
+      *reinterpret_cast<uint32_t *>(buffer.GetBytes()) =
+          static_cast<uint32_t>(set_item.item_ptr);
       break;
     case 8:
-      *((uint64_t *)buffer.GetBytes()) = (uint64_t)set_item.item_ptr;
+      *reinterpret_cast<uint64_t *>(buffer.GetBytes()) =
+          static_cast<uint64_t>(set_item.item_ptr);
       break;
     default:
-      assert(false && "pointer size is not 4 nor 8 - get out of here ASAP");
+      lldbassert(false && "pointer size is not 4 nor 8");
     }
     StreamString idx_name;
     idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx);
@@ -501,6 +548,128 @@ lldb_private::formatters::NSSetISyntheticFrontEnd::GetChildAtIndex(size_t idx) {
   return set_item.valobj_sp;
 }
 
+lldb_private::formatters::NSCFSetSyntheticFrontEnd::NSCFSetSyntheticFrontEnd(
+    lldb::ValueObjectSP valobj_sp)
+    : SyntheticChildrenFrontEnd(*valobj_sp), m_exe_ctx_ref(), m_ptr_size(8),
+      m_order(lldb::eByteOrderInvalid), m_hashtable(), m_pair_type() {}
+
+size_t
+lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetIndexOfChildWithName(
+    ConstString name) {
+  const char *item_name = name.GetCString();
+  const uint32_t idx = ExtractIndexFromString(item_name);
+  if (idx < UINT32_MAX && idx >= CalculateNumChildren())
+    return UINT32_MAX;
+  return idx;
+}
+
+size_t
+lldb_private::formatters::NSCFSetSyntheticFrontEnd::CalculateNumChildren() {
+  if (!m_hashtable.IsValid())
+    return 0;
+  return m_hashtable.GetCount();
+}
+
+bool lldb_private::formatters::NSCFSetSyntheticFrontEnd::Update() {
+  m_children.clear();
+  ValueObjectSP valobj_sp = m_backend.GetSP();
+  m_ptr_size = 0;
+  if (!valobj_sp)
+    return false;
+  m_exe_ctx_ref = valobj_sp->GetExecutionContextRef();
+
+  lldb::ProcessSP process_sp(valobj_sp->GetProcessSP());
+  if (!process_sp)
+    return false;
+  m_ptr_size = process_sp->GetAddressByteSize();
+  m_order = process_sp->GetByteOrder();
+  return m_hashtable.Update(valobj_sp->GetValueAsUnsigned(0), m_exe_ctx_ref);
+}
+
+bool lldb_private::formatters::NSCFSetSyntheticFrontEnd::MightHaveChildren() {
+  return true;
+}
+
+lldb::ValueObjectSP
+lldb_private::formatters::NSCFSetSyntheticFrontEnd::GetChildAtIndex(
+    size_t idx) {
+  lldb::addr_t m_values_ptr = m_hashtable.GetValuePointer();
+
+  const uint32_t num_children = CalculateNumChildren();
+
+  if (idx >= num_children)
+    return lldb::ValueObjectSP();
+
+  if (m_children.empty()) {
+    ProcessSP process_sp = m_exe_ctx_ref.GetProcessSP();
+    if (!process_sp)
+      return lldb::ValueObjectSP();
+
+    Status error;
+    lldb::addr_t val_at_idx = 0;
+
+    uint32_t tries = 0;
+    uint32_t test_idx = 0;
+
+    // Iterate over inferior memory, reading value pointers by shifting the
+    // cursor by test_idx * m_ptr_size. Return an empty ValueObject if a read
+    // fails; otherwise, continue until the number of tries matches the
+    // number of children.
+    while (tries < num_children) {
+      val_at_idx = m_values_ptr + (test_idx * m_ptr_size);
+
+      val_at_idx = process_sp->ReadPointerFromMemory(val_at_idx, error);
+      if (error.Fail())
+        return lldb::ValueObjectSP();
+
+      test_idx++;
+
+      if (!val_at_idx)
+        continue;
+      tries++;
+
+      SetItemDescriptor descriptor = {val_at_idx, lldb::ValueObjectSP()};
+
+      m_children.push_back(descriptor);
+    }
+  }
+
+  if (idx >= m_children.size()) // should never happen
+    return lldb::ValueObjectSP();
+
+  SetItemDescriptor &set_item = m_children[idx];
+  if (!set_item.valobj_sp) {
+
+    DataBufferSP buffer_sp(new DataBufferHeap(m_ptr_size, 0));
+
+    switch (m_ptr_size) {
+    case 0: // architecture has no clue - fail
+      return lldb::ValueObjectSP();
+    case 4:
+      *reinterpret_cast<uint32_t *>(buffer_sp->GetBytes()) =
+          static_cast<uint32_t>(set_item.item_ptr);
+      break;
+    case 8:
+      *reinterpret_cast<uint64_t *>(buffer_sp->GetBytes()) =
+          static_cast<uint64_t>(set_item.item_ptr);
+      break;
+    default:
+      lldbassert(false && "pointer size is not 4 nor 8");
+    }
+    StreamString idx_name;
+    idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx);
+
+    DataExtractor data(buffer_sp, m_order, m_ptr_size);
+
+    set_item.valobj_sp = CreateValueObjectFromData(
+        idx_name.GetString(), data, m_exe_ctx_ref,
+        m_backend.GetCompilerType().GetBasicTypeFromAST(
+            lldb::eBasicTypeObjCID));
+  }
+
+  return set_item.valobj_sp;
+}
+
 template <typename D32, typename D64>
 lldb_private::formatters::
     GenericNSSetMSyntheticFrontEnd<D32, D64>::GenericNSSetMSyntheticFrontEnd(
@@ -513,7 +682,7 @@ lldb_private::formatters::
 
 template <typename D32, typename D64>
 lldb_private::formatters::
-    GenericNSSetMSyntheticFrontEnd<D32,D64>::~GenericNSSetMSyntheticFrontEnd() {
+    GenericNSSetMSyntheticFrontEnd<D32, D64>::~GenericNSSetMSyntheticFrontEnd() {
   delete m_data_32;
   m_data_32 = nullptr;
   delete m_data_64;
diff --git a/lldb/source/Plugins/Language/ObjC/NSString.cpp b/lldb/source/Plugins/Language/ObjC/NSString.cpp
index 7c4afb36b5883..b9d0d73cbc2eb 100644
--- a/lldb/source/Plugins/Language/ObjC/NSString.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSString.cpp
@@ -175,7 +175,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
       options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                  TypeSummaryCapping::eTypeSummaryUncapped);
       options.SetBinaryZeroIsTerminator(false);
-      options.SetLanguage(summary_options.GetLanguage());
       return StringPrinter::ReadStringAndDumpToStream<
           StringPrinter::StringElementType::UTF16>(options);
     } else {
@@ -188,7 +187,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
       options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                  TypeSummaryCapping::eTypeSummaryUncapped);
       options.SetBinaryZeroIsTerminator(false);
-      options.SetLanguage(summary_options.GetLanguage());
       return StringPrinter::ReadStringAndDumpToStream<
           StringPrinter::StringElementType::ASCII>(options);
     }
@@ -204,7 +202,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
     options.SetHasSourceSize(has_explicit_length);
     options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                TypeSummaryCapping::eTypeSummaryUncapped);
-    options.SetLanguage(summary_options.GetLanguage());
     return StringPrinter::ReadStringAndDumpToStream<
         StringPrinter::StringElementType::ASCII>(options);
   } else if (is_unicode) {
@@ -229,7 +226,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
     options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                TypeSummaryCapping::eTypeSummaryUncapped);
     options.SetBinaryZeroIsTerminator(!has_explicit_length);
-    options.SetLanguage(summary_options.GetLanguage());
     return StringPrinter::ReadStringAndDumpToStream<
         StringPrinter::StringElementType::UTF16>(options);
   } else if (is_path_store) {
@@ -250,7 +246,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
     options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                TypeSummaryCapping::eTypeSummaryUncapped);
     options.SetBinaryZeroIsTerminator(!has_explicit_length);
-    options.SetLanguage(summary_options.GetLanguage());
     return StringPrinter::ReadStringAndDumpToStream<
         StringPrinter::StringElementType::UTF16>(options);
   } else if (is_inline) {
@@ -273,7 +268,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
     options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                TypeSummaryCapping::eTypeSummaryUncapped);
     options.SetBinaryZeroIsTerminator(!has_explicit_length);
-    options.SetLanguage(summary_options.GetLanguage());
     if (has_explicit_length)
       return StringPrinter::ReadStringAndDumpToStream<
           StringPrinter::StringElementType::UTF8>(options);
@@ -295,7 +289,6 @@ bool lldb_private::formatters::NSStringSummaryProvider(
     options.SetHasSourceSize(has_explicit_length);
     options.SetIgnoreMaxLength(summary_options.GetCapping() ==
                                TypeSummaryCapping::eTypeSummaryUncapped);
-    options.SetLanguage(summary_options.GetLanguage());
     return StringPrinter::ReadStringAndDumpToStream<
         StringPrinter::StringElementType::ASCII>(options);
   }
diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
index 6b2a5f845d734..4acb4c0d216a1 100644
--- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
+++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
@@ -606,6 +606,11 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
                   lldb_private::formatters::NSSetSyntheticFrontEndCreator,
                   "__NSSetM synthetic children", ConstString("__NSSetM"),
                   ScriptedSyntheticChildren::Flags());
+  AddCXXSynthetic(objc_category_sp,
+                  lldb_private::formatters::NSSetSyntheticFrontEndCreator,
+                  "__NSCFSet synthetic children", ConstString("__NSCFSet"),
+                  ScriptedSyntheticChildren::Flags());
+
   AddCXXSynthetic(
       objc_category_sp, lldb_private::formatters::NSSetSyntheticFrontEndCreator,
       "NSMutableSet synthetic children", ConstString("NSMutableSet"),
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index 0777c78aa22d6..436f93d9d8d65 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -1762,6 +1762,7 @@ PlatformDarwin::FindXcodeContentsDirectoryInPath(llvm::StringRef path) {
 }
 
 llvm::StringRef PlatformDarwin::GetSDKPath(XcodeSDK sdk) {
+  std::lock_guard<std::mutex> guard(m_sdk_path_mutex);
   std::string &path = m_sdk_path[sdk.GetString()];
   if (path.empty())
     path = HostInfo::GetXcodeSDK(sdk);
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
index 7d205be596896..0252b093d6beb 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h
@@ -171,6 +171,7 @@ class PlatformDarwin : public PlatformPOSIX {
 
   std::string m_developer_directory;
   llvm::StringMap<std::string> m_sdk_path;
+  std::mutex m_sdk_path_mutex;
 
 private:
   DISALLOW_COPY_AND_ASSIGN(PlatformDarwin);
diff --git a/lldb/source/Plugins/Process/Darwin/CFBundle.cpp b/lldb/source/Plugins/Process/Darwin/CFBundle.cpp
deleted file mode 100644
index bccf73acddf86..0000000000000
--- a/lldb/source/Plugins/Process/Darwin/CFBundle.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- CFBundle.cpp ------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License
v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 1/16/08. -// -//===----------------------------------------------------------------------===// - -#include "CFBundle.h" -#include "CFString.h" - -// CFBundle constructor -CFBundle::CFBundle(const char *path) - : CFReleaser(), m_bundle_url() { - if (path && path[0]) - SetPath(path); -} - -// CFBundle copy constructor -CFBundle::CFBundle(const CFBundle &rhs) - : CFReleaser(rhs), m_bundle_url(rhs.m_bundle_url) {} - -// CFBundle copy constructor -CFBundle &CFBundle::operator=(const CFBundle &rhs) { - *this = rhs; - return *this; -} - -// Destructor -CFBundle::~CFBundle() {} - -// Set the path for a bundle by supplying a -bool CFBundle::SetPath(const char *path) { - CFAllocatorRef alloc = kCFAllocatorDefault; - // Release our old bundle and ULR - reset(); // This class is a CFReleaser - m_bundle_url.reset(); - // Make a CFStringRef from the supplied path - CFString cf_path; - cf_path.SetFileSystemRepresentation(path); - if (cf_path.get()) { - // Make our Bundle URL - m_bundle_url.reset(::CFURLCreateWithFileSystemPath( - alloc, cf_path.get(), kCFURLPOSIXPathStyle, true)); - if (m_bundle_url.get()) { - reset(::CFBundleCreate(alloc, m_bundle_url.get())); - } - } - return get() != NULL; -} - -CFStringRef CFBundle::GetIdentifier() const { - CFBundleRef bundle = get(); - if (bundle != NULL) - return ::CFBundleGetIdentifier(bundle); - return NULL; -} - -CFURLRef CFBundle::CopyExecutableURL() const { - CFBundleRef bundle = get(); - if (bundle != NULL) - return CFBundleCopyExecutableURL(bundle); - return NULL; -} diff --git a/lldb/source/Plugins/Process/Darwin/CFBundle.h b/lldb/source/Plugins/Process/Darwin/CFBundle.h deleted file mode 100644 index f49dc30f1f8f7..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/CFBundle.h +++ /dev/null @@ -1,35 +0,0 @@ -//===-- CFBundle.h ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 1/16/08. -// -//===----------------------------------------------------------------------===// - -#ifndef __CFBundle_h__ -#define __CFBundle_h__ - -#include "CFUtils.h" - -class CFBundle : public CFReleaser { -public: - // Constructors and Destructors - CFBundle(const char *path = NULL); - CFBundle(const CFBundle &rhs); - CFBundle &operator=(const CFBundle &rhs); - virtual ~CFBundle(); - bool SetPath(const char *path); - - CFStringRef GetIdentifier() const; - - CFURLRef CopyExecutableURL() const; - -protected: - CFReleaser m_bundle_url; -}; - -#endif // #ifndef __CFBundle_h__ diff --git a/lldb/source/Plugins/Process/Darwin/CFString.cpp b/lldb/source/Plugins/Process/Darwin/CFString.cpp deleted file mode 100644 index 6d41ea200dc5d..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/CFString.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===-- CFString.cpp ------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 1/16/08. -// -//===----------------------------------------------------------------------===// - -#include "CFString.h" -#include -#include - -// CFString constructor -CFString::CFString(CFStringRef s) : CFReleaser(s) {} - -// CFString copy constructor -CFString::CFString(const CFString &rhs) : CFReleaser(rhs) {} - -// CFString copy constructor -CFString &CFString::operator=(const CFString &rhs) { - if (this != &rhs) - *this = rhs; - return *this; -} - -CFString::CFString(const char *cstr, CFStringEncoding cstr_encoding) - : CFReleaser() { - if (cstr && cstr[0]) { - reset( - ::CFStringCreateWithCString(kCFAllocatorDefault, cstr, cstr_encoding)); - } -} - -// Destructor -CFString::~CFString() {} - -const char *CFString::GetFileSystemRepresentation(std::string &s) { - return CFString::FileSystemRepresentation(get(), s); -} - -CFStringRef CFString::SetFileSystemRepresentation(const char *path) { - CFStringRef new_value = NULL; - if (path && path[0]) - new_value = - ::CFStringCreateWithFileSystemRepresentation(kCFAllocatorDefault, path); - reset(new_value); - return get(); -} - -CFStringRef CFString::SetFileSystemRepresentationFromCFType(CFTypeRef cf_type) { - CFStringRef new_value = NULL; - if (cf_type != NULL) { - CFTypeID cf_type_id = ::CFGetTypeID(cf_type); - - if (cf_type_id == ::CFStringGetTypeID()) { - // Retain since we are using the existing object - new_value = (CFStringRef)::CFRetain(cf_type); - } else if (cf_type_id == ::CFURLGetTypeID()) { - new_value = - ::CFURLCopyFileSystemPath((CFURLRef)cf_type, kCFURLPOSIXPathStyle); - } - } - reset(new_value); - return get(); -} - -CFStringRef -CFString::SetFileSystemRepresentationAndExpandTilde(const char *path) { - std::string expanded_path; - if (CFString::GlobPath(path, expanded_path)) - SetFileSystemRepresentation(expanded_path.c_str()); - else - reset(); - return get(); -} - -const char *CFString::UTF8(std::string &str) { - return CFString::UTF8(get(), str); -} - -// Static function that puts a copy of the UTF8 contents of CF_STR into STR and -// returns the C string pointer that is contained in STR when successful, else -// NULL is returned. This allows the std::string parameter to own the extracted -// string, -// and also allows that string to be returned as a C string pointer that can be -// used. - -const char *CFString::UTF8(CFStringRef cf_str, std::string &str) { - if (cf_str) { - const CFStringEncoding encoding = kCFStringEncodingUTF8; - CFIndex max_utf8_str_len = CFStringGetLength(cf_str); - max_utf8_str_len = - CFStringGetMaximumSizeForEncoding(max_utf8_str_len, encoding); - if (max_utf8_str_len > 0) { - str.resize(max_utf8_str_len); - if (!str.empty()) { - if (CFStringGetCString(cf_str, &str[0], str.size(), encoding)) { - str.resize(strlen(str.c_str())); - return str.c_str(); - } - } - } - } - return NULL; -} - -// Static function that puts a copy of the file system representation of CF_STR -// into STR and returns the C string pointer that is contained in STR when -// successful, else NULL is returned. This allows the std::string parameter to -// own the extracted string, and also allows that string to be returned as a C -// string pointer that can be used. 
- -const char *CFString::FileSystemRepresentation(CFStringRef cf_str, - std::string &str) { - if (cf_str) { - CFIndex max_length = - ::CFStringGetMaximumSizeOfFileSystemRepresentation(cf_str); - if (max_length > 0) { - str.resize(max_length); - if (!str.empty()) { - if (::CFStringGetFileSystemRepresentation(cf_str, &str[0], - str.size())) { - str.erase(::strlen(str.c_str())); - return str.c_str(); - } - } - } - } - str.erase(); - return NULL; -} - -CFIndex CFString::GetLength() const { - CFStringRef str = get(); - if (str) - return CFStringGetLength(str); - return 0; -} - -const char *CFString::GlobPath(const char *path, std::string &expanded_path) { - glob_t globbuf; - if (::glob(path, GLOB_TILDE, NULL, &globbuf) == 0) { - expanded_path = globbuf.gl_pathv[0]; - ::globfree(&globbuf); - } else - expanded_path.clear(); - - return expanded_path.c_str(); -} diff --git a/lldb/source/Plugins/Process/Darwin/CFString.h b/lldb/source/Plugins/Process/Darwin/CFString.h deleted file mode 100644 index d1bd5682689e8..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/CFString.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- CFString.h ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 1/16/08. -// -//===----------------------------------------------------------------------===// - -#ifndef __CFString_h__ -#define __CFString_h__ - -#include "CFUtils.h" -#include - -class CFString : public CFReleaser { -public: - // Constructors and Destructors - CFString(CFStringRef cf_str = NULL); - CFString(const char *s, CFStringEncoding encoding = kCFStringEncodingUTF8); - CFString(const CFString &rhs); - CFString &operator=(const CFString &rhs); - virtual ~CFString(); - - const char *GetFileSystemRepresentation(std::string &str); - CFStringRef SetFileSystemRepresentation(const char *path); - CFStringRef SetFileSystemRepresentationFromCFType(CFTypeRef cf_type); - CFStringRef SetFileSystemRepresentationAndExpandTilde(const char *path); - const char *UTF8(std::string &str); - CFIndex GetLength() const; - static const char *UTF8(CFStringRef cf_str, std::string &str); - static const char *FileSystemRepresentation(CFStringRef cf_str, - std::string &str); - static const char *GlobPath(const char *path, std::string &expanded_path); -}; - -#endif // #ifndef __CFString_h__ diff --git a/lldb/source/Plugins/Process/Darwin/CFUtils.h b/lldb/source/Plugins/Process/Darwin/CFUtils.h deleted file mode 100644 index 0ef7b18c55fea..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/CFUtils.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- CFUtils.h -----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 3/5/07. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CFUtils_h__
-#define __CFUtils_h__
-
-#include <CoreFoundation/CoreFoundation.h>
-
-#ifdef __cplusplus
-
-// Templatized CF helper class that can own any CF pointer and will
-// call CFRelease() on any valid pointer it owns unless that pointer is
-// explicitly released using the release() member function.
-template <class T> class CFReleaser {
-public:
-  // Type names for the value
-  typedef T element_type;
-
-  // Constructors and destructors
-  CFReleaser(T ptr = NULL) : _ptr(ptr) {}
-  CFReleaser(const CFReleaser &copy) : _ptr(copy.get()) {
-    if (get())
-      ::CFRetain(get());
-  }
-  virtual ~CFReleaser() { reset(); }
-
-  // Assignments
-  CFReleaser &operator=(const CFReleaser &copy) {
-    if (copy != *this) {
-      // Replace our owned pointer with the new one
-      reset(copy.get());
-      // Retain the current pointer that we own
-      if (get())
-        ::CFRetain(get());
-    }
-  }
-  // Get the address of the contained type
-  T *ptr_address() { return &_ptr; }
-
-  // Access the pointer itself
-  const T get() const { return _ptr; }
-  T get() { return _ptr; }
-
-  // Set a new value for the pointer and CFRelease our old
-  // value if we had a valid one.
-  void reset(T ptr = NULL) {
-    if (ptr != _ptr) {
-      if (_ptr != NULL)
-        ::CFRelease(_ptr);
-      _ptr = ptr;
-    }
-  }
-
-  // Release ownership without calling CFRelease
-  T release() {
-    T tmp = _ptr;
-    _ptr = NULL;
-    return tmp;
-  }
-
-private:
-  element_type _ptr;
-};
-
-#endif // #ifdef __cplusplus
-#endif // #ifndef __CFUtils_h__
diff --git a/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.cpp b/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.cpp
deleted file mode 100644
index f1126789c1852..0000000000000
--- a/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.cpp
+++ /dev/null
@@ -1,638 +0,0 @@
-//===-- DarwinProcessLauncher.cpp -----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-//
-// DarwinProcessLauncher.cpp
-// lldb
-//
-// Created by Todd Fiala on 8/30/16.
-// -// - -#include "DarwinProcessLauncher.h" - -// C includes -#include -#include -#include -#include - -#ifndef _POSIX_SPAWN_DISABLE_ASLR -#define _POSIX_SPAWN_DISABLE_ASLR 0x0100 -#endif - -// LLDB includes -#include "lldb/lldb-enumerations.h" - -#include "lldb/Host/PseudoTerminal.h" -#include "lldb/Target/ProcessLaunchInfo.h" -#include "lldb/Utility/Log.h" -#include "lldb/Utility/Status.h" -#include "lldb/Utility/StreamString.h" -#include "llvm/Support/Errno.h" - -#include "CFBundle.h" -#include "CFString.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_darwin; -using namespace lldb_private::darwin_process_launcher; - -namespace { -static LaunchFlavor g_launch_flavor = LaunchFlavor::Default; -} - -namespace lldb_private { -namespace darwin_process_launcher { - -static uint32_t GetCPUTypeForLocalProcess(::pid_t pid) { - int mib[CTL_MAXNAME] = { - 0, - }; - size_t len = CTL_MAXNAME; - if (::sysctlnametomib("sysctl.proc_cputype", mib, &len)) - return 0; - - mib[len] = pid; - len++; - - cpu_type_t cpu; - size_t cpu_len = sizeof(cpu); - if (::sysctl(mib, static_cast(len), &cpu, &cpu_len, 0, 0)) - cpu = 0; - return cpu; -} - -static bool ResolveExecutablePath(const char *path, char *resolved_path, - size_t resolved_path_size) { - if (path == NULL || path[0] == '\0') - return false; - - char max_path[PATH_MAX]; - std::string result; - CFString::GlobPath(path, result); - - if (result.empty()) - result = path; - - struct stat path_stat; - if (::stat(path, &path_stat) == 0) { - if ((path_stat.st_mode & S_IFMT) == S_IFDIR) { - CFBundle bundle(path); - CFReleaser url(bundle.CopyExecutableURL()); - if (url.get()) { - if (::CFURLGetFileSystemRepresentation( - url.get(), true, (UInt8 *)resolved_path, resolved_path_size)) - return true; - } - } - } - - if (realpath(path, max_path)) { - // Found the path relatively... - ::strncpy(resolved_path, max_path, resolved_path_size); - return strlen(resolved_path) + 1 < resolved_path_size; - } else { - // Not a relative path, check the PATH environment variable if the - const char *PATH = getenv("PATH"); - if (PATH) { - const char *curr_path_start = PATH; - const char *curr_path_end; - while (curr_path_start && *curr_path_start) { - curr_path_end = strchr(curr_path_start, ':'); - if (curr_path_end == NULL) { - result.assign(curr_path_start); - curr_path_start = NULL; - } else if (curr_path_end > curr_path_start) { - size_t len = curr_path_end - curr_path_start; - result.assign(curr_path_start, len); - curr_path_start += len + 1; - } else - break; - - result += '/'; - result += path; - struct stat s; - if (stat(result.c_str(), &s) == 0) { - ::strncpy(resolved_path, result.c_str(), resolved_path_size); - return result.size() + 1 < resolved_path_size; - } - } - } - } - return false; -} - -// TODO check if we have a general purpose fork and exec. We may be -// able to get rid of this entirely. -static Status ForkChildForPTraceDebugging(const char *path, char const *argv[], - char const *envp[], ::pid_t *pid, - int *pty_fd) { - Status error; - if (!path || !argv || !envp || !pid || !pty_fd) { - error.SetErrorString("invalid arguments"); - return error; - } - - // Use a fork that ties the child process's stdin/out/err to a pseudo - // terminal so we can read it in our MachProcess::STDIOThread as unbuffered - // io. - PseudoTerminal pty; - char fork_error[256]; - memset(fork_error, 0, sizeof(fork_error)); - *pid = static_cast<::pid_t>(pty.Fork(fork_error, sizeof(fork_error))); - if (*pid < 0) { - // Status during fork. 
- *pid = static_cast<::pid_t>(LLDB_INVALID_PROCESS_ID); - error.SetErrorStringWithFormat("%s(): fork failed: %s", __FUNCTION__, - fork_error); - return error; - } else if (*pid == 0) { - // Child process - - // Debug this process. - ::ptrace(PT_TRACE_ME, 0, 0, 0); - - // Get BSD signals as mach exceptions. - ::ptrace(PT_SIGEXC, 0, 0, 0); - - // If our parent is setgid, let's make sure we don't inherit those extra - // powers due to nepotism. - if (::setgid(getgid()) == 0) { - // Let the child have its own process group. We need to execute this call - // in both the child and parent to avoid a race condition between the two - // processes. - - // Set the child process group to match its pid. - ::setpgid(0, 0); - - // Sleep a bit before the exec call. - ::sleep(1); - - // Turn this process into the given executable. - ::execv(path, (char *const *)argv); - } - // Exit with an error code. The child process should have taken over in the - // exec call above, and if the exec fails it will exit the child process - // below. - ::exit(127); - } else { - // Parent process - // Let the child have its own process group. We need to execute this call - // in both the child and parent to avoid a race condition between the two - // processes. - - // Set the child process group to match its pid - ::setpgid(*pid, *pid); - if (pty_fd) { - // Release our master pty file descriptor so the pty class doesn't close - // it and so we can continue to use it in our STDIO thread - *pty_fd = pty.ReleaseMasterFileDescriptor(); - } - } - return error; -} - -static Status -CreatePosixSpawnFileAction(const FileAction &action, - posix_spawn_file_actions_t *file_actions) { - Status error; - - // Log it. - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (log) { - StreamString stream; - stream.PutCString("converting file action for posix_spawn(): "); - action.Dump(stream); - stream.Flush(); - log->PutCString(stream.GetString().c_str()); - } - - // Validate args. - if (!file_actions) { - error.SetErrorString("mandatory file_actions arg is null"); - return error; - } - - // Build the posix file action.
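For orientation, here is a minimal standalone sketch of the three posix_spawn file-action primitives that the switch below maps FileAction entries onto. The output path and the closed descriptor are hypothetical, and error checking is elided; this is an illustration, not part of the patch.

#include <fcntl.h>
#include <spawn.h>
#include <unistd.h>

static void sketch_file_actions() {
  posix_spawn_file_actions_t fa;
  ::posix_spawn_file_actions_init(&fa);
  // eFileActionOpen: give the child a file as its stdout.
  ::posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, "/tmp/child-out.txt",
                                     O_WRONLY | O_CREAT | O_TRUNC, 0600);
  // eFileActionDuplicate: mirror stdout onto stderr in the child.
  ::posix_spawn_file_actions_adddup2(&fa, STDOUT_FILENO, STDERR_FILENO);
  // eFileActionClose: keep a (hypothetical) private fd 5 out of the child.
  ::posix_spawn_file_actions_addclose(&fa, 5);
  // ...pass &fa to posix_spawnp(), then clean up:
  ::posix_spawn_file_actions_destroy(&fa);
}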
- switch (action.GetAction()) { - case FileAction::eFileActionOpen: { - const int error_code = ::posix_spawn_file_actions_addopen( - file_actions, action.GetFD(), action.GetPath(), - action.GetActionArgument(), 0); - if (error_code != 0) { - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - break; - } - - case FileAction::eFileActionClose: { - const int error_code = - ::posix_spawn_file_actions_addclose(file_actions, action.GetFD()); - if (error_code != 0) { - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - break; - } - - case FileAction::eFileActionDuplicate: { - const int error_code = ::posix_spawn_file_actions_adddup2( - file_actions, action.GetFD(), action.GetActionArgument()); - if (error_code != 0) { - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - break; - } - - case FileAction::eFileActionNone: - default: - LLDB_LOGF(log, "%s(): unsupported file action %u", __FUNCTION__, - action.GetAction()); - break; - } - - return error; -} - -static Status PosixSpawnChildForPTraceDebugging(const char *path, - ProcessLaunchInfo &launch_info, - ::pid_t *pid, - cpu_type_t *actual_cpu_type) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - if (!pid) { - error.SetErrorStringWithFormat("%s(): pid arg cannot be null", - __FUNCTION__); - return error; - } - - posix_spawnattr_t attr; - short flags; - if (log) { - StreamString stream; - stream.Printf("%s(path='%s',...)\n", __FUNCTION__, path); - launch_info.Dump(stream, nullptr); - stream.Flush(); - log->PutCString(stream.GetString().c_str()); - } - - int error_code; - if ((error_code = ::posix_spawnattr_init(&attr)) != 0) { - LLDB_LOGF(log, "::posix_spawnattr_init(&attr) failed"); - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - - // Ensure we clean up the spawnattr structure however we exit this function. - std::unique_ptr<posix_spawnattr_t, int (*)(posix_spawnattr_t *)> spawnattr_up( - &attr, ::posix_spawnattr_destroy); - - flags = POSIX_SPAWN_START_SUSPENDED | POSIX_SPAWN_SETSIGDEF | - POSIX_SPAWN_SETSIGMASK; - if (launch_info.GetFlags().Test(eLaunchFlagDisableASLR)) - flags |= _POSIX_SPAWN_DISABLE_ASLR; - - sigset_t no_signals; - sigset_t all_signals; - sigemptyset(&no_signals); - sigfillset(&all_signals); - ::posix_spawnattr_setsigmask(&attr, &no_signals); - ::posix_spawnattr_setsigdefault(&attr, &all_signals); - - if ((error_code = ::posix_spawnattr_setflags(&attr, flags)) != 0) { - LLDB_LOG(log, - "::posix_spawnattr_setflags(&attr, " - "POSIX_SPAWN_START_SUSPENDED{0}) failed: {1}", - flags & _POSIX_SPAWN_DISABLE_ASLR ? " | _POSIX_SPAWN_DISABLE_ASLR" - : "", - llvm::sys::StrError(error_code)); - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - -#if !defined(__arm__) - - // We don't need to do this for ARM, and we really shouldn't now that we have - // multiple CPU subtypes and no posix_spawnattr call that allows us to set - // which CPU subtype to launch...
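As context for the slice-selection code that follows: posix_spawnattr_setbinpref_np() is the Apple-specific attribute call that asks the kernel to prefer a particular CPU type when the executable is a fat binary. A condensed, hypothetical usage sketch (CPU_TYPE_X86_64 is just an illustrative choice; the real code below uses the launch architecture's Mach-O CPU type):

#include <mach/machine.h>
#include <spawn.h>

static bool sketch_set_binary_preference(posix_spawnattr_t *attr) {
  cpu_type_t preferred = CPU_TYPE_X86_64; // illustrative choice
  size_t ocount = 0;
  // Returns 0 on success; ocount reports how many entries were stored.
  return ::posix_spawnattr_setbinpref_np(attr, 1, &preferred, &ocount) == 0 &&
         ocount == 1;
}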
- cpu_type_t desired_cpu_type = launch_info.GetArchitecture().GetMachOCPUType(); - if (desired_cpu_type != LLDB_INVALID_CPUTYPE) { - size_t ocount = 0; - error_code = - ::posix_spawnattr_setbinpref_np(&attr, 1, &desired_cpu_type, &ocount); - if (error_code != 0) { - LLDB_LOG(log, - "::posix_spawnattr_setbinpref_np(&attr, 1, " - "cpu_type = {0:x8}, count => {1}): {2}", - desired_cpu_type, ocount, llvm::sys::StrError(error_code)); - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - if (ocount != 1) { - error.SetErrorStringWithFormat("posix_spawnattr_setbinpref_np " - "did not set the expected number " - "of cpu_type entries: expected 1 " - "but was %zu", - ocount); - return error; - } - } -#endif - - posix_spawn_file_actions_t file_actions; - if ((error_code = ::posix_spawn_file_actions_init(&file_actions)) != 0) { - LLDB_LOG(log, "::posix_spawn_file_actions_init(&file_actions) failed: {0}", - llvm::sys::StrError(error_code)); - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - - // Ensure we clean up file actions however we exit this. When the - // file_actions_up below goes out of scope, we'll get our file action - // cleanup. - std::unique_ptr<posix_spawn_file_actions_t, int (*)(posix_spawn_file_actions_t *)> - file_actions_up(&file_actions, ::posix_spawn_file_actions_destroy); - - // We assume the caller has set up the file actions appropriately. We are not - // in the business of figuring out what we really need here. lldb-server will - // have already called FinalizeFileActions() as well to button these up - // properly. - const size_t num_actions = launch_info.GetNumFileActions(); - for (size_t action_index = 0; action_index < num_actions; ++action_index) { - const FileAction *const action = - launch_info.GetFileActionAtIndex(action_index); - if (!action) - continue; - - error = CreatePosixSpawnFileAction(*action, &file_actions); - if (!error.Success()) { - LLDB_LOGF(log, - "%s(): error converting FileAction to posix_spawn " - "file action: %s", - __FUNCTION__, error.AsCString()); - return error; - } - } - - // TODO: Verify if we can set the working directory back immediately - // after the posix_spawnp call without creating a race condition??? - const char *const working_directory = - launch_info.GetWorkingDirectory().GetCString(); - if (working_directory && working_directory[0]) - ::chdir(working_directory); - - auto argv = launch_info.GetArguments().GetArgumentVector(); - auto envp = launch_info.GetEnvironmentEntries().GetArgumentVector(); - error_code = ::posix_spawnp(pid, path, &file_actions, &attr, - (char *const *)argv, (char *const *)envp); - if (error_code != 0) { - LLDB_LOG(log, - "::posix_spawnp(pid => {0}, path = '{1}', file_actions " - "= {2}, attr = {3}, argv = {4}, envp = {5}) failed: {6}", - pid, path, &file_actions, &attr, argv, envp, - llvm::sys::StrError(error_code)); - error.SetError(error_code, eErrorTypePOSIX); - return error; - } - - // Validate we got a pid.
- if (*pid == LLDB_INVALID_PROCESS_ID) { - error.SetErrorString("posix_spawn() did not indicate a failure but it " - "failed to return a pid, aborting."); - return error; - } - - if (actual_cpu_type) { - *actual_cpu_type = GetCPUTypeForLocalProcess(*pid); - LLDB_LOGF(log, - "%s(): cpu type for launched process pid=%i: " - "cpu_type=0x%8.8x", - __FUNCTION__, *pid, *actual_cpu_type); - } - - return error; -} - -Status LaunchInferior(ProcessLaunchInfo &launch_info, int *pty_master_fd, - LaunchFlavor *launch_flavor) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - if (!launch_flavor) { - error.SetErrorString("mandatory launch_flavor field was null"); - return error; - } - - if (log) { - StreamString stream; - stream.Printf("NativeProcessDarwin::%s(): launching with the " - "following launch info:", - __FUNCTION__); - launch_info.Dump(stream, nullptr); - stream.Flush(); - log->PutCString(stream.GetString().c_str()); - } - - // Retrieve the binary name given to us. - char given_path[PATH_MAX]; - given_path[0] = '\0'; - launch_info.GetExecutableFile().GetPath(given_path, sizeof(given_path)); - - // Determine the manner in which we'll launch. - *launch_flavor = g_launch_flavor; - if (*launch_flavor == LaunchFlavor::Default) { - // Our default launch method is posix spawn - *launch_flavor = LaunchFlavor::PosixSpawn; -#if defined WITH_FBS - // Check if we have an app bundle, if so launch using FrontBoard Services. - if (strstr(given_path, ".app")) { - *launch_flavor = LaunchFlavor::FBS; - } -#elif defined WITH_BKS - // Check if we have an app bundle, if so launch using BackBoard Services. - if (strstr(given_path, ".app")) { - *launch_flavor = LaunchFlavor::BKS; - } -#elif defined WITH_SPRINGBOARD - // Check if we have an app bundle, if so launch using SpringBoard. - if (strstr(given_path, ".app")) { - *launch_flavor = LaunchFlavor::SpringBoard; - } -#endif - } - - // Attempt to resolve the binary name to an absolute path. - char resolved_path[PATH_MAX]; - resolved_path[0] = '\0'; - - LLDB_LOGF(log, "%s(): attempting to resolve given binary path: \"%s\"", - __FUNCTION__, given_path); - - // If we fail to resolve the path to our executable, then just use what we - // were given and hope for the best - if (!ResolveExecutablePath(given_path, resolved_path, - sizeof(resolved_path))) { - LLDB_LOGF(log, - "%s(): failed to resolve binary path, using " - "what was given verbatim and hoping for the best", - __FUNCTION__); - ::strncpy(resolved_path, given_path, sizeof(resolved_path)); - } else { - LLDB_LOGF(log, "%s(): resolved given binary path to: \"%s\"", __FUNCTION__, - resolved_path); - } - - char launch_err_str[PATH_MAX]; - launch_err_str[0] = '\0'; - - // TODO figure out how to handle QSetProcessEvent - // const char *process_event = ctx.GetProcessEvent(); - - // Ensure the binary is there.
- struct stat path_stat; - if (::stat(resolved_path, &path_stat) == -1) { - error.SetErrorToErrno(); - return error; - } - - // Fork a child process for debugging - // state_callback(eStateLaunching); - - const auto argv = launch_info.GetArguments().GetConstArgumentVector(); - const auto envp = - launch_info.GetEnvironmentEntries().GetConstArgumentVector(); - - switch (*launch_flavor) { - case LaunchFlavor::ForkExec: { - ::pid_t pid = LLDB_INVALID_PROCESS_ID; - error = ForkChildForPTraceDebugging(resolved_path, argv, envp, &pid, - pty_master_fd); - if (error.Success()) { - launch_info.SetProcessID(static_cast<lldb::pid_t>(pid)); - } else { - // Reset any variables that might have been set during a failed launch - // attempt. - if (pty_master_fd) - *pty_master_fd = -1; - - // We're done. - return error; - } - } break; - -#ifdef WITH_FBS - case LaunchFlavor::FBS: { - const char *app_ext = strstr(path, ".app"); - if (app_ext && (app_ext[4] == '\0' || app_ext[4] == '/')) { - std::string app_bundle_path(path, app_ext + strlen(".app")); - m_flags |= eMachProcessFlagsUsingFBS; - if (BoardServiceLaunchForDebug(app_bundle_path.c_str(), argv, envp, - no_stdio, disable_aslr, event_data, - launch_err) != 0) - return m_pid; // A successful SBLaunchForDebug() returns and assigns a - // non-zero m_pid. - else - break; // We tried a FBS launch, but didn't succeed, so let's get out - } - } break; -#endif - -#ifdef WITH_BKS - case LaunchFlavor::BKS: { - const char *app_ext = strstr(path, ".app"); - if (app_ext && (app_ext[4] == '\0' || app_ext[4] == '/')) { - std::string app_bundle_path(path, app_ext + strlen(".app")); - m_flags |= eMachProcessFlagsUsingBKS; - if (BoardServiceLaunchForDebug(app_bundle_path.c_str(), argv, envp, - no_stdio, disable_aslr, event_data, - launch_err) != 0) - return m_pid; // A successful SBLaunchForDebug() returns and assigns a - // non-zero m_pid. - else - break; // We tried a BKS launch, but didn't succeed, so let's get out - } - } break; -#endif - -#ifdef WITH_SPRINGBOARD - case LaunchFlavor::SpringBoard: { - // .../whatever.app/whatever ? - // Or .../com.apple.whatever.app/whatever -- be careful of ".app" in - // "com.apple.whatever" here - const char *app_ext = strstr(path, ".app/"); - if (app_ext == NULL) { - // .../whatever.app ? - int len = strlen(path); - if (len > 5) { - if (strcmp(path + len - 4, ".app") == 0) { - app_ext = path + len - 4; - } - } - } - if (app_ext) { - std::string app_bundle_path(path, app_ext + strlen(".app")); - if (SBLaunchForDebug(app_bundle_path.c_str(), argv, envp, no_stdio, - disable_aslr, launch_err) != 0) - return m_pid; // A successful SBLaunchForDebug() returns and assigns a - // non-zero m_pid. - else - break; // We tried a SpringBoard launch, but didn't succeed, so let's - // get out - } - } break; -#endif - - case LaunchFlavor::PosixSpawn: { - ::pid_t pid = LLDB_INVALID_PROCESS_ID; - - // Retrieve paths for stdin/stdout/stderr. - cpu_type_t actual_cpu_type = 0; - error = PosixSpawnChildForPTraceDebugging(resolved_path, launch_info, &pid, - &actual_cpu_type); - if (error.Success()) { - launch_info.SetProcessID(static_cast<lldb::pid_t>(pid)); - if (pty_master_fd) - *pty_master_fd = launch_info.GetPTY().ReleaseMasterFileDescriptor(); - } else { - // Reset any variables that might have been set during a failed launch - // attempt. - if (pty_master_fd) - *pty_master_fd = -1; - - // We're done. - return error; - } - break; - } - - default: - // Invalid launch flavor.
- error.SetErrorStringWithFormat("NativeProcessDarwin::%s(): unknown " - "launch flavor %d", - __FUNCTION__, (int)*launch_flavor); - return error; - } - - if (launch_info.GetProcessID() == LLDB_INVALID_PROCESS_ID) { - // If we don't have a valid process ID and no one has set the error, then - // return a generic error. - if (error.Success()) - error.SetErrorStringWithFormat("%s(): failed to launch, no reason " - "specified", - __FUNCTION__); - } - - // We're done with the launch side of the operation. - return error; -} -} -} // namespaces diff --git a/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.h b/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.h deleted file mode 100644 index 0e65b56a143e0..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/DarwinProcessLauncher.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- DarwinProcessLauncher.h ---------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef DarwinProcessLauncher_h -#define DarwinProcessLauncher_h - -// C headers -#include -#include - -// C++ headers -#include - -// LLDB headers -#include "lldb/lldb-enumerations.h" -#include "lldb/lldb-forward.h" - -#include "LaunchFlavor.h" - -namespace lldb_private { -namespace darwin_process_launcher { -// ============================================================================= -/// Launches a process for debugging. -/// -/// \param[inout] launch_info -/// Specifies details about the process to launch (e.g. path, architecture, -/// etc.). On output, includes the launched ProcessID (pid). -/// -/// \param[out] pty_master_fd -/// Returns the master side of the pseudo-terminal used to communicate -/// with stdin/stdout from the launched process. May be nullptr. -/// -/// \param[out] launch_flavor -/// Contains the launch flavor used when launching the process. -// ============================================================================= -Status -LaunchInferior(ProcessLaunchInfo &launch_info, int *pty_master_fd, - lldb_private::process_darwin::LaunchFlavor *launch_flavor); - -} // darwin_process_launcher -} // lldb_private - -#endif /* DarwinProcessLauncher_h */ diff --git a/lldb/source/Plugins/Process/Darwin/LaunchFlavor.h b/lldb/source/Plugins/Process/Darwin/LaunchFlavor.h deleted file mode 100644 index cfd76d1b9c3c2..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/LaunchFlavor.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- LaunchFlavor.h ---------------------------------------- -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LaunchFlavor_h -#define LaunchFlavor_h - -namespace lldb_private { -namespace process_darwin { - -enum class LaunchFlavor { - Default = 0, - PosixSpawn = 1, - ForkExec = 2, -#ifdef WITH_SPRINGBOARD - SpringBoard = 3, -#endif -#ifdef WITH_BKS - BKS = 4, -#endif -#ifdef WITH_FBS - FBS = 5 -#endif -}; -} -} // namespaces - -#endif /* LaunchFlavor_h */ diff --git a/lldb/source/Plugins/Process/Darwin/MachException.cpp b/lldb/source/Plugins/Process/Darwin/MachException.cpp deleted file mode 100644 index 9c2025649da6f..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/MachException.cpp +++ /dev/null @@ -1,514 +0,0 @@ -//===-- MachException.cpp -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 6/18/07. -// -//===----------------------------------------------------------------------===// - -#include "MachException.h" - -// C includes -#include -#include -#include - -// C++ includes -#include - -// LLDB includes -#include "lldb/Target/UnixSignals.h" -#include "lldb/Utility/LLDBAssert.h" -#include "lldb/Utility/Log.h" -#include "lldb/Utility/Status.h" -#include "lldb/Utility/Stream.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_darwin; - -// Routine mach_exception_raise -extern "C" kern_return_t -catch_mach_exception_raise(mach_port_t exception_port, mach_port_t thread, - mach_port_t task, exception_type_t exception, - mach_exception_data_t code, - mach_msg_type_number_t codeCnt); - -extern "C" kern_return_t catch_mach_exception_raise_state( - mach_port_t exception_port, exception_type_t exception, - const mach_exception_data_t code, mach_msg_type_number_t codeCnt, - int *flavor, const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt); - -// Routine mach_exception_raise_state_identity -extern "C" kern_return_t catch_mach_exception_raise_state_identity( - mach_port_t exception_port, mach_port_t thread, mach_port_t task, - exception_type_t exception, mach_exception_data_t code, - mach_msg_type_number_t codeCnt, int *flavor, thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt); - -extern "C" boolean_t mach_exc_server(mach_msg_header_t *InHeadP, - mach_msg_header_t *OutHeadP); - -static MachException::Data *g_message = NULL; - -extern "C" kern_return_t catch_mach_exception_raise_state( - mach_port_t exc_port, exception_type_t exc_type, - const mach_exception_data_t exc_data, mach_msg_type_number_t exc_data_count, - int *flavor, const thread_state_t old_state, - mach_msg_type_number_t old_stateCnt, thread_state_t new_state, - mach_msg_type_number_t *new_stateCnt) { - // TODO change to LIBLLDB_LOG_EXCEPTION - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - if (log) { - LLDB_LOGF(log, - "::%s(exc_port = 0x%4.4x, exc_type = %d (%s), " - "exc_data = 0x%llx, exc_data_count = %d)", - __FUNCTION__, exc_port, exc_type, MachException::Name(exc_type), - (uint64_t)exc_data, exc_data_count); 
- } - return KERN_FAILURE; -} - -extern "C" kern_return_t catch_mach_exception_raise_state_identity( - mach_port_t exc_port, mach_port_t thread_port, mach_port_t task_port, - exception_type_t exc_type, mach_exception_data_t exc_data, - mach_msg_type_number_t exc_data_count, int *flavor, - thread_state_t old_state, mach_msg_type_number_t old_stateCnt, - thread_state_t new_state, mach_msg_type_number_t *new_stateCnt) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - if (log) { - LLDB_LOGF(log, - "::%s(exc_port = 0x%4.4x, thd_port = 0x%4.4x, " - "tsk_port = 0x%4.4x, exc_type = %d (%s), exc_data[%d] = " - "{ 0x%llx, 0x%llx })", - __FUNCTION__, exc_port, thread_port, task_port, exc_type, - MachException::Name(exc_type), exc_data_count, - (uint64_t)(exc_data_count > 0 ? exc_data[0] : 0xBADDBADD), - (uint64_t)(exc_data_count > 1 ? exc_data[1] : 0xBADDBADD)); - } - - return KERN_FAILURE; -} - -extern "C" kern_return_t -catch_mach_exception_raise(mach_port_t exc_port, mach_port_t thread_port, - mach_port_t task_port, exception_type_t exc_type, - mach_exception_data_t exc_data, - mach_msg_type_number_t exc_data_count) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - if (log) { - LLDB_LOGF(log, - "::%s(exc_port = 0x%4.4x, thd_port = 0x%4.4x, " - "tsk_port = 0x%4.4x, exc_type = %d (%s), exc_data[%d] " - "= { 0x%llx, 0x%llx })", - __FUNCTION__, exc_port, thread_port, task_port, exc_type, - MachException::Name(exc_type), exc_data_count, - (uint64_t)(exc_data_count > 0 ? exc_data[0] : 0xBADDBADD), - (uint64_t)(exc_data_count > 1 ? exc_data[1] : 0xBADDBADD)); - } - - if (task_port == g_message->task_port) { - g_message->task_port = task_port; - g_message->thread_port = thread_port; - g_message->exc_type = exc_type; - g_message->exc_data.resize(exc_data_count); - ::memcpy(&g_message->exc_data[0], exc_data, - g_message->exc_data.size() * sizeof(mach_exception_data_type_t)); - return KERN_SUCCESS; - } - return KERN_FAILURE; -} - -bool MachException::Data::GetStopInfo(struct ThreadStopInfo *stop_info, - const UnixSignals &signals, - Stream &stream) const { - if (!stop_info) - return false; - - // Zero out the structure. - memset(stop_info, 0, sizeof(struct ThreadStopInfo)); - - if (exc_type == 0) { - stop_info->reason = eStopReasonInvalid; - return true; - } - - // We always stop with a mach exception. - stop_info->reason = eStopReasonException; - // Save the EXC_XXXX exception type. - stop_info->details.exception.type = exc_type; - - // Fill in a text description - const char *exc_name = MachException::Name(exc_type); - if (exc_name) - stream.Printf("%s", exc_name); - else - stream.Printf("%i", exc_type); - - stop_info->details.exception.data_count = exc_data.size(); - - int soft_signal = SoftSignal(); - if (soft_signal) { - const char *sig_str = signals.GetSignalAsCString(soft_signal); - stream.Printf(" EXC_SOFT_SIGNAL( %i ( %s ))", soft_signal, - sig_str ? sig_str : "unknown signal"); - } else { - // No special disassembly for exception data, just print it. - size_t idx; - stream.Printf(" data[%llu] = {", - (uint64_t)stop_info->details.exception.data_count); - - for (idx = 0; idx < stop_info->details.exception.data_count; ++idx) { - stream.Printf( - "0x%llx%c", (uint64_t)exc_data[idx], - ((idx + 1 == stop_info->details.exception.data_count) ? 
'}' : ',')); - } - } - - // Copy the exception data - for (size_t i = 0; i < stop_info->details.exception.data_count; i++) - stop_info->details.exception.data[i] = exc_data[i]; - - return true; -} - -Status MachException::Message::Receive(mach_port_t port, - mach_msg_option_t options, - mach_msg_timeout_t timeout, - mach_port_t notify_port) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - mach_msg_timeout_t mach_msg_timeout = - options & MACH_RCV_TIMEOUT ? timeout : 0; - if (log && ((options & MACH_RCV_TIMEOUT) == 0)) { - // Dump this log message if we have no timeout in case it never returns - LLDB_LOGF(log, - "::mach_msg(msg->{bits = %#x, size = %u remote_port = %#x, " - "local_port = %#x, reserved = 0x%x, id = 0x%x}, " - "option = %#x, send_size = 0, rcv_size = %llu, " - "rcv_name = %#x, timeout = %u, notify = %#x)", - exc_msg.hdr.msgh_bits, exc_msg.hdr.msgh_size, - exc_msg.hdr.msgh_remote_port, exc_msg.hdr.msgh_local_port, - exc_msg.hdr.msgh_reserved, exc_msg.hdr.msgh_id, options, - (uint64_t)sizeof(exc_msg.data), port, mach_msg_timeout, - notify_port); - } - - mach_msg_return_t mach_err = - ::mach_msg(&exc_msg.hdr, - options, // options - 0, // Send size - sizeof(exc_msg.data), // Receive size - port, // exception port to watch for - // exception on - mach_msg_timeout, // timeout in msec (obeyed only - // if MACH_RCV_TIMEOUT is ORed - // into the options parameter) - notify_port); - error.SetError(mach_err, eErrorTypeMachKernel); - - // Dump any errors we get - if (error.Fail() && log) { - LLDB_LOGF(log, - "::mach_msg(msg->{bits = %#x, size = %u remote_port = %#x, " - "local_port = %#x, reserved = 0x%x, id = 0x%x}, " - "option = %#x, send_size = %u, rcv_size = %lu, rcv_name " - "= %#x, timeout = %u, notify = %#x) failed: %s", - exc_msg.hdr.msgh_bits, exc_msg.hdr.msgh_size, - exc_msg.hdr.msgh_remote_port, exc_msg.hdr.msgh_local_port, - exc_msg.hdr.msgh_reserved, exc_msg.hdr.msgh_id, options, 0, - sizeof(exc_msg.data), port, mach_msg_timeout, notify_port, - error.AsCString()); - } - return error; -} - -void MachException::Message::Dump(Stream &stream) const { - stream.Printf(" exc_msg { bits = 0x%8.8x size = 0x%8.8x remote-port = " - "0x%8.8x local-port = 0x%8.8x reserved = 0x%8.8x id = " - "0x%8.8x }\n", - exc_msg.hdr.msgh_bits, exc_msg.hdr.msgh_size, - exc_msg.hdr.msgh_remote_port, exc_msg.hdr.msgh_local_port, - exc_msg.hdr.msgh_reserved, exc_msg.hdr.msgh_id); - - stream.Printf(" reply_msg { bits = 0x%8.8x size = 0x%8.8x remote-port = " - "0x%8.8x local-port = 0x%8.8x reserved = 0x%8.8x id = " - "0x%8.8x }", - reply_msg.hdr.msgh_bits, reply_msg.hdr.msgh_size, - reply_msg.hdr.msgh_remote_port, reply_msg.hdr.msgh_local_port, - reply_msg.hdr.msgh_reserved, reply_msg.hdr.msgh_id); -} - -bool MachException::Message::CatchExceptionRaise(task_t task) { - bool success = false; - state.task_port = task; - g_message = &state; - // The exc_server function is the MIG generated server handling function to - // handle messages from the kernel relating to the occurrence of an exception - // in a thread. Such messages are delivered to the exception port set via - // thread_set_exception_ports or task_set_exception_ports. When an exception - // occurs in a thread, the thread sends an exception message to its exception - // port, blocking in the kernel waiting for the receipt of a reply. 
The - // exc_server function performs all necessary argument handling for this - // kernel message and calls catch_exception_raise, - // catch_exception_raise_state or catch_exception_raise_state_identity, which - // should handle the exception. If the called routine returns KERN_SUCCESS, a - // reply message will be sent, allowing the thread to continue from the point - // of the exception; otherwise, no reply message is sent and the called - // routine must have dealt with the exception thread directly. - if (mach_exc_server(&exc_msg.hdr, &reply_msg.hdr)) { - success = true; - } else { - Log *log( - GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - LLDB_LOGF(log, - "MachException::Message::%s(): mach_exc_server " - "returned zero...", - __FUNCTION__); - } - g_message = NULL; - return success; -} - -Status MachException::Message::Reply(::pid_t inferior_pid, task_t inferior_task, - int signal) { - // Reply to the exception... - Status error; - - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - // If we had a soft signal, we need to update the thread first so it can - // continue without signaling - int soft_signal = state.SoftSignal(); - if (soft_signal) { - int state_pid = -1; - if (inferior_task == state.task_port) { - // This is our task, so we can update the signal to send to it - state_pid = inferior_pid; - soft_signal = signal; - } else { - auto mach_err = ::pid_for_task(state.task_port, &state_pid); - if (mach_err) { - error.SetError(mach_err, eErrorTypeMachKernel); - LLDB_LOGF(log, - "MachException::Message::%s(): pid_for_task() " - "failed: %s", - __FUNCTION__, error.AsCString()); - return error; - } - } - - lldbassert(state_pid != -1); - if (state_pid != -1) { - errno = 0; - caddr_t thread_port_caddr = (caddr_t)(uintptr_t)state.thread_port; - if (::ptrace(PT_THUPDATE, state_pid, thread_port_caddr, soft_signal) != 0) - error.SetError(errno, eErrorTypePOSIX); - - if (!error.Success()) { - LLDB_LOGF(log, - "::ptrace(request = PT_THUPDATE, pid = " - "0x%4.4x, tid = 0x%4.4x, signal = %i)", - state_pid, state.thread_port, soft_signal); - return error; - } - } - } - - LLDB_LOGF(log, - "::mach_msg ( msg->{bits = %#x, size = %u, remote_port " - "= %#x, local_port = %#x, reserved = 0x%x, id = 0x%x}, " - "option = %#x, send_size = %u, rcv_size = %u, rcv_name " - "= %#x, timeout = %u, notify = %#x)", - reply_msg.hdr.msgh_bits, reply_msg.hdr.msgh_size, - reply_msg.hdr.msgh_remote_port, reply_msg.hdr.msgh_local_port, - reply_msg.hdr.msgh_reserved, reply_msg.hdr.msgh_id, - MACH_SEND_MSG | MACH_SEND_INTERRUPT, reply_msg.hdr.msgh_size, 0, - MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); - - auto mach_err = - ::mach_msg(&reply_msg.hdr, MACH_SEND_MSG | MACH_SEND_INTERRUPT, - reply_msg.hdr.msgh_size, 0, MACH_PORT_NULL, - MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); - if (mach_err) - error.SetError(mach_err, eErrorTypeMachKernel); - - // Log our error if we have one. - if (error.Fail() && log) { - if (error.GetError() == MACH_SEND_INTERRUPTED) { - log->PutCString("::mach_msg() - send interrupted"); - // TODO: keep retrying to reply??? 
- } else if (state.task_port == inferior_task) { - LLDB_LOGF(log, - "mach_msg(): returned an error when replying " - "to a mach exception: error = %u (%s)", - error.GetError(), error.AsCString()); - } else { - LLDB_LOGF(log, "::mach_msg() - failed (child of task): %u (%s)", - error.GetError(), error.AsCString()); - } - } - - return error; -} - -#define PREV_EXC_MASK_ALL \ - (EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION | EXC_MASK_ARITHMETIC | \ - EXC_MASK_EMULATION | EXC_MASK_SOFTWARE | EXC_MASK_BREAKPOINT | \ - EXC_MASK_SYSCALL | EXC_MASK_MACH_SYSCALL | EXC_MASK_RPC_ALERT | \ - EXC_MASK_MACHINE) - -// Don't listen for EXC_RESOURCE, it should really get handled by the system -// handler. - -#ifndef EXC_RESOURCE -#define EXC_RESOURCE 11 -#endif - -#ifndef EXC_MASK_RESOURCE -#define EXC_MASK_RESOURCE (1 << EXC_RESOURCE) -#endif - -#define LLDB_EXC_MASK (EXC_MASK_ALL & ~EXC_MASK_RESOURCE) - -Status MachException::PortInfo::Save(task_t task) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - LLDB_LOGF(log, "MachException::PortInfo::%s(task = 0x%4.4x)", __FUNCTION__, - task); - - // Be careful to be able to have debugserver built on a newer OS than what it - // is currently running on by being able to start with all exceptions and - // back off to just what is supported on the current system - mask = LLDB_EXC_MASK; - - count = (sizeof(ports) / sizeof(ports[0])); - auto mach_err = ::task_get_exception_ports(task, mask, masks, &count, ports, - behaviors, flavors); - if (mach_err) - error.SetError(mach_err, eErrorTypeMachKernel); - - if (log) { - if (error.Success()) { - LLDB_LOGF(log, - "::task_get_exception_ports(task = 0x%4.4x, mask = " - "0x%x, maskCnt => %u, ports, behaviors, flavors)", - task, mask, count); - } else { - LLDB_LOGF(log, - "::task_get_exception_ports(task = 0x%4.4x, mask = 0x%x, " - "maskCnt => %u, ports, behaviors, flavors) error: %u (%s)", - task, mask, count, error.GetError(), error.AsCString()); - } - } - - if ((error.GetError() == KERN_INVALID_ARGUMENT) && - (mask != PREV_EXC_MASK_ALL)) { - mask = PREV_EXC_MASK_ALL; - count = (sizeof(ports) / sizeof(ports[0])); - mach_err = ::task_get_exception_ports(task, mask, masks, &count, ports, - behaviors, flavors); - error.SetError(mach_err, eErrorTypeMachKernel); - if (log) { - if (error.Success()) { - LLDB_LOGF(log, - "::task_get_exception_ports(task = 0x%4.4x, " - "mask = 0x%x, maskCnt => %u, ports, behaviors, " - "flavors)", - task, mask, count); - } else { - LLDB_LOGF(log, - "::task_get_exception_ports(task = 0x%4.4x, mask = " - "0x%x, maskCnt => %u, ports, behaviors, flavors) " - "error: %u (%s)", - task, mask, count, error.GetError(), error.AsCString()); - } - } - } - if (error.Fail()) { - mask = 0; - count = 0; - } - return error; -} - -Status MachException::PortInfo::Restore(task_t task) { - Status error; - - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - LLDB_LOGF(log, "MachException::PortInfo::Restore(task = 0x%4.4x)", task); - - uint32_t i = 0; - if (count > 0) { - for (i = 0; i < count; i++) { - auto mach_err = ::task_set_exception_ports(task, masks[i], ports[i], - behaviors[i], flavors[i]); - if (mach_err) - error.SetError(mach_err, eErrorTypeMachKernel); - if (log) { - if (error.Success()) { - LLDB_LOGF(log, - "::task_set_exception_ports(task = 0x%4.4x, " - "exception_mask = 0x%8.8x, new_port = 0x%4.4x, " - "behavior = 0x%8.8x, new_flavor = 0x%8.8x)", - task, masks[i], ports[i], behaviors[i], flavors[i]); - } else { - 
LLDB_LOGF(log, - "::task_set_exception_ports(task = 0x%4.4x, " - "exception_mask = 0x%8.8x, new_port = 0x%4.4x, " - "behavior = 0x%8.8x, new_flavor = 0x%8.8x): " - "error %u (%s)", - task, masks[i], ports[i], behaviors[i], flavors[i], - error.GetError(), error.AsCString()); - } - } - - // Bail if we encounter any errors - if (error.Fail()) - break; - } - } - - count = 0; - return error; -} - -const char *MachException::Name(exception_type_t exc_type) { - switch (exc_type) { - case EXC_BAD_ACCESS: - return "EXC_BAD_ACCESS"; - case EXC_BAD_INSTRUCTION: - return "EXC_BAD_INSTRUCTION"; - case EXC_ARITHMETIC: - return "EXC_ARITHMETIC"; - case EXC_EMULATION: - return "EXC_EMULATION"; - case EXC_SOFTWARE: - return "EXC_SOFTWARE"; - case EXC_BREAKPOINT: - return "EXC_BREAKPOINT"; - case EXC_SYSCALL: - return "EXC_SYSCALL"; - case EXC_MACH_SYSCALL: - return "EXC_MACH_SYSCALL"; - case EXC_RPC_ALERT: - return "EXC_RPC_ALERT"; -#ifdef EXC_CRASH - case EXC_CRASH: - return "EXC_CRASH"; -#endif - default: - break; - } - return NULL; -} diff --git a/lldb/source/Plugins/Process/Darwin/MachException.h b/lldb/source/Plugins/Process/Darwin/MachException.h deleted file mode 100644 index 18e49173b0200..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/MachException.h +++ /dev/null @@ -1,139 +0,0 @@ -//===-- MachException.h -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 6/18/07. -// -//===----------------------------------------------------------------------===// - -#ifndef __MachException_h__ -#define __MachException_h__ - -#include <mach/mach.h> -#include <vector> - -#include "lldb/Host/Debug.h" -#include "lldb/lldb-private-forward.h" -#include "lldb/lldb-types.h" - -namespace lldb_private { -namespace process_darwin { - -typedef union MachMessageTag { - mach_msg_header_t hdr; - char data[1024]; -} MachMessage; - -class MachException { -public: - struct PortInfo { - exception_mask_t mask; // the exception mask for this device which may be a - // subset of EXC_MASK_ALL...
- exception_mask_t masks[EXC_TYPES_COUNT]; - mach_port_t ports[EXC_TYPES_COUNT]; - exception_behavior_t behaviors[EXC_TYPES_COUNT]; - thread_state_flavor_t flavors[EXC_TYPES_COUNT]; - mach_msg_type_number_t count; - - Status Save(task_t task); - - Status Restore(task_t task); - }; - - struct Data { - task_t task_port; - thread_t thread_port; - exception_type_t exc_type; - std::vector<mach_exception_data_type_t> exc_data; - Data() - : task_port(TASK_NULL), thread_port(THREAD_NULL), exc_type(0), - exc_data() {} - - void Clear() { - task_port = TASK_NULL; - thread_port = THREAD_NULL; - exc_type = 0; - exc_data.clear(); - } - - bool IsValid() const { - return task_port != TASK_NULL && thread_port != THREAD_NULL && - exc_type != 0; - } - - // Return the SoftSignal for this MachException data, or zero if there is - // none - int SoftSignal() const { - if (exc_type == EXC_SOFTWARE && exc_data.size() == 2 && - exc_data[0] == EXC_SOFT_SIGNAL) - return static_cast<int>(exc_data[1]); - return 0; - } - - bool IsBreakpoint() const { - return (exc_type == EXC_BREAKPOINT || - ((exc_type == EXC_SOFTWARE) && exc_data[0] == 1)); - } - - bool GetStopInfo(ThreadStopInfo *stop_info, const UnixSignals &signals, - Stream &stream) const; - }; - - struct Message { - MachMessage exc_msg; - MachMessage reply_msg; - Data state; - - Message() : state() { - memset(&exc_msg, 0, sizeof(exc_msg)); - memset(&reply_msg, 0, sizeof(reply_msg)); - } - - bool CatchExceptionRaise(task_t task); - - Status Reply(::pid_t inferior_pid, task_t inferior_task, int signal); - - Status Receive(mach_port_t receive_port, mach_msg_option_t options, - mach_msg_timeout_t timeout, - mach_port_t notify_port = MACH_PORT_NULL); - - void Dump(Stream &stream) const; - - typedef std::vector<Message> collection; - typedef collection::iterator iterator; - typedef collection::const_iterator const_iterator; - }; - - enum { - e_actionForward, // Forward signal to inferior process - e_actionStop, // Stop when this signal is received - }; - struct Action { - task_t task_port; // Set to TASK_NULL for any TASK - thread_t thread_port; // Set to THREAD_NULL for any thread - exception_type_t exc_mask; // Mach exception mask to watch for - std::vector<mach_exception_data_type_t> exc_data_mask; // Mask to apply to - // exception data, or - // empty to ignore - // exc_data value for - // exception - std::vector<mach_exception_data_type_t> exc_data_value; // Value to compare - // to exception data - // after masking, or - // empty to ignore - // exc_data value - // for exception - uint8_t flags; // Action flags describing what to do with the exception - }; - - static const char *Name(exception_type_t exc_type); -}; - -} // namespace process_darwin -} // namespace lldb_private - -#endif diff --git a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp b/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp deleted file mode 100644 index 7b35f2e88507a..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp +++ /dev/null @@ -1,1535 +0,0 @@ -//===-- NativeProcessDarwin.cpp -------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "NativeProcessDarwin.h" - -// C includes -#include -#include -#include -#include -#include -#include - -// C++ includes -// LLDB includes -#include "lldb/Host/PseudoTerminal.h" -#include "lldb/Target/ProcessLaunchInfo.h" -#include "lldb/Utility/Log.h" -#include "lldb/Utility/State.h" -#include "lldb/Utility/StreamString.h" - -#include "CFBundle.h" -#include "CFString.h" -#include "DarwinProcessLauncher.h" - -#include "MachException.h" - -#include "llvm/Support/FileSystem.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_darwin; -using namespace lldb_private::darwin_process_launcher; - -// Hidden Impl - -namespace { -struct hack_task_dyld_info { - mach_vm_address_t all_image_info_addr; - mach_vm_size_t all_image_info_size; -}; -} - -// Public Static Methods - -Status NativeProcessProtocol::Launch( - ProcessLaunchInfo &launch_info, - NativeProcessProtocol::NativeDelegate &native_delegate, MainLoop &mainloop, - NativeProcessProtocolSP &native_process_sp) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - Status error; - - // Verify the working directory is valid if one was specified. - FileSpec working_dir(launch_info.GetWorkingDirectory()); - if (working_dir) { - FileSystem::Instance().Resolve(working_dir); - if (!FileSystem::Instance().IsDirectory(working_dir)) { - error.SetErrorStringWithFormat("No such file or directory: %s", - working_dir.GetCString()); - return error; - } - } - - // Launch the inferior. - int pty_master_fd = -1; - LaunchFlavor launch_flavor = LaunchFlavor::Default; - - error = LaunchInferior(launch_info, &pty_master_fd, &launch_flavor); - - // Handle launch failure. - if (!error.Success()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() failed to launch process: " - "%s", - __FUNCTION__, error.AsCString()); - return error; - } - - // Handle failure to return a pid. - if (launch_info.GetProcessID() == LLDB_INVALID_PROCESS_ID) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() launch succeeded but no " - "pid was returned! Aborting.", - __FUNCTION__); - return error; - } - - // Create the Darwin native process impl. - std::shared_ptr<NativeProcessDarwin> np_darwin_sp( - new NativeProcessDarwin(launch_info.GetProcessID(), pty_master_fd)); - if (!np_darwin_sp->RegisterNativeDelegate(native_delegate)) { - native_process_sp.reset(); - error.SetErrorStringWithFormat("failed to register the native delegate"); - return error; - } - - // Finalize the processing needed to debug the launched process with a - // NativeProcessDarwin instance. - error = np_darwin_sp->FinalizeLaunch(launch_flavor, mainloop); - if (!error.Success()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() aborting, failed to finalize" - " the launching of the process: %s", - __FUNCTION__, error.AsCString()); - return error; - } - - // Return the process and process id to the caller through the launch args. - native_process_sp = np_darwin_sp; - return error; -} - -Status NativeProcessProtocol::Attach( - lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate, - MainLoop &mainloop, NativeProcessProtocolSP &native_process_sp) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - LLDB_LOGF(log, "NativeProcessDarwin::%s(pid = %" PRIi64 ")", __FUNCTION__, - pid); - - // Retrieve the architecture for the running process.
- ArchSpec process_arch; - Status error = ResolveProcessArchitecture(pid, process_arch); - if (!error.Success()) - return error; - - // TODO get attach to return this value. - const int pty_master_fd = -1; - std::shared_ptr<NativeProcessDarwin> native_process_darwin_sp( - new NativeProcessDarwin(pid, pty_master_fd)); - - if (!native_process_darwin_sp->RegisterNativeDelegate(native_delegate)) { - error.SetErrorStringWithFormat("failed to register the native " - "delegate"); - return error; - } - - native_process_darwin_sp->AttachToInferior(mainloop, pid, error); - if (!error.Success()) - return error; - - native_process_sp = native_process_darwin_sp; - return error; -} - -// ctor/dtor - -NativeProcessDarwin::NativeProcessDarwin(lldb::pid_t pid, int pty_master_fd) - : NativeProcessProtocol(pid), m_task(TASK_NULL), m_did_exec(false), - m_cpu_type(0), m_exception_port(MACH_PORT_NULL), m_exc_port_info(), - m_exception_thread(nullptr), m_exception_messages_mutex(), - m_sent_interrupt_signo(0), m_auto_resume_signo(0), m_thread_list(), - m_thread_actions(), m_waitpid_pipe(), m_waitpid_thread(nullptr), - m_waitpid_reader_handle() { - // TODO add this to the NativeProcessProtocol constructor. - m_terminal_fd = pty_master_fd; -} - -NativeProcessDarwin::~NativeProcessDarwin() {} - -// Instance methods - -Status NativeProcessDarwin::FinalizeLaunch(LaunchFlavor launch_flavor, - MainLoop &main_loop) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - error = StartExceptionThread(); - if (!error.Success()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): failure starting the " - "mach exception port monitor thread: %s", - __FUNCTION__, error.AsCString()); - - // Terminate the inferior process. There's nothing meaningful we can do if - // we can't receive signals and exceptions. Since we launched the process, - // it's fair game for us to kill it. - ::ptrace(PT_KILL, m_pid, 0, 0); - SetState(eStateExited); - - return error; - } - - StartSTDIOThread(); - - if (launch_flavor == LaunchFlavor::PosixSpawn) { - SetState(eStateAttaching); - errno = 0; - int err = ::ptrace(PT_ATTACHEXC, m_pid, 0, 0); - if (err == 0) { - // m_flags |= eMachProcessFlagsAttached; - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): successfully spawned " - "process with pid %" PRIu64, - __FUNCTION__, m_pid); - } else { - error.SetErrorToErrno(); - SetState(eStateExited); - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): error: failed to " - "attach to spawned pid %" PRIu64 " (error=%d (%s))", - __FUNCTION__, m_pid, (int)error.GetError(), error.AsCString()); - return error; - } - } - - LLDB_LOGF(log, "NativeProcessDarwin::%s(): new pid is %" PRIu64 "...", - __FUNCTION__, m_pid); - - // Spawn a thread to reap our child inferior process... - error = StartWaitpidThread(main_loop); - if (error.Fail()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): failed to start waitpid() " - "thread: %s", - __FUNCTION__, error.AsCString()); - kill(static_cast<::pid_t>(m_pid), SIGKILL); - return error; - } - - if (TaskPortForProcessID(error) == TASK_NULL) { - // We failed to get the task for our process ID which is bad. Kill our - // process; otherwise, it will be stopped at the entry point and get - // reparented to someone else and never go away. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): could not get task port " - "for process, sending SIGKILL and exiting: %s", - __FUNCTION__, error.AsCString()); - kill(static_cast<::pid_t>(m_pid), SIGKILL); - return error; - } - - // Indicate that we're stopped, as we always launch suspended.
- SetState(eStateStopped); - - // Success. - return error; -} - -Status NativeProcessDarwin::SaveExceptionPortInfo() { - return m_exc_port_info.Save(m_task); -} - -bool NativeProcessDarwin::ProcessUsingSpringBoard() const { - // TODO implement flags - // return (m_flags & eMachProcessFlagsUsingSBS) != 0; - return false; -} - -bool NativeProcessDarwin::ProcessUsingBackBoard() const { - // TODO implement flags - // return (m_flags & eMachProcessFlagsUsingBKS) != 0; - return false; -} - -// Called by the exception thread when an exception has been received from our -// process. The exception message is completely filled and the exception data -// has already been copied. -void NativeProcessDarwin::ExceptionMessageReceived( - const MachException::Message &message) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - std::lock_guard<std::recursive_mutex> locker(m_exception_messages_mutex); - if (m_exception_messages.empty()) { - // Suspend the task the moment we receive our first exception message. - SuspendTask(); - } - - // Use a locker to automatically unlock our mutex in case of exceptions. Add - // the exception to our internal exception stack. - m_exception_messages.push_back(message); - - LLDB_LOGF(log, "NativeProcessDarwin::%s(): new queued message count: %lu", - __FUNCTION__, m_exception_messages.size()); -} - -void *NativeProcessDarwin::ExceptionThread(void *arg) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - if (!arg) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): cannot run mach exception " - "thread, mandatory process arg was null", - __FUNCTION__); - return nullptr; - } - - return reinterpret_cast<NativeProcessDarwin *>(arg)->DoExceptionThread(); -} - -void *NativeProcessDarwin::DoExceptionThread() { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - LLDB_LOGF(log, "NativeProcessDarwin::%s(arg=%p) starting thread...", - __FUNCTION__, this); - - pthread_setname_np("exception monitoring thread"); - - // Ensure we don't get CPU starved. - MaybeRaiseThreadPriority(); - - // We keep a count of the number of consecutive exceptions received so we - // know to grab all exceptions without a timeout. We do this to get a bunch - // of related exceptions on our exception port so we can process them - // together. When we have multiple threads, we can get an exception per - // thread and they will come in consecutively. The main loop in this thread - // can stop periodically if needed to service things related to this process. - // - // [did we lose some words here?] - // - // flag set in the options, so we will wait forever for an exception on - // our exception port. After we get one exception, we then will use the - // MACH_RCV_TIMEOUT option with a zero timeout to grab all other current - // exceptions for our process. After we have received the last pending - // exception, we will get a timeout which enables us to then notify our main - // thread that we have an exception bundle available. We then wait for the - // main thread to tell this exception thread to start trying to get - // exception messages again and we start again with a mach_msg read with - // infinite timeout. - // - // We choose to park a thread on this, rather than polling, because the - // polling is expensive. On devices, we need to minimize overhead caused by - // the process monitor.
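The receive strategy described above boils down to two mach_msg() modes: block indefinitely for the first exception, then drain with a zero timeout until MACH_RCV_TIMED_OUT marks the bundle complete. A condensed, hypothetical sketch of just that switch (names here are illustrative; the real loop follows):

#include <mach/mach.h>

static mach_msg_return_t sketch_receive(mach_msg_header_t *hdr,
                                        mach_msg_size_t rcv_size,
                                        mach_port_t exc_port, bool have_one) {
  // After the first exception, add MACH_RCV_TIMEOUT with a zero timeout so
  // we only drain what is already queued; otherwise block forever.
  mach_msg_option_t options = MACH_RCV_MSG | MACH_RCV_INTERRUPT;
  if (have_one)
    options |= MACH_RCV_TIMEOUT;
  return ::mach_msg(hdr, options, /*send_size=*/0, rcv_size, exc_port,
                    /*timeout=*/0, MACH_PORT_NULL);
}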
- uint32_t num_exceptions_received = 0; - Status error; - task_t task = m_task; - mach_msg_timeout_t periodic_timeout = 0; - -#if defined(WITH_SPRINGBOARD) && !defined(WITH_BKS) - mach_msg_timeout_t watchdog_elapsed = 0; - mach_msg_timeout_t watchdog_timeout = 60 * 1000; - ::pid_t pid = (::pid_t)process->GetID(); - CFReleaser<SBSWatchdogAssertionRef> watchdog; - - if (process->ProcessUsingSpringBoard()) { - // Request a renewal for every 60 seconds if we attached using SpringBoard. - watchdog.reset(::SBSWatchdogAssertionCreateForPID(nullptr, pid, 60)); - LLDB_LOGF(log, - "::SBSWatchdogAssertionCreateForPID(NULL, %4.4x, 60) " - "=> %p", - pid, watchdog.get()); - - if (watchdog.get()) { - ::SBSWatchdogAssertionRenew(watchdog.get()); - - CFTimeInterval watchdogRenewalInterval = - ::SBSWatchdogAssertionGetRenewalInterval(watchdog.get()); - LLDB_LOGF(log, - "::SBSWatchdogAssertionGetRenewalInterval(%p) => " - "%g seconds", - watchdog.get(), watchdogRenewalInterval); - if (watchdogRenewalInterval > 0.0) { - watchdog_timeout = (mach_msg_timeout_t)watchdogRenewalInterval * 1000; - if (watchdog_timeout > 3000) { - // Give us a second to renew our timeout. - watchdog_timeout -= 1000; - } else if (watchdog_timeout > 1000) { - // Give us a quarter of a second to renew our timeout. - watchdog_timeout -= 250; - } - } - } - if (periodic_timeout == 0 || periodic_timeout > watchdog_timeout) - periodic_timeout = watchdog_timeout; - } -#endif // #if defined (WITH_SPRINGBOARD) && !defined (WITH_BKS) - -#ifdef WITH_BKS - CFReleaser<BKSWatchdogAssertionRef> watchdog; - if (process->ProcessUsingBackBoard()) { - ::pid_t pid = process->GetID(); - CFAllocatorRef alloc = kCFAllocatorDefault; - watchdog.reset(::BKSWatchdogAssertionCreateForPID(alloc, pid)); - } -#endif // #ifdef WITH_BKS - - // Do we want to use a weak pointer to the NativeProcessDarwin here, in which - // case we can guarantee we don't whack the process monitor if we race - // between this thread and the main one on shutdown? - while (IsExceptionPortValid()) { - ::pthread_testcancel(); - - MachException::Message exception_message; - - if (num_exceptions_received > 0) { - // We don't want a timeout here, just receive as many exceptions as we - // can since we already have one. We want to get all currently available - // exceptions for this task at once. - error = exception_message.Receive( - GetExceptionPort(), - MACH_RCV_MSG | MACH_RCV_INTERRUPT | MACH_RCV_TIMEOUT, 0); - } else if (periodic_timeout > 0) { - // We need to stop periodically in this loop, so try and get a mach - // message with a valid timeout (ms). - error = exception_message.Receive(GetExceptionPort(), - MACH_RCV_MSG | MACH_RCV_INTERRUPT | - MACH_RCV_TIMEOUT, - periodic_timeout); - } else { - // We don't need to parse all current exceptions or stop periodically, - // just wait for an exception forever. - error = exception_message.Receive(GetExceptionPort(), - MACH_RCV_MSG | MACH_RCV_INTERRUPT, 0); - } - - if (error.Success()) { - // We successfully received an exception. - if (exception_message.CatchExceptionRaise(task)) { - ++num_exceptions_received; - ExceptionMessageReceived(exception_message); - } - } else { - if (error.GetError() == MACH_RCV_INTERRUPTED) { - // We were interrupted. - - // If we have no task port we should exit this thread, as it implies - // the inferior went down. - if (!IsExceptionPortValid()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): the inferior " - "exception port is no longer valid, " - "canceling exception thread...", - __FUNCTION__); - // Should we be setting a process state here?
- break; - } - - // Make sure the inferior task is still valid. - if (IsTaskValid()) { - // Task is still ok. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): interrupted, but " - "the inferior task is still valid, " - "continuing...", - __FUNCTION__); - continue; - } else { - // The inferior task is no longer valid. Time to exit as the process - // has gone away. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): the inferior task " - "has exited, and so will we...", - __FUNCTION__); - // Does this race at all with our waitpid()? - SetState(eStateExited); - break; - } - } else if (error.GetError() == MACH_RCV_TIMED_OUT) { - // We timed out when waiting for exceptions. - - if (num_exceptions_received > 0) { - // We were receiving all current exceptions with a timeout of zero. - // It is time to go back to our normal looping mode. - num_exceptions_received = 0; - - // Notify our main thread we have a complete exception message bundle - // available. Get the possibly updated task port back from the - // process in case we exec'ed and our task port changed. - task = ExceptionMessageBundleComplete(); - - // In case we use a timeout value when getting exceptions, make sure - // our task is still valid. - if (IsTaskValid(task)) { - // Task is still ok. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): got a timeout, " - "continuing...", - __FUNCTION__); - continue; - } else { - // The inferior task is no longer valid. Time to exit as the - // process has gone away. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): the inferior " - "task has exited, and so will we...", - __FUNCTION__); - // Does this race at all with our waitpid()? - SetState(eStateExited); - break; - } - } - -#if defined(WITH_SPRINGBOARD) && !defined(WITH_BKS) - if (watchdog.get()) { - watchdog_elapsed += periodic_timeout; - if (watchdog_elapsed >= watchdog_timeout) { - LLDB_LOGF(log, "SBSWatchdogAssertionRenew(%p)", watchdog.get()); - ::SBSWatchdogAssertionRenew(watchdog.get()); - watchdog_elapsed = 0; - } - } -#endif - } else { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): continuing after " - "receiving an unexpected error: %u (%s)", - __FUNCTION__, error.GetError(), error.AsCString()); - // TODO: notify of error? - } - } - } - -#if defined(WITH_SPRINGBOARD) && !defined(WITH_BKS) - if (watchdog.get()) { - // TODO: change SBSWatchdogAssertionRelease to SBSWatchdogAssertionCancel - // when we - // all are up and running on systems that support it. The SBS framework has - // a #define that will forward SBSWatchdogAssertionRelease to - // SBSWatchdogAssertionCancel for now so it should still build either way. - DNBLogThreadedIf(LOG_TASK, "::SBSWatchdogAssertionRelease(%p)", - watchdog.get()); - ::SBSWatchdogAssertionRelease(watchdog.get()); - } -#endif // #if defined (WITH_SPRINGBOARD) && !defined (WITH_BKS) - - LLDB_LOGF(log, "NativeProcessDarwin::%s(%p): thread exiting...", __FUNCTION__, - this); - return nullptr; -} - -Status NativeProcessDarwin::StartExceptionThread() { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - LLDB_LOGF(log, "NativeProcessDarwin::%s() called", __FUNCTION__); - - // Make sure we've looked up the inferior port. - TaskPortForProcessID(error); - - // Ensure the inferior task is valid. - if (!IsTaskValid()) { - error.SetErrorStringWithFormat("cannot start exception thread: " - "task 0x%4.4x is not valid", - m_task); - return error; - } - - // Get the mach port for the process monitor.
- mach_port_t task_self = mach_task_self(); - - // Allocate an exception port that we will use to track our child process - auto mach_err = ::mach_port_allocate(task_self, MACH_PORT_RIGHT_RECEIVE, - &m_exception_port); - error.SetError(mach_err, eErrorTypeMachKernel); - if (error.Fail()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): mach_port_allocate(" - "task_self=0x%4.4x, MACH_PORT_RIGHT_RECEIVE, " - "&m_exception_port) failed: %u (%s)", - __FUNCTION__, task_self, error.GetError(), error.AsCString()); - return error; - } - - // Add the ability to send messages on the new exception port - mach_err = ::mach_port_insert_right( - task_self, m_exception_port, m_exception_port, MACH_MSG_TYPE_MAKE_SEND); - error.SetError(mach_err, eErrorTypeMachKernel); - if (error.Fail()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): mach_port_insert_right(" - "task_self=0x%4.4x, m_exception_port=0x%4.4x, " - "m_exception_port=0x%4.4x, MACH_MSG_TYPE_MAKE_SEND) " - "failed: %u (%s)", - __FUNCTION__, task_self, m_exception_port, m_exception_port, - error.GetError(), error.AsCString()); - return error; - } - - // Save the original state of the exception ports for our child process. - error = SaveExceptionPortInfo(); - if (error.Fail() || (m_exc_port_info.mask == 0)) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): SaveExceptionPortInfo() " - "failed, cannot install exception handler: %s", - __FUNCTION__, error.AsCString()); - return error; - } - - // Set the ability to get all exceptions on this port. - mach_err = ::task_set_exception_ports( - m_task, m_exc_port_info.mask, m_exception_port, - EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, THREAD_STATE_NONE); - error.SetError(mach_err, eErrorTypeMachKernel); - if (error.Fail()) { - LLDB_LOGF(log, - "::task_set_exception_ports (task = 0x%4.4x, " - "exception_mask = 0x%8.8x, new_port = 0x%4.4x, " - "behavior = 0x%8.8x, new_flavor = 0x%8.8x) failed: " - "%u (%s)", - m_task, m_exc_port_info.mask, m_exception_port, - (EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES), THREAD_STATE_NONE, - error.GetError(), error.AsCString()); - return error; - } - - // Create the exception thread. - auto pthread_err = - ::pthread_create(&m_exception_thread, nullptr, ExceptionThread, this); - error.SetError(pthread_err, eErrorTypePOSIX); - if (error.Fail()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): failed to create Mach " - "exception-handling thread: %u (%s)", - __FUNCTION__, error.GetError(), error.AsCString()); - } - - return error; -} - -lldb::addr_t -NativeProcessDarwin::GetDYLDAllImageInfosAddress(Status &error) const { - error.Clear(); - - struct hack_task_dyld_info dyld_info; - mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; - // Make sure that COUNT isn't bigger than our hacked up struct - // hack_task_dyld_info. If it is, then make COUNT smaller to match. - if (count > (sizeof(struct hack_task_dyld_info) / sizeof(natural_t))) { - count = (sizeof(struct hack_task_dyld_info) / sizeof(natural_t)); - } - - TaskPortForProcessID(error); - if (error.Fail()) - return LLDB_INVALID_ADDRESS; - - auto mach_err = - ::task_info(m_task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &count); - error.SetError(mach_err, eErrorTypeMachKernel); - if (error.Success()) { - // We now have the address of the all image infos structure. - return dyld_info.all_image_info_addr; - } - - // We don't have it. 
-  return LLDB_INVALID_ADDRESS;
-}
-
-uint32_t NativeProcessDarwin::GetCPUTypeForLocalProcess(::pid_t pid) {
-  int mib[CTL_MAXNAME] = {
-      0,
-  };
-  size_t len = CTL_MAXNAME;
-
-  if (::sysctlnametomib("sysctl.proc_cputype", mib, &len))
-    return 0;
-
-  mib[len] = pid;
-  len++;
-
-  cpu_type_t cpu;
-  size_t cpu_len = sizeof(cpu);
-  if (::sysctl(mib, static_cast<u_int>(len), &cpu, &cpu_len, 0, 0))
-    cpu = 0;
-  return cpu;
-}
-
-uint32_t NativeProcessDarwin::GetCPUType() const {
-  if (m_cpu_type == 0 && m_pid != 0)
-    m_cpu_type = GetCPUTypeForLocalProcess(m_pid);
-  return m_cpu_type;
-}
-
-task_t NativeProcessDarwin::ExceptionMessageBundleComplete() {
-  // We have a complete bundle of exceptions for our child process.
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE));
-
-  std::lock_guard<std::recursive_mutex> locker(m_exception_messages_mutex);
-  LLDB_LOGF(log,
-            "NativeProcessDarwin::%s(): processing %lu exception "
-            "messages.",
-            __FUNCTION__, m_exception_messages.size());
-
-  if (m_exception_messages.empty()) {
-    // Not particularly useful...
-    return m_task;
-  }
-
-  bool auto_resume = false;
-  m_did_exec = false;
-
-  // First check for any SIGTRAP and make sure we didn't exec.
-  const task_t task = m_task;
-  size_t i;
-  if (m_pid != 0) {
-    bool received_interrupt = false;
-    uint32_t num_task_exceptions = 0;
-    for (i = 0; i < m_exception_messages.size(); ++i) {
-      if (m_exception_messages[i].state.task_port != task) {
-        // This is an exception that is not for our inferior, ignore.
-        continue;
-      }
-
-      // This is an exception for the inferior.
-      ++num_task_exceptions;
-      const int signo = m_exception_messages[i].state.SoftSignal();
-      if (signo == SIGTRAP) {
-        // SIGTRAP could mean that we exec'ed. We need to check the
-        // dyld all_image_infos.infoArray to see if it is NULL and if so, say
-        // that we exec'ed.
-        const addr_t aii_addr = GetDYLDAllImageInfosAddress(error);
-        if (aii_addr == LLDB_INVALID_ADDRESS)
-          break;
-
-        const addr_t info_array_count_addr = aii_addr + 4;
-        uint32_t info_array_count = 0;
-        size_t bytes_read = 0;
-        Status read_error;
-        read_error = ReadMemory(info_array_count_addr, // source addr
-                                &info_array_count,     // dest addr
-                                4,                     // byte count
-                                bytes_read);           // #bytes read
-        if (read_error.Success() && (bytes_read == 4)) {
-          if (info_array_count == 0) {
-            // We got the all infos address, and there are zero entries. We
-            // think we exec'd.
-            m_did_exec = true;
-
-            // Force the task port to update itself in case the task port
-            // changed after exec.
-            const task_t old_task = m_task;
-            const bool force_update = true;
-            const task_t new_task = TaskPortForProcessID(error, force_update);
-            if (old_task != new_task) {
-              LLDB_LOGF(log,
-                        "exec: inferior task port changed "
-                        "from 0x%4.4x to 0x%4.4x",
-                        old_task, new_task);
-            }
-          }
-        } else {
-          LLDB_LOGF(log,
-                    "NativeProcessDarwin::%s() warning: "
-                    "failed to read all_image_infos."
-                    "infoArrayCount from 0x%8.8llx",
-                    __FUNCTION__, info_array_count_addr);
-        }
-      } else if ((m_sent_interrupt_signo != 0) &&
-                 (signo == m_sent_interrupt_signo)) {
-        // We just received the interrupt that we sent to ourselves.
-        received_interrupt = true;
-      }
-    }
-
-    if (m_did_exec) {
-      cpu_type_t process_cpu_type = GetCPUTypeForLocalProcess(m_pid);
-      if (m_cpu_type != process_cpu_type) {
-        LLDB_LOGF(log,
-                  "NativeProcessDarwin::%s(): arch changed from "
-                  "0x%8.8x to 0x%8.8x",
-                  __FUNCTION__, m_cpu_type, process_cpu_type);
-        m_cpu_type = process_cpu_type;
-        // TODO figure out if we need to do something here.
-        // DNBArchProtocol::SetArchitecture (process_cpu_type);
-      }
-      m_thread_list.Clear();
-
-      // TODO hook up breakpoints.
-      // m_breakpoints.DisableAll();
-    }
-
-    if (m_sent_interrupt_signo != 0) {
-      if (received_interrupt) {
-        LLDB_LOGF(log,
-                  "NativeProcessDarwin::%s(): process "
-                  "successfully interrupted with signal %i",
-                  __FUNCTION__, m_sent_interrupt_signo);
-
-        // Mark that we received the interrupt signal.
-        m_sent_interrupt_signo = 0;
-        // Now check if we had a case where:
-        // 1 - We called NativeProcessDarwin::Interrupt() but we stopped
-        //     for another reason.
-        // 2 - We called NativeProcessDarwin::Resume() (but still
-        //     haven't gotten the interrupt signal).
-        // 3 - We are now incorrectly stopped because we are handling
-        //     the interrupt signal we missed.
-        // 4 - We might need to resume if we stopped only with the
-        //     interrupt signal that we never handled.
-        if (m_auto_resume_signo != 0) {
-          // Only auto_resume if we stopped with _only_ the interrupt signal.
-          if (num_task_exceptions == 1) {
-            auto_resume = true;
-            LLDB_LOGF(log,
-                      "NativeProcessDarwin::%s(): auto "
-                      "resuming due to unhandled interrupt "
-                      "signal %i",
-                      __FUNCTION__, m_auto_resume_signo);
-          }
-          m_auto_resume_signo = 0;
-        }
-      } else {
-        LLDB_LOGF(log,
-                  "NativeProcessDarwin::%s(): didn't get signal "
-                  "%i after MachProcess::Interrupt()",
-                  __FUNCTION__, m_sent_interrupt_signo);
-      }
-    }
-  }
-
-  // Let all threads recover from stopping and do any clean up based on the
-  // previous thread state (if any).
-  m_thread_list.ProcessDidStop(*this);
-
-  // Let each thread know of any exceptions.
-  for (i = 0; i < m_exception_messages.size(); ++i) {
-    // Let the thread list forward all exceptions on down to each thread.
-    if (m_exception_messages[i].state.task_port == task) {
-      // This exception is for our inferior.
-      m_thread_list.NotifyException(m_exception_messages[i].state);
-    }
-
-    if (log) {
-      StreamString stream;
-      m_exception_messages[i].Dump(stream);
-      stream.Flush();
-      log->PutCString(stream.GetString().c_str());
-    }
-  }
-
-  if (log) {
-    StreamString stream;
-    m_thread_list.Dump(stream);
-    stream.Flush();
-    log->PutCString(stream.GetString().c_str());
-  }
-
-  bool step_more = false;
-  if (m_thread_list.ShouldStop(step_more) && (auto_resume == false)) {
-// TODO - need to hook up event system here. !!!!
-#if 0
-    // Wait for the eEventProcessRunningStateChanged event to be reset
-    // before changing state to stopped to avoid race condition with very
-    // fast start/stops.
-    struct timespec timeout;
-
-    //DNBTimer::OffsetTimeOfDay(&timeout, 0, 250 * 1000); // Wait for 250 ms
-    DNBTimer::OffsetTimeOfDay(&timeout, 1, 0); // Wait for 1 second
-    m_events.WaitForEventsToReset(eEventProcessRunningStateChanged,
-                                  &timeout);
-#endif
-    SetState(eStateStopped);
-  } else {
-    // Resume without checking our current state.
-    PrivateResume();
-  }
-
-  return m_task;
-}
-
-void NativeProcessDarwin::StartSTDIOThread() {
-  // TODO implement
-}
-
-Status NativeProcessDarwin::StartWaitpidThread(MainLoop &main_loop) {
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-
-  // Strategy: create a thread that sits on waitpid(), waiting for the inferior
-  // process to die, reaping it in the process. Arrange for the thread to have
-  // a pipe file descriptor that it can send a byte over when the waitpid
-  // completes. Have the main loop have a read object for the other side of
-  // the pipe, and have the callback for the read do the process termination
-  // message sending.
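(Editorial aside: a self-contained sketch of the pipe-based wakeup strategy described above, using only POSIX calls plus std::thread. The names are illustrative; LLDB's Pipe and MainLoop machinery is stood in for by a blocking read.)

```cpp
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>
#include <thread>

int main() {
  int fds[2];
  if (pipe(fds) != 0)
    return 1;

  pid_t child = fork();
  if (child == 0)
    _exit(42); // stand-in for the inferior process

  // Reaper thread: block in waitpid(), then wake the "main loop" via the pipe.
  std::thread reaper([&] {
    int status = 0;
    pid_t pid = waitpid(child, &status, 0);
    (void)write(fds[1], &pid, sizeof(pid));
    (void)write(fds[1], &status, sizeof(status));
  });

  // "Main loop": the blocking read stands in for MainLoop's read callback.
  pid_t pid = -1;
  int status = -1;
  (void)read(fds[0], &pid, sizeof(pid));
  (void)read(fds[0], &status, sizeof(status));
  if (WIFEXITED(status))
    printf("pid %d exited with status %d\n", pid, WEXITSTATUS(status));

  reaper.join();
  close(fds[0]);
  close(fds[1]);
  return 0;
}
```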
-
-  // Create a single-direction communication channel.
-  const bool child_inherits = false;
-  error = m_waitpid_pipe.CreateNew(child_inherits);
-  if (error.Fail()) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): failed to create waitpid "
-              "communication pipe: %s",
-              __FUNCTION__, error.AsCString());
-    return error;
-  }
-
-  // Hook up the waitpid reader callback.
-
-  // TODO make PipePOSIX derive from IOObject. This is goofy here.
-  const bool transfer_ownership = false;
-  auto io_sp = IOObjectSP(new NativeFile(m_waitpid_pipe.GetReadFileDescriptor(),
-                                         transfer_ownership));
-  m_waitpid_reader_handle = main_loop.RegisterReadObject(
-      io_sp, [this](MainLoopBase &) { HandleWaitpidResult(); }, error);
-
-  // Create the thread.
-  auto pthread_err =
-      ::pthread_create(&m_waitpid_thread, nullptr, WaitpidThread, this);
-  error.SetError(pthread_err, eErrorTypePOSIX);
-  if (error.Fail()) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): failed to create waitpid "
-              "handling thread: %u (%s)",
-              __FUNCTION__, error.GetError(), error.AsCString());
-    return error;
-  }
-
-  return error;
-}
-
-void *NativeProcessDarwin::WaitpidThread(void *arg) {
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-  if (!arg) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): cannot run waitpid "
-              "thread, mandatory process arg was null",
-              __FUNCTION__);
-    return nullptr;
-  }
-
-  return reinterpret_cast<NativeProcessDarwin *>(arg)->DoWaitpidThread();
-}
-
-void NativeProcessDarwin::MaybeRaiseThreadPriority() {
-#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
-  struct sched_param thread_param;
-  int thread_sched_policy;
-  if (pthread_getschedparam(pthread_self(), &thread_sched_policy,
-                            &thread_param) == 0) {
-    thread_param.sched_priority = 47;
-    pthread_setschedparam(pthread_self(), thread_sched_policy, &thread_param);
-  }
-#endif
-}
-
-void *NativeProcessDarwin::DoWaitpidThread() {
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-
-  if (m_pid == LLDB_INVALID_PROCESS_ID) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): inferior process ID is "
-              "not set, cannot waitpid on it",
-              __FUNCTION__);
-    return nullptr;
-  }
-
-  // Name the thread.
-  pthread_setname_np("waitpid thread");
-
-  // Ensure we don't get CPU starved.
-  MaybeRaiseThreadPriority();
-
-  Status error;
-  int status = -1;
-
-  while (true) {
-    // Do a waitpid.
-    ::pid_t child_pid = ::waitpid(m_pid, &status, 0);
-    if (child_pid < 0) {
-      error.SetErrorToErrno();
-      if (error.GetError() == EINTR) {
-        // This is okay, we can keep going.
-        LLDB_LOGF(log,
-                  "NativeProcessDarwin::%s(): waitpid(pid = %" PRIu64
-                  ", &status, 0) interrupted, continuing",
-                  __FUNCTION__, m_pid);
-        continue;
-      }
-
-      // This error is not okay, abort.
-      LLDB_LOGF(log,
-                "NativeProcessDarwin::%s(): waitpid(pid = %" PRIu64
-                ", &status, 0) aborting due to error: %u (%s)",
-                __FUNCTION__, m_pid, error.GetError(), error.AsCString());
-      break;
-    }
-
-    // Log the successful result.
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): waitpid(pid = %" PRIu64
-              ", &status, 0) => %i, status = %i",
-              __FUNCTION__, m_pid, child_pid, status);
-
-    // Handle the result.
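(Editorial aside: the status word handled in the branch below is decoded with the standard macros from <sys/wait.h>. A small sketch; DescribeWaitStatus is an illustrative name, not part of this file.)

```cpp
#include <sys/wait.h>
#include <cstdio>

void DescribeWaitStatus(int status) {
  if (WIFSTOPPED(status))
    printf("stopped by signal %d\n", WSTOPSIG(status));   // inferior stopped
  else if (WIFEXITED(status))
    printf("exited with code %d\n", WEXITSTATUS(status)); // normal exit
  else if (WIFSIGNALED(status))
    printf("killed by signal %d\n", WTERMSIG(status));    // fatal signal
}
```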
- if (WIFSTOPPED(status)) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): waitpid(pid = %" PRIu64 - ") received a stop, continuing waitpid() loop", - __FUNCTION__, m_pid); - continue; - } else // if (WIFEXITED(status) || WIFSIGNALED(status)) - { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(pid = %" PRIu64 "): " - "waitpid thread is setting exit status for pid = " - "%i to %i", - __FUNCTION__, m_pid, child_pid, status); - - error = SendInferiorExitStatusToMainLoop(child_pid, status); - return nullptr; - } - } - - // We should never exit as long as our child process is alive. If we get - // here, something completely unexpected went wrong and we should exit. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): internal error: waitpid thread " - "exited out of its main loop in an unexpected way. pid = %" PRIu64 - ". Sending exit status of -1.", - __FUNCTION__, m_pid); - - error = SendInferiorExitStatusToMainLoop((::pid_t)m_pid, -1); - return nullptr; -} - -Status NativeProcessDarwin::SendInferiorExitStatusToMainLoop(::pid_t pid, - int status) { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - size_t bytes_written = 0; - - // Send the pid. - error = m_waitpid_pipe.Write(&pid, sizeof(pid), bytes_written); - if (error.Fail() || (bytes_written < sizeof(pid))) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() - failed to write " - "waitpid exiting pid to the pipe. Client will not " - "hear about inferior exit status!", - __FUNCTION__); - return error; - } - - // Send the status. - bytes_written = 0; - error = m_waitpid_pipe.Write(&status, sizeof(status), bytes_written); - if (error.Fail() || (bytes_written < sizeof(status))) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() - failed to write " - "waitpid exit result to the pipe. Client will not " - "hear about inferior exit status!", - __FUNCTION__); - } - return error; -} - -Status NativeProcessDarwin::HandleWaitpidResult() { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - // Read the pid. - const bool notify_status = true; - - ::pid_t pid = -1; - size_t bytes_read = 0; - error = m_waitpid_pipe.Read(&pid, sizeof(pid), bytes_read); - if (error.Fail() || (bytes_read < sizeof(pid))) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() - failed to read " - "waitpid exiting pid from the pipe. Will notify " - "as if parent process died with exit status -1.", - __FUNCTION__); - SetExitStatus(WaitStatus(WaitStatus::Exit, -1), notify_status); - return error; - } - - // Read the status. - int status = -1; - error = m_waitpid_pipe.Read(&status, sizeof(status), bytes_read); - if (error.Fail() || (bytes_read < sizeof(status))) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s() - failed to read " - "waitpid exit status from the pipe. Will notify " - "as if parent process died with exit status -1.", - __FUNCTION__); - SetExitStatus(WaitStatus(WaitStatus::Exit, -1), notify_status); - return error; - } - - // Notify the monitor that our state has changed. - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): main loop received waitpid " - "exit status info: pid=%i (%s), status=%i", - __FUNCTION__, pid, - (pid == m_pid) ? 
"the inferior" : "not the inferior", status); - - SetExitStatus(WaitStatus::Decode(status), notify_status); - return error; -} - -task_t NativeProcessDarwin::TaskPortForProcessID(Status &error, - bool force) const { - if ((m_task == TASK_NULL) || force) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (m_pid == LLDB_INVALID_PROCESS_ID) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): cannot get task due " - "to invalid pid", - __FUNCTION__); - return TASK_NULL; - } - - const uint32_t num_retries = 10; - const uint32_t usec_interval = 10000; - - mach_port_t task_self = mach_task_self(); - task_t task = TASK_NULL; - - for (uint32_t i = 0; i < num_retries; i++) { - kern_return_t err = ::task_for_pid(task_self, m_pid, &task); - if (err == 0) { - // Succeeded. Save and return it. - error.Clear(); - m_task = task; - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): ::task_for_pid(" - "stub_port = 0x%4.4x, pid = %llu, &task) " - "succeeded: inferior task port = 0x%4.4x", - __FUNCTION__, task_self, m_pid, m_task); - return m_task; - } else { - // Failed to get the task for the inferior process. - error.SetError(err, eErrorTypeMachKernel); - if (log) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): ::task_for_pid(" - "stub_port = 0x%4.4x, pid = %llu, &task) " - "failed, err = 0x%8.8x (%s)", - __FUNCTION__, task_self, m_pid, err, error.AsCString()); - } - } - - // Sleep a bit and try again - ::usleep(usec_interval); - } - - // We failed to get the task for the inferior process. Ensure that it is - // cleared out. - m_task = TASK_NULL; - } - return m_task; -} - -void NativeProcessDarwin::AttachToInferior(MainLoop &mainloop, lldb::pid_t pid, - Status &error) { - error.SetErrorString("TODO: implement"); -} - -Status NativeProcessDarwin::PrivateResume() { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - - std::lock_guard locker(m_exception_messages_mutex); - m_auto_resume_signo = m_sent_interrupt_signo; - - if (log) { - if (m_auto_resume_signo) - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): task 0x%x resuming (with " - "unhandled interrupt signal %i)...", - __FUNCTION__, m_task, m_auto_resume_signo); - else - LLDB_LOGF(log, "NativeProcessDarwin::%s(): task 0x%x resuming...", - __FUNCTION__, m_task); - } - - error = ReplyToAllExceptions(); - if (error.Fail()) { - LLDB_LOGF(log, - "NativeProcessDarwin::%s(): aborting, failed to " - "reply to exceptions: %s", - __FUNCTION__, error.AsCString()); - return error; - } - // bool stepOverBreakInstruction = step; - - // Let the thread prepare to resume and see if any threads want us to step - // over a breakpoint instruction (ProcessWillResume will modify the value of - // stepOverBreakInstruction). - m_thread_list.ProcessWillResume(*this, m_thread_actions); - - // Set our state accordingly - if (m_thread_actions.NumActionsWithState(eStateStepping)) - SetState(eStateStepping); - else - SetState(eStateRunning); - - // Now resume our task. - error = ResumeTask(); - return error; -} - -Status NativeProcessDarwin::ReplyToAllExceptions() { - Status error; - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE)); - - TaskPortForProcessID(error); - if (error.Fail()) { - LLDB_LOGF(log, "NativeProcessDarwin::%s(): no task port, aborting", - __FUNCTION__); - return error; - } - - std::lock_guard locker(m_exception_messages_mutex); - if (m_exception_messages.empty()) { - // We're done. 
-    return error;
-  }
-
-  size_t index = 0;
-  for (auto &message : m_exception_messages) {
-    if (log) {
-      LLDB_LOGF(log,
-                "NativeProcessDarwin::%s(): replying to exception "
-                "%zu...",
-                __FUNCTION__, index++);
-    }
-
-    int thread_reply_signal = 0;
-
-    const tid_t tid =
-        m_thread_list.GetThreadIDByMachPortNumber(message.state.thread_port);
-    const ResumeAction *action = nullptr;
-    if (tid != LLDB_INVALID_THREAD_ID)
-      action = m_thread_actions.GetActionForThread(tid, false);
-
-    if (action) {
-      thread_reply_signal = action->signal;
-      if (thread_reply_signal)
-        m_thread_actions.SetSignalHandledForThread(tid);
-    }
-
-    error = message.Reply(m_pid, m_task, thread_reply_signal);
-    if (error.Fail() && log) {
-      // We log any error here, but we don't stop the exception response
-      // handling.
-      LLDB_LOGF(log,
-                "NativeProcessDarwin::%s(): failed to reply to "
-                "exception: %s",
-                __FUNCTION__, error.AsCString());
-      error.Clear();
-    }
-  }
-
-  // Erase all exception messages, as we should have used and replied to all
-  // of them already.
-  m_exception_messages.clear();
-  return error;
-}
-
-Status NativeProcessDarwin::ResumeTask() {
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-
-  TaskPortForProcessID(error);
-  if (error.Fail()) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): failed to get task port "
-              "for process when attempting to resume: %s",
-              __FUNCTION__, error.AsCString());
-    return error;
-  }
-  if (m_task == TASK_NULL) {
-    error.SetErrorString("task port retrieval succeeded but task port is "
-                         "null when attempting to resume the task");
-    return error;
-  }
-
-  LLDB_LOGF(log,
-            "NativeProcessDarwin::%s(): requesting resume of task "
-            "0x%4.4x",
-            __FUNCTION__, m_task);
-
-  // Get the BasicInfo struct to verify that we're suspended before we try to
-  // resume the task.
-  struct task_basic_info task_info;
-  error = GetTaskBasicInfo(m_task, &task_info);
-  if (error.Fail()) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): failed to get task "
-              "BasicInfo when attempting to resume: %s",
-              __FUNCTION__, error.AsCString());
-    return error;
-  }
-
-  // task_resume isn't counted the way task_suspend calls are, so if the task
-  // is not suspended, don't try to resume it, since it is already running.
-  if (task_info.suspend_count > 0) {
-    auto mach_err = ::task_resume(m_task);
-    error.SetError(mach_err, eErrorTypeMachKernel);
-    if (log) {
-      if (error.Success())
-        LLDB_LOGF(log, "::task_resume(target_task = 0x%4.4x): success", m_task);
-      else
-        LLDB_LOGF(log, "::task_resume(target_task = 0x%4.4x) error: %s", m_task,
-                  error.AsCString());
-    }
-  } else {
-    LLDB_LOGF(log,
-              "::task_resume(target_task = 0x%4.4x): ignored, "
-              "already running",
-              m_task);
-  }
-
-  return error;
-}
-
-bool NativeProcessDarwin::IsTaskValid() const {
-  if (m_task == TASK_NULL)
-    return false;
-
-  struct task_basic_info task_info;
-  return GetTaskBasicInfo(m_task, &task_info).Success();
-}
-
-bool NativeProcessDarwin::IsTaskValid(task_t task) const {
-  if (task == TASK_NULL)
-    return false;
-
-  struct task_basic_info task_info;
-  return GetTaskBasicInfo(task, &task_info).Success();
-}
-
-mach_port_t NativeProcessDarwin::GetExceptionPort() const {
-  return m_exception_port;
-}
-
-bool NativeProcessDarwin::IsExceptionPortValid() const {
-  return MACH_PORT_VALID(m_exception_port);
-}
-
-Status
-NativeProcessDarwin::GetTaskBasicInfo(task_t task,
-                                      struct task_basic_info *info) const {
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
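(Editorial aside: stripped of the Status plumbing, the query this method wraps is a single task_info() call with the TASK_BASIC_INFO flavor. A minimal sketch; GetBasicInfoRaw is an illustrative name, not part of this file.)

```cpp
#include <mach/mach.h>

bool GetBasicInfoRaw(task_t task, task_basic_info *info) {
  mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
  return ::task_info(task, TASK_BASIC_INFO, (task_info_t)info, &count) ==
         KERN_SUCCESS;
}
```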
-
-  // Validate args.
-  if (info == NULL) {
-    error.SetErrorStringWithFormat("NativeProcessDarwin::%s(): mandatory "
-                                   "info arg is null",
-                                   __FUNCTION__);
-    return error;
-  }
-
-  // Make sure the given task is valid.
-  if (task == TASK_NULL) {
-    error.SetErrorStringWithFormat("NativeProcessDarwin::%s(): given task "
-                                   "is invalid",
-                                   __FUNCTION__);
-    return error;
-  }
-
-  mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
-  auto err = ::task_info(task, TASK_BASIC_INFO, (task_info_t)info, &count);
-  error.SetError(err, eErrorTypeMachKernel);
-  if (error.Fail()) {
-    LLDB_LOGF(log,
-              "::task_info(target_task = 0x%4.4x, "
-              "flavor = TASK_BASIC_INFO, task_info_out => %p, "
-              "task_info_outCnt => %u) failed: %u (%s)",
-              task, info, count, error.GetError(), error.AsCString());
-    return error;
-  }
-
-  Log *verbose_log(
-      GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_VERBOSE));
-  if (verbose_log) {
-    float user = (float)info->user_time.seconds +
-                 (float)info->user_time.microseconds / 1000000.0f;
-    float system = (float)info->system_time.seconds +
-                   (float)info->system_time.microseconds / 1000000.0f;
-    LLDB_LOGF(verbose_log,
-              "task_basic_info = { suspend_count = %i, "
-              "virtual_size = 0x%8.8llx, resident_size = "
-              "0x%8.8llx, user_time = %f, system_time = %f }",
-              info->suspend_count, (uint64_t)info->virtual_size,
-              (uint64_t)info->resident_size, user, system);
-  }
-  return error;
-}
-
-Status NativeProcessDarwin::SuspendTask() {
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-
-  if (m_task == TASK_NULL) {
-    error.SetErrorString("task port is null, cannot suspend task");
-    LLDB_LOGF(log, "NativeProcessDarwin::%s() failed: %s", __FUNCTION__,
-              error.AsCString());
-    return error;
-  }
-
-  auto mach_err = ::task_suspend(m_task);
-  error.SetError(mach_err, eErrorTypeMachKernel);
-  if (error.Fail() && log)
-    LLDB_LOGF(log, "::task_suspend(target_task = 0x%4.4x) failed: %s", m_task,
-              error.AsCString());
-
-  return error;
-}
-
-Status NativeProcessDarwin::Resume(const ResumeActionList &resume_actions) {
-  Status error;
-  Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
-
-  LLDB_LOGF(log, "NativeProcessDarwin::%s() called", __FUNCTION__);
-
-  if (CanResume()) {
-    m_thread_actions = resume_actions;
-    error = PrivateResume();
-    return error;
-  }
-
-  auto state = GetState();
-  if (state == eStateRunning) {
-    LLDB_LOGF(log,
-              "NativeProcessDarwin::%s(): task 0x%x is already "
-              "running, ignoring...",
-              __FUNCTION__, TaskPortForProcessID(error));
-    return error;
-  }
-
-  // We can't resume from this state.
- error.SetErrorStringWithFormat("task 0x%x has state %s, can't resume", - TaskPortForProcessID(error), - StateAsCString(state)); - return error; -} - -Status NativeProcessDarwin::Halt() { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::Detach() { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::Signal(int signo) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::Interrupt() { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::Kill() { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::ReadMemory(lldb::addr_t addr, void *buf, - size_t size, size_t &bytes_read) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::ReadMemoryWithoutTrap(lldb::addr_t addr, void *buf, - size_t size, - size_t &bytes_read) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::WriteMemory(lldb::addr_t addr, const void *buf, - size_t size, size_t &bytes_written) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::AllocateMemory(size_t size, uint32_t permissions, - lldb::addr_t &addr) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::DeallocateMemory(lldb::addr_t addr) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -lldb::addr_t NativeProcessDarwin::GetSharedLibraryInfoAddress() { - return LLDB_INVALID_ADDRESS; -} - -size_t NativeProcessDarwin::UpdateThreads() { return 0; } - -bool NativeProcessDarwin::GetArchitecture(ArchSpec &arch) const { - return false; -} - -Status NativeProcessDarwin::SetBreakpoint(lldb::addr_t addr, uint32_t size, - bool hardware) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -void NativeProcessDarwin::DoStopIDBumped(uint32_t newBumpId) {} - -Status NativeProcessDarwin::GetLoadedModuleFileSpec(const char *module_path, - FileSpec &file_spec) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -Status NativeProcessDarwin::GetFileLoadAddress(const llvm::StringRef &file_name, - lldb::addr_t &load_addr) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} - -// NativeProcessProtocol protected interface -Status NativeProcessDarwin::GetSoftwareBreakpointTrapOpcode( - size_t trap_opcode_size_hint, size_t &actual_opcode_size, - const uint8_t *&trap_opcode_bytes) { - Status error; - error.SetErrorString("TODO: implement"); - return error; -} diff --git a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.h b/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.h deleted file mode 100644 index 6741d4ddc5d8f..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.h +++ /dev/null @@ -1,337 +0,0 @@ -//===-- NativeProcessDarwin.h --------------------------------- -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NativeProcessDarwin_h
-#define NativeProcessDarwin_h
-
-// NOTE: this code should only be compiled on Apple Darwin systems. It is
-// not cross-platform code and is not intended to build on any other platform.
-// Therefore, platform-specific headers and code are okay here.
-
-// C includes
-#include <mach/mach_types.h>
-
-// C++ includes
-#include <mutex>
-#include <unordered_set>
-
-#include "lldb/Host/Debug.h"
-#include "lldb/Host/HostThread.h"
-#include "lldb/Host/Pipe.h"
-#include "lldb/Host/common/NativeProcessProtocol.h"
-#include "lldb/Target/MemoryRegionInfo.h"
-#include "lldb/Utility/ArchSpec.h"
-#include "lldb/Utility/FileSpec.h"
-#include "lldb/lldb-types.h"
-
-#include "LaunchFlavor.h"
-#include "MachException.h"
-#include "NativeThreadDarwin.h"
-#include "NativeThreadListDarwin.h"
-
-namespace lldb_private {
-class Status;
-class Scalar;
-
-namespace process_darwin {
-
-/// \class NativeProcessDarwin
-/// Manages communication with the inferior (debuggee) process.
-///
-/// Upon construction, this class prepares and launches an inferior process
-/// for debugging.
-///
-/// Changes in the inferior process state are broadcast.
-class NativeProcessDarwin : public NativeProcessProtocol {
-  friend Status NativeProcessProtocol::Launch(
-      ProcessLaunchInfo &launch_info, NativeDelegate &native_delegate,
-      MainLoop &mainloop, NativeProcessProtocolSP &process_sp);
-
-  friend Status NativeProcessProtocol::Attach(
-      lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate,
-      MainLoop &mainloop, NativeProcessProtocolSP &process_sp);
-
-public:
-  ~NativeProcessDarwin() override;
-
-  // NativeProcessProtocol Interface
-  Status Resume(const ResumeActionList &resume_actions) override;
-
-  Status Halt() override;
-
-  Status Detach() override;
-
-  Status Signal(int signo) override;
-
-  Status Interrupt() override;
-
-  Status Kill() override;
-
-  Status GetMemoryRegionInfo(lldb::addr_t load_addr,
-                             MemoryRegionInfo &range_info) override;
-
-  Status ReadMemory(lldb::addr_t addr, void *buf, size_t size,
-                    size_t &bytes_read) override;
-
-  Status ReadMemoryWithoutTrap(lldb::addr_t addr, void *buf, size_t size,
-                               size_t &bytes_read) override;
-
-  Status WriteMemory(lldb::addr_t addr, const void *buf, size_t size,
-                     size_t &bytes_written) override;
-
-  Status AllocateMemory(size_t size, uint32_t permissions,
-                        lldb::addr_t &addr) override;
-
-  Status DeallocateMemory(lldb::addr_t addr) override;
-
-  lldb::addr_t GetSharedLibraryInfoAddress() override;
-
-  size_t UpdateThreads() override;
-
-  bool GetArchitecture(ArchSpec &arch) const override;
-
-  Status SetBreakpoint(lldb::addr_t addr, uint32_t size,
-                       bool hardware) override;
-
-  void DoStopIDBumped(uint32_t newBumpId) override;
-
-  Status GetLoadedModuleFileSpec(const char *module_path,
-                                 FileSpec &file_spec) override;
-
-  Status GetFileLoadAddress(const llvm::StringRef &file_name,
-                            lldb::addr_t &load_addr) override;
-
-  NativeThreadDarwinSP GetThreadByID(lldb::tid_t id);
-
-  task_t GetTask() const { return m_task; }
-
-  // Interface used by NativeRegisterContext-derived classes.
-  static Status PtraceWrapper(int req, lldb::pid_t pid, void *addr = nullptr,
-                              void *data = nullptr, size_t data_size = 0,
-                              long *result = nullptr);
-
-  bool SupportHardwareSingleStepping() const;
-
-protected:
-  // NativeProcessProtocol protected interface
-  Status
-  GetSoftwareBreakpointTrapOpcode(size_t trap_opcode_size_hint,
-                                  size_t &actual_opcode_size,
-                                  const uint8_t *&trap_opcode_bytes) override;
-
-private:
-  /// Mach task-related Member Variables
-
-  // The task port for the inferior process.
-  mutable task_t m_task;
-
-  // True if the inferior process did an exec since we started
-  // monitoring it.
-  bool m_did_exec;
-
-  // The CPU type of this process.
-  mutable cpu_type_t m_cpu_type;
-
-  /// Exception/Signal Handling Member Variables
-
-  // Exception port on which we will receive child exceptions.
-  mach_port_t m_exception_port;
-
-  // Saved state of the child exception port prior to us installing
-  // our own intercepting port.
-  MachException::PortInfo m_exc_port_info;
-
-  // The thread that runs the Mach exception read and reply handler.
-  pthread_t m_exception_thread;
-
-  // TODO see if we can remove this if we get the exception collection
-  // and distribution to happen in a single-threaded fashion.
-  std::recursive_mutex m_exception_messages_mutex;
-
-  // A collection of exception messages caught when listening to the
-  // exception port.
-  MachException::Message::collection m_exception_messages;
-
-  // When we call MachProcess::Interrupt(), we want to send this
-  // signal (if non-zero).
-  int m_sent_interrupt_signo;
-
-  // If we resume the process and still haven't received our
-  // interrupt signal (if this is non-zero).
-  int m_auto_resume_signo;
-
-  /// Thread-related Member Variables
-  NativeThreadListDarwin m_thread_list;
-  ResumeActionList m_thread_actions;
-
-  /// Process Lifetime Member Variables
-
-  // The pipe over which the waitpid thread and the main loop will
-  // communicate.
-  Pipe m_waitpid_pipe;
-
-  // The thread that runs the waitpid handler.
-  pthread_t m_waitpid_thread;
-
-  // waitpid reader callback handle.
-  MainLoop::ReadHandleUP m_waitpid_reader_handle;
-
-  // Private Instance Methods
-  NativeProcessDarwin(lldb::pid_t pid, int pty_master_fd);
-
-  /// Finalize the launch.
-  ///
-  /// This method associates the NativeProcessDarwin instance with the host
-  /// process that was just launched. It performs actions like attaching a
-  /// listener to the inferior exception port, ptracing the process, and the
-  /// like.
-  ///
-  /// \param[in] launch_flavor
-  ///     The launch flavor that was used to launch the process.
-  ///
-  /// \param[in] main_loop
-  ///     The main loop that will run the process monitor. Work
-  ///     that needs to be done (e.g. reading files) gets registered
-  ///     here along with callbacks to process the work.
-  ///
-  /// \return
-  ///     Any error that occurred during the aforementioned
-  ///     operations. Failure here will force termination of the
-  ///     launched process and debugging session.
-  Status FinalizeLaunch(LaunchFlavor launch_flavor, MainLoop &main_loop);
-
-  Status SaveExceptionPortInfo();
-
-  void ExceptionMessageReceived(const MachException::Message &message);
-
-  void MaybeRaiseThreadPriority();
-
-  Status StartExceptionThread();
-
-  Status SendInferiorExitStatusToMainLoop(::pid_t pid, int status);
-
-  Status HandleWaitpidResult();
-
-  bool ProcessUsingSpringBoard() const;
-
-  bool ProcessUsingBackBoard() const;
-
-  static void *ExceptionThread(void *arg);
-
-  void *DoExceptionThread();
-
-  lldb::addr_t GetDYLDAllImageInfosAddress(Status &error) const;
-
-  static uint32_t GetCPUTypeForLocalProcess(::pid_t pid);
-
-  uint32_t GetCPUType() const;
-
-  task_t ExceptionMessageBundleComplete();
-
-  void StartSTDIOThread();
-
-  Status StartWaitpidThread(MainLoop &main_loop);
-
-  static void *WaitpidThread(void *arg);
-
-  void *DoWaitpidThread();
-
-  task_t TaskPortForProcessID(Status &error, bool force = false) const;
-
-  /// Attaches to an existing process. Forms the implementation of
-  /// Process::DoAttach.
-  void AttachToInferior(MainLoop &mainloop, lldb::pid_t pid, Status &error);
-
-  ::pid_t Attach(lldb::pid_t pid, Status &error);
-
-  Status PrivateResume();
-
-  Status ReplyToAllExceptions();
-
-  Status ResumeTask();
-
-  bool IsTaskValid() const;
-
-  bool IsTaskValid(task_t task) const;
-
-  mach_port_t GetExceptionPort() const;
-
-  bool IsExceptionPortValid() const;
-
-  Status GetTaskBasicInfo(task_t task, struct task_basic_info *info) const;
-
-  Status SuspendTask();
-
-  static Status SetDefaultPtraceOpts(const lldb::pid_t);
-
-  static void *MonitorThread(void *baton);
-
-  void MonitorCallback(lldb::pid_t pid, bool exited, int signal, int status);
-
-  void WaitForNewThread(::pid_t tid);
-
-  void MonitorSIGTRAP(const siginfo_t &info, NativeThreadDarwin &thread);
-
-  void MonitorTrace(NativeThreadDarwin &thread);
-
-  void MonitorBreakpoint(NativeThreadDarwin &thread);
-
-  void MonitorWatchpoint(NativeThreadDarwin &thread, uint32_t wp_index);
-
-  void MonitorSignal(const siginfo_t &info, NativeThreadDarwin &thread,
-                     bool exited);
-
-  Status SetupSoftwareSingleStepping(NativeThreadDarwin &thread);
-
-  bool HasThreadNoLock(lldb::tid_t thread_id);
-
-  bool StopTrackingThread(lldb::tid_t thread_id);
-
-  NativeThreadDarwinSP AddThread(lldb::tid_t thread_id);
-
-  Status GetSoftwareBreakpointPCOffset(uint32_t &actual_opcode_size);
-
-  Status FixupBreakpointPCAsNeeded(NativeThreadDarwin &thread);
-
-  /// Writes a siginfo_t structure corresponding to the given thread
-  /// ID to the memory region pointed to by \p siginfo.
-  Status GetSignalInfo(lldb::tid_t tid, void *siginfo);
-
-  /// Writes the raw event message code (vis-a-vis PTRACE_GETEVENTMSG)
-  /// corresponding to the given thread ID to the memory pointed to by
-  /// \p message.
-  Status GetEventMessage(lldb::tid_t tid, unsigned long *message);
-
-  void NotifyThreadDeath(lldb::tid_t tid);
-
-  Status Detach(lldb::tid_t tid);
-
-  // This method requests a stop on all threads which are still
-  // running. It sets up a deferred delegate notification, which will
-  // fire once threads report as stopped. The triggering_tid will be
-  // set as the current thread (main stop reason).
-  void StopRunningThreads(lldb::tid_t triggering_tid);
-
-  // Notify the delegate if all threads have stopped.
-  void SignalIfAllThreadsStopped();
-
-  // Resume the given thread, optionally passing it the given signal.
-  // The type of resume operation (continue, single-step) depends on
-  // the state parameter.
- Status ResumeThread(NativeThreadDarwin &thread, lldb::StateType state, - int signo); - - void ThreadWasCreated(NativeThreadDarwin &thread); - - void SigchldHandler(); -}; - -} // namespace process_darwin -} // namespace lldb_private - -#endif /* NativeProcessDarwin_h */ diff --git a/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.cpp b/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.cpp deleted file mode 100644 index 4cfb463d864f2..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.cpp +++ /dev/null @@ -1,281 +0,0 @@ -//===-- NativeThreadDarwin.cpp --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "NativeThreadDarwin.h" - -// C includes -#include - -// LLDB includes -#include "lldb/Utility/Stream.h" - -#include "NativeProcessDarwin.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_darwin; - -uint64_t NativeThreadDarwin::GetGloballyUniqueThreadIDForMachPortID( - ::thread_t mach_port_id) { - thread_identifier_info_data_t tident; - mach_msg_type_number_t tident_count = THREAD_IDENTIFIER_INFO_COUNT; - - auto mach_err = ::thread_info(mach_port_id, THREAD_IDENTIFIER_INFO, - (thread_info_t)&tident, &tident_count); - if (mach_err != KERN_SUCCESS) { - // When we fail to get thread info for the supposed port, assume it is - // really a globally unique thread id already, or return the best thing we - // can, which is the thread port. - return mach_port_id; - } - return tident.thread_id; -} - -NativeThreadDarwin::NativeThreadDarwin(NativeProcessDarwin *process, - bool is_64_bit, - lldb::tid_t unique_thread_id, - ::thread_t mach_thread_port) - : NativeThreadProtocol(process, unique_thread_id), - m_mach_thread_port(mach_thread_port), m_basic_info(), - m_proc_threadinfo() {} - -bool NativeThreadDarwin::GetIdentifierInfo() { - // Don't try to get the thread info once and cache it for the life of the - // thread. It changes over time, for instance if the thread name changes, - // then the thread_handle also changes... So you have to refetch it every - // time. 
-  mach_msg_type_number_t count = THREAD_IDENTIFIER_INFO_COUNT;
-  kern_return_t kret = ::thread_info(m_mach_thread_port, THREAD_IDENTIFIER_INFO,
-                                     (thread_info_t)&m_ident_info, &count);
-  return kret == KERN_SUCCESS;
-}
-
-std::string NativeThreadDarwin::GetName() {
-  std::string name;
-
-  if (GetIdentifierInfo()) {
-    auto process_sp = GetProcess();
-    if (!process_sp) {
-      name = "<unavailable>";
-      return name;
-    }
-
-    int len = ::proc_pidinfo(process_sp->GetID(), PROC_PIDTHREADINFO,
-                             m_ident_info.thread_handle, &m_proc_threadinfo,
-                             sizeof(m_proc_threadinfo));
-
-    if (len && m_proc_threadinfo.pth_name[0])
-      name = m_proc_threadinfo.pth_name;
-  }
-  return name;
-}
-
-lldb::StateType NativeThreadDarwin::GetState() {
-  // TODO implement
-  return eStateInvalid;
-}
-
-bool NativeThreadDarwin::GetStopReason(ThreadStopInfo &stop_info,
-                                       std::string &description) {
-  // TODO implement
-  return false;
-}
-
-NativeRegisterContextSP NativeThreadDarwin::GetRegisterContext() {
-  // TODO implement
-  return NativeRegisterContextSP();
-}
-
-Status NativeThreadDarwin::SetWatchpoint(lldb::addr_t addr, size_t size,
-                                         uint32_t watch_flags, bool hardware) {
-  Status error;
-  error.SetErrorString("not yet implemented");
-  return error;
-}
-
-Status NativeThreadDarwin::RemoveWatchpoint(lldb::addr_t addr) {
-  Status error;
-  error.SetErrorString("not yet implemented");
-  return error;
-}
-
-void NativeThreadDarwin::Dump(Stream &stream) const {
-// This is what we really want once we have the thread class wired up.
-#if 0
-  DNBLogThreaded("[%3u] #%3u tid: 0x%8.8" PRIx64 ", pc: 0x%16.16" PRIx64 ", sp: 0x%16.16" PRIx64 ", user: %d.%6.6d, system: %d.%6.6d, cpu: %2d, policy: %2d, run_state: %2d (%s), flags: %2d, suspend_count: %2d (current %2d), sleep_time: %d",
-                 index,
-                 m_seq_id,
-                 m_unique_id,
-                 GetPC(INVALID_NUB_ADDRESS),
-                 GetSP(INVALID_NUB_ADDRESS),
-                 m_basic_info.user_time.seconds, m_basic_info.user_time.microseconds,
-                 m_basic_info.system_time.seconds, m_basic_info.system_time.microseconds,
-                 m_basic_info.cpu_usage,
-                 m_basic_info.policy,
-                 m_basic_info.run_state,
-                 thread_run_state,
-                 m_basic_info.flags,
-                 m_basic_info.suspend_count, m_suspend_count,
-                 m_basic_info.sleep_time);
-
-#else
-  // Here's all we have right now.
-  stream.Printf("tid: 0x%8.8" PRIx64 ", thread port: 0x%4.4x", GetID(),
-                m_mach_thread_port);
-#endif
-}
-
-bool NativeThreadDarwin::NotifyException(MachException::Data &exc) {
-// TODO implement this.
-#if 0
-  // Allow the arch specific protocol to process (MachException::Data &)exc
-  // first before possible reassignment of m_stop_exception with exc. See
-  // also MachThread::GetStopException().
-  bool handled = m_arch_up->NotifyException(exc);
-
-  if (m_stop_exception.IsValid())
-  {
-    // We may have more than one exception for a thread, but we need to
-    // only remember the one that we will say is the reason we stopped. We
-    // may have been single stepping and also gotten a signal exception, so
-    // just remember the most pertinent one.
-    if (m_stop_exception.IsBreakpoint())
-      m_stop_exception = exc;
-  }
-  else
-  {
-    m_stop_exception = exc;
-  }
-
-  return handled;
-#else
-  // Pretend we handled it.
-  return true;
-#endif
-}
-
-bool NativeThreadDarwin::ShouldStop(bool &step_more) const {
-// TODO: implement this
-#if 0
-  // See if this thread is at a breakpoint?
-  DNBBreakpoint *bp = CurrentBreakpoint();
-
-  if (bp)
-  {
-    // This thread is sitting at a breakpoint, ask the breakpoint if we
-    // should be stopping here.
- return true; - } - else - { - if (m_arch_up->StepNotComplete()) - { - step_more = true; - return false; - } - // The thread state is used to let us know what the thread was trying - // to do. MachThread::ThreadWillResume() will set the thread state to - // various values depending if the thread was the current thread and if - // it was to be single stepped, or resumed. - if (GetState() == eStateRunning) - { - // If our state is running, then we should continue as we are in - // the process of stepping over a breakpoint. - return false; - } - else - { - // Stop if we have any kind of valid exception for this thread. - if (GetStopException().IsValid()) - return true; - } - } - return false; -#else - return false; -#endif -} - -void NativeThreadDarwin::ThreadDidStop() { -// TODO implement this. -#if 0 - // This thread has existed prior to resuming under debug nub control, and - // has just been stopped. Do any cleanup that needs to be done after - // running. - - // The thread state and breakpoint will still have the same values as they - // had prior to resuming the thread, so it makes it easy to check if we - // were trying to step a thread, or we tried to resume while being at a - // breakpoint. - - // When this method gets called, the process state is still in the state it - // was in while running so we can act accordingly. - m_arch_up->ThreadDidStop(); - - - // We may have suspended this thread so the primary thread could step - // without worrying about race conditions, so lets restore our suspend - // count. - RestoreSuspendCountAfterStop(); - - // Update the basic information for a thread - MachThread::GetBasicInfo(m_mach_port_number, &m_basic_info); - - if (m_basic_info.suspend_count > 0) - SetState(eStateSuspended); - else - SetState(eStateStopped); -#endif -} - -bool NativeThreadDarwin::MachPortNumberIsValid(::thread_t thread) { - return thread != (::thread_t)(0); -} - -const struct thread_basic_info *NativeThreadDarwin::GetBasicInfo() const { - if (GetBasicInfo(m_mach_thread_port, &m_basic_info)) - return &m_basic_info; - return NULL; -} - -bool NativeThreadDarwin::GetBasicInfo(::thread_t thread, - struct thread_basic_info *basicInfoPtr) { - if (MachPortNumberIsValid(thread)) { - unsigned int info_count = THREAD_BASIC_INFO_COUNT; - kern_return_t err = ::thread_info(thread, THREAD_BASIC_INFO, - (thread_info_t)basicInfoPtr, &info_count); - if (err == KERN_SUCCESS) - return true; - } - ::memset(basicInfoPtr, 0, sizeof(struct thread_basic_info)); - return false; -} - -bool NativeThreadDarwin::IsUserReady() const { - if (m_basic_info.run_state == 0) - GetBasicInfo(); - - switch (m_basic_info.run_state) { - default: - case TH_STATE_UNINTERRUPTIBLE: - break; - - case TH_STATE_RUNNING: - case TH_STATE_STOPPED: - case TH_STATE_WAITING: - case TH_STATE_HALTED: - return true; - } - return false; -} - -NativeProcessDarwinSP NativeThreadDarwin::GetNativeProcessDarwinSP() { - return std::static_pointer_cast(GetProcess()); -} diff --git a/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.h b/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.h deleted file mode 100644 index 616a9a7b9bf00..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeThreadDarwin.h +++ /dev/null @@ -1,165 +0,0 @@ -//===-- NativeThreadDarwin.h ---------------------------------- -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef NativeThreadDarwin_H -#define NativeThreadDarwin_H - -// C includes -#include -#include -#include - -// C++ includes -#include -#include -#include - -// LLDB includes -#include "lldb/Host/common/NativeThreadProtocol.h" -#include "lldb/lldb-private-forward.h" - -#include "MachException.h" - -namespace lldb_private { -namespace process_darwin { - -class NativeProcessDarwin; -using NativeProcessDarwinSP = std::shared_ptr; - -class NativeThreadListDarwin; - -class NativeThreadDarwin : public NativeThreadProtocol { - friend class NativeProcessDarwin; - friend class NativeThreadListDarwin; - -public: - static uint64_t - GetGloballyUniqueThreadIDForMachPortID(::thread_t mach_port_id); - - NativeThreadDarwin(NativeProcessDarwin *process, bool is_64_bit, - lldb::tid_t unique_thread_id = 0, - ::thread_t mach_thread_port = 0); - - // NativeThreadProtocol Interface - std::string GetName() override; - - lldb::StateType GetState() override; - - bool GetStopReason(ThreadStopInfo &stop_info, - std::string &description) override; - - NativeRegisterContextSP GetRegisterContext() override; - - Status SetWatchpoint(lldb::addr_t addr, size_t size, uint32_t watch_flags, - bool hardware) override; - - Status RemoveWatchpoint(lldb::addr_t addr) override; - - // New methods that are fine for others to call. - void Dump(Stream &stream) const; - -private: - // Interface for friend classes - - /// Resumes the thread. If \p signo is anything but - /// LLDB_INVALID_SIGNAL_NUMBER, deliver that signal to the thread. - Status Resume(uint32_t signo); - - /// Single steps the thread. If \p signo is anything but - /// LLDB_INVALID_SIGNAL_NUMBER, deliver that signal to the thread. - Status SingleStep(uint32_t signo); - - bool NotifyException(MachException::Data &exc); - - bool ShouldStop(bool &step_more) const; - - void ThreadDidStop(); - - void SetStoppedBySignal(uint32_t signo, const siginfo_t *info = nullptr); - - /// Return true if the thread is stopped. - /// If stopped by a signal, indicate the signo in the signo - /// argument. Otherwise, return LLDB_INVALID_SIGNAL_NUMBER. - bool IsStopped(int *signo); - - const struct thread_basic_info *GetBasicInfo() const; - - static bool GetBasicInfo(::thread_t thread, - struct thread_basic_info *basicInfoPtr); - - bool IsUserReady() const; - - void SetStoppedByExec(); - - void SetStoppedByBreakpoint(); - - void SetStoppedByWatchpoint(uint32_t wp_index); - - bool IsStoppedAtBreakpoint(); - - bool IsStoppedAtWatchpoint(); - - void SetStoppedByTrace(); - - void SetStoppedWithNoReason(); - - void SetExited(); - - Status RequestStop(); - - /// Return the mach thread port number for this thread. - /// - /// \return - /// The mach port number for this thread. Returns NULL_THREAD - /// when the thread is invalid. - thread_t GetMachPortNumber() const { return m_mach_thread_port; } - - static bool MachPortNumberIsValid(::thread_t thread); - - // Private interface - bool GetIdentifierInfo(); - - void MaybeLogStateChange(lldb::StateType new_state); - - NativeProcessDarwinSP GetNativeProcessDarwinSP(); - - void SetStopped(); - - inline void MaybePrepareSingleStepWorkaround(); - - inline void MaybeCleanupSingleStepWorkaround(); - - // Member Variables - - // The mach thread port for the thread. - ::thread_t m_mach_thread_port; - - // The most recently-retrieved thread basic info. 
- mutable ::thread_basic_info m_basic_info; - - struct proc_threadinfo m_proc_threadinfo; - - thread_identifier_info_data_t m_ident_info; - -#if 0 - lldb::StateType m_state; - ThreadStopInfo m_stop_info; - NativeRegisterContextSP m_reg_context_sp; - std::string m_stop_description; - using WatchpointIndexMap = std::map; - WatchpointIndexMap m_watchpoint_index_map; - // cpu_set_t m_original_cpu_set; // For single-step workaround. -#endif -}; - -typedef std::shared_ptr NativeThreadDarwinSP; - -} // namespace process_darwin -} // namespace lldb_private - -#endif // #ifndef NativeThreadDarwin_H diff --git a/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.cpp b/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.cpp deleted file mode 100644 index 890f5f67ef0c9..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.cpp +++ /dev/null @@ -1,701 +0,0 @@ -//===-- NativeThreadListDarwin.cpp ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 6/19/07. -// -//===----------------------------------------------------------------------===// - -#include "NativeThreadListDarwin.h" - -// C includes -#include -#include -#include - -// LLDB includes -#include "lldb/Utility/Log.h" -#include "lldb/Utility/Status.h" -#include "lldb/Utility/Stream.h" -#include "lldb/lldb-enumerations.h" - -#include "NativeProcessDarwin.h" -#include "NativeThreadDarwin.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::process_darwin; - -NativeThreadListDarwin::NativeThreadListDarwin() - : m_threads(), m_threads_mutex(), m_is_64_bit(false) {} - -NativeThreadListDarwin::~NativeThreadListDarwin() {} - -// These methods will be accessed directly from NativeThreadDarwin -#if 0 -nub_state_t -NativeThreadListDarwin::GetState(nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetState(); - return eStateInvalid; -} - -const char * -NativeThreadListDarwin::GetName (nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetName(); - return NULL; -} -#endif - -// TODO: figure out if we need to add this to NativeThreadDarwin yet. 
-#if 0 -ThreadInfo::QoS -NativeThreadListDarwin::GetRequestedQoS (nub_thread_t tid, nub_addr_t tsd, uint64_t dti_qos_class_index) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetRequestedQoS(tsd, dti_qos_class_index); - return ThreadInfo::QoS(); -} - -nub_addr_t -NativeThreadListDarwin::GetPThreadT (nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetPThreadT(); - return INVALID_NUB_ADDRESS; -} - -nub_addr_t -NativeThreadListDarwin::GetDispatchQueueT (nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetDispatchQueueT(); - return INVALID_NUB_ADDRESS; -} - -nub_addr_t -NativeThreadListDarwin::GetTSDAddressForThread (nub_thread_t tid, uint64_t plo_pthread_tsd_base_address_offset, uint64_t plo_pthread_tsd_base_offset, uint64_t plo_pthread_tsd_entry_size) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetTSDAddressForThread(plo_pthread_tsd_base_address_offset, plo_pthread_tsd_base_offset, plo_pthread_tsd_entry_size); - return INVALID_NUB_ADDRESS; -} -#endif - -// TODO implement these -#if 0 -nub_thread_t -NativeThreadListDarwin::SetCurrentThread(nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - { - m_current_thread = thread_sp; - return tid; - } - return INVALID_NUB_THREAD; -} - - -bool -NativeThreadListDarwin::GetThreadStoppedReason(nub_thread_t tid, struct DNBThreadStopInfo *stop_info) const -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetStopException().GetStopInfo(stop_info); - return false; -} - -bool -NativeThreadListDarwin::GetIdentifierInfo (nub_thread_t tid, thread_identifier_info_data_t *ident_info) -{ - thread_t mach_port_number = GetMachPortNumberByThreadID (tid); - - mach_msg_type_number_t count = THREAD_IDENTIFIER_INFO_COUNT; - return ::thread_info (mach_port_number, THREAD_IDENTIFIER_INFO, (thread_info_t)ident_info, &count) == KERN_SUCCESS; -} - -void -NativeThreadListDarwin::DumpThreadStoppedReason (nub_thread_t tid) const -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - thread_sp->GetStopException().DumpStopReason(); -} - -const char * -NativeThreadListDarwin::GetThreadInfo (nub_thread_t tid) const -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetBasicInfoAsString(); - return NULL; -} - -#endif - -NativeThreadDarwinSP -NativeThreadListDarwin::GetThreadByID(lldb::tid_t tid) const { - std::lock_guard locker(m_threads_mutex); - for (auto thread_sp : m_threads) { - if (thread_sp && (thread_sp->GetID() == tid)) - return thread_sp; - } - return NativeThreadDarwinSP(); -} - -NativeThreadDarwinSP NativeThreadListDarwin::GetThreadByMachPortNumber( - ::thread_t mach_port_number) const { - std::lock_guard locker(m_threads_mutex); - for (auto thread_sp : m_threads) { - if (thread_sp && (thread_sp->GetMachPortNumber() == mach_port_number)) - return thread_sp; - } - return NativeThreadDarwinSP(); -} - -lldb::tid_t NativeThreadListDarwin::GetThreadIDByMachPortNumber( - ::thread_t mach_port_number) const { - std::lock_guard locker(m_threads_mutex); - for (auto thread_sp : m_threads) { - if (thread_sp && (thread_sp->GetMachPortNumber() == mach_port_number)) - return thread_sp->GetID(); - } - return LLDB_INVALID_THREAD_ID; -} - -// TODO implement -#if 0 -thread_t -NativeThreadListDarwin::GetMachPortNumberByThreadID (nub_thread_t globally_unique_id) const -{ - 
PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - MachThreadSP thread_sp; - const size_t num_threads = m_threads.size(); - for (size_t idx = 0; idx < num_threads; ++idx) - { - if (m_threads[idx]->ThreadID() == globally_unique_id) - { - return m_threads[idx]->MachPortNumber(); - } - } - return 0; -} - -bool -NativeThreadListDarwin::GetRegisterValue (nub_thread_t tid, uint32_t set, uint32_t reg, DNBRegisterValue *reg_value ) const -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetRegisterValue(set, reg, reg_value); - - return false; -} - -bool -NativeThreadListDarwin::SetRegisterValue (nub_thread_t tid, uint32_t set, uint32_t reg, const DNBRegisterValue *reg_value ) const -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->SetRegisterValue(set, reg, reg_value); - - return false; -} - -nub_size_t -NativeThreadListDarwin::GetRegisterContext (nub_thread_t tid, void *buf, size_t buf_len) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->GetRegisterContext (buf, buf_len); - return 0; -} - -nub_size_t -NativeThreadListDarwin::SetRegisterContext (nub_thread_t tid, const void *buf, size_t buf_len) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->SetRegisterContext (buf, buf_len); - return 0; -} - -uint32_t -NativeThreadListDarwin::SaveRegisterState (nub_thread_t tid) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->SaveRegisterState (); - return 0; -} - -bool -NativeThreadListDarwin::RestoreRegisterState (nub_thread_t tid, uint32_t save_id) -{ - MachThreadSP thread_sp (GetThreadByID (tid)); - if (thread_sp) - return thread_sp->RestoreRegisterState (save_id); - return 0; -} -#endif - -size_t NativeThreadListDarwin::GetNumberOfThreads() const { - std::lock_guard locker(m_threads_mutex); - return static_cast(m_threads.size()); -} - -// TODO implement -#if 0 -nub_thread_t -NativeThreadListDarwin::ThreadIDAtIndex (nub_size_t idx) const -{ - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - if (idx < m_threads.size()) - return m_threads[idx]->ThreadID(); - return INVALID_NUB_THREAD; -} - -nub_thread_t -NativeThreadListDarwin::CurrentThreadID ( ) -{ - MachThreadSP thread_sp; - CurrentThread(thread_sp); - if (thread_sp.get()) - return thread_sp->ThreadID(); - return INVALID_NUB_THREAD; -} - -#endif - -bool NativeThreadListDarwin::NotifyException(MachException::Data &exc) { - auto thread_sp = GetThreadByMachPortNumber(exc.thread_port); - if (thread_sp) { - thread_sp->NotifyException(exc); - return true; - } - return false; -} - -void NativeThreadListDarwin::Clear() { - std::lock_guard locker(m_threads_mutex); - m_threads.clear(); -} - -uint32_t NativeThreadListDarwin::UpdateThreadList(NativeProcessDarwin &process, - bool update, - collection *new_threads) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_THREAD)); - - std::lock_guard locker(m_threads_mutex); - LLDB_LOGF(log, - "NativeThreadListDarwin::%s() (pid = %" PRIu64 ", update = " - "%u) process stop count = %u", - __FUNCTION__, process.GetID(), update, process.GetStopID()); - - if (process.GetStopID() == 0) { - // On our first stop, we'll record details like 32/64 bitness and select - // the proper architecture implementation. 
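For reference, the 64-bit check that follows reduces to a single sysctl query against the kernel's process table. A self-contained sketch of the same check, assuming a Darwin host (the helper name is hypothetical; the headers and the P_LP64 flag are the ones the code below actually uses):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/proc.h>

// Hypothetical helper (Darwin only): ask the kernel whether the process
// with the given pid runs as a 64-bit process by testing P_LP64 in its
// kinfo_proc entry.
static bool IsProcess64Bit(pid_t pid) {
  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, static_cast<int>(pid)};
  kinfo_proc info = {};
  size_t size = sizeof(info);
  if (::sysctl(mib, 4, &info, &size, nullptr, 0) != 0 || size == 0)
    return false; // query failed; conservatively report 32-bit
  return (info.kp_proc.p_flag & P_LP64) != 0;
}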
- // - int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)process.GetID()}; - - struct kinfo_proc processInfo; - size_t bufsize = sizeof(processInfo); - if ((sysctl(mib, (unsigned)(sizeof(mib) / sizeof(int)), &processInfo, - &bufsize, NULL, 0) == 0) && - (bufsize > 0)) { - if (processInfo.kp_proc.p_flag & P_LP64) - m_is_64_bit = true; - } - -// TODO implement architecture selection and abstraction. -#if 0 -#if defined(__i386__) || defined(__x86_64__) - if (m_is_64_bit) - DNBArchProtocol::SetArchitecture(CPU_TYPE_X86_64); - else - DNBArchProtocol::SetArchitecture(CPU_TYPE_I386); -#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__) - if (m_is_64_bit) - DNBArchProtocol::SetArchitecture(CPU_TYPE_ARM64); - else - DNBArchProtocol::SetArchitecture(CPU_TYPE_ARM); -#endif -#endif - } - - if (m_threads.empty() || update) { - thread_array_t thread_list = nullptr; - mach_msg_type_number_t thread_list_count = 0; - task_t task = process.GetTask(); - - Status error; - auto mach_err = ::task_threads(task, &thread_list, &thread_list_count); - error.SetError(mach_err, eErrorTypeMachKernel); - if (error.Fail()) { - LLDB_LOGF(log, - "::task_threads(task = 0x%4.4x, thread_list => %p, " - "thread_list_count => %u) failed: %u (%s)", - task, thread_list, thread_list_count, error.GetError(), - error.AsCString()); - return 0; - } - - if (thread_list_count > 0) { - collection currThreads; - size_t idx; - // Iterate through the current thread list and see which threads we - // already have in our list (keep them), which ones we don't (add them), - // and which ones are not around anymore (remove them). - for (idx = 0; idx < thread_list_count; ++idx) { - // Get the Mach thread port. - const ::thread_t mach_port_num = thread_list[idx]; - - // Get the unique thread id for the mach port number. - uint64_t unique_thread_id = - NativeThreadDarwin::GetGloballyUniqueThreadIDForMachPortID( - mach_port_num); - - // Retrieve the thread if it exists. - auto thread_sp = GetThreadByID(unique_thread_id); - if (thread_sp) { - // We are already tracking it. Keep the existing native thread - // instance. - currThreads.push_back(thread_sp); - } else { - // We don't have a native thread instance for this thread. Create it - // now. - thread_sp.reset(new NativeThreadDarwin( - &process, m_is_64_bit, unique_thread_id, mach_port_num)); - - // Add the new thread regardless of its user-ready state. Make - // sure the thread is ready to be displayed and shown to users before - // we add this thread to our list... - if (thread_sp->IsUserReady()) { - if (new_threads) - new_threads->push_back(thread_sp); - - currThreads.push_back(thread_sp); - } - } - } - - m_threads.swap(currThreads); - m_current_thread.reset(); - - // Free the vm memory given to us by ::task_threads() - vm_size_t thread_list_size = - (vm_size_t)(thread_list_count * sizeof(::thread_t)); - ::vm_deallocate(::mach_task_self(), (vm_address_t)thread_list, - thread_list_size); - } - } - return static_cast(m_threads.size()); -} - -// TODO implement -#if 0 - -void -NativeThreadListDarwin::CurrentThread (MachThreadSP& thread_sp) -{ - // locker will keep a mutex locked until it goes out of scope - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - if (m_current_thread.get() == NULL) - { - // Figure out which thread is going to be our current thread. This is - // currently done by finding the first thread in the list that has a - // valid exception.
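The update loop above is a reconcile pass: keep the wrapper objects for ports we already track, create wrappers for newly reported ports, and let vanished threads drop when the old and new vectors swap. The same keep/add/drop pattern in isolation, with hypothetical stand-in types:

#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical stand-in for a tracked thread, keyed by its unique id.
struct TrackedThread { uint64_t id; };
using TrackedThreadSP = std::shared_ptr<TrackedThread>;

// Build the new list from a snapshot of thread ids: reuse existing
// wrappers, create missing ones, and drop everything not re-added.
std::vector<TrackedThreadSP>
Reconcile(const std::vector<TrackedThreadSP> &old_list,
          const std::vector<uint64_t> &snapshot) {
  std::vector<TrackedThreadSP> current;
  for (uint64_t tid : snapshot) {
    TrackedThreadSP found;
    for (const TrackedThreadSP &t : old_list)
      if (t && t->id == tid) { found = t; break; } // keep existing
    if (!found)
      found = std::make_shared<TrackedThread>(TrackedThread{tid}); // add new
    current.push_back(found);
  }
  return current; // the caller swaps this in; dropped wrappers free here
}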
- const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; idx < num_threads; ++idx) - { - if (m_threads[idx]->GetStopException().IsValid()) - { - m_current_thread = m_threads[idx]; - break; - } - } - } - thread_sp = m_current_thread; -} - -#endif - -void NativeThreadListDarwin::Dump(Stream &stream) const { - bool first = true; - - std::lock_guard locker(m_threads_mutex); - for (auto thread_sp : m_threads) { - if (thread_sp) { - // Handle newlines between thread entries. - if (first) - first = false; - else - stream.PutChar('\n'); - thread_sp->Dump(stream); - } - } -} - -void NativeThreadListDarwin::ProcessWillResume( - NativeProcessDarwin &process, const ResumeActionList &thread_actions) { - std::lock_guard locker(m_threads_mutex); - - // Update our thread list, because sometimes libdispatch or the kernel will - // spawn threads while a task is suspended. - NativeThreadListDarwin::collection new_threads; - -// TODO implement this. -#if 0 - // First figure out if we were planning on running only one thread, and if - // so, force that thread to resume. - bool run_one_thread; - thread_t solo_thread = THREAD_NULL; - if ((thread_actions.GetSize() > 0) && - (thread_actions.NumActionsWithState(eStateStepping) + - thread_actions.NumActionsWithState (eStateRunning) == 1)) - { - run_one_thread = true; - const DNBThreadResumeAction *action_ptr = thread_actions.GetFirst(); - size_t num_actions = thread_actions.GetSize(); - for (size_t i = 0; i < num_actions; i++, action_ptr++) - { - if (action_ptr->state == eStateStepping || action_ptr->state == eStateRunning) - { - solo_thread = action_ptr->tid; - break; - } - } - } - else - run_one_thread = false; -#endif - - UpdateThreadList(process, true, &new_threads); - -#if 0 - DNBThreadResumeAction resume_new_threads = { -1U, eStateRunning, 0, INVALID_NUB_ADDRESS }; - // If we are planning to run only one thread, any new threads should be - // suspended. - if (run_one_thread) - resume_new_threads.state = eStateSuspended; - - const size_t num_new_threads = new_threads.size(); - const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; idx < num_threads; ++idx) - { - MachThread *thread = m_threads[idx].get(); - bool handled = false; - for (uint32_t new_idx = 0; new_idx < num_new_threads; ++new_idx) - { - if (thread == new_threads[new_idx].get()) - { - thread->ThreadWillResume(&resume_new_threads); - handled = true; - break; - } - } - - if (!handled) - { - const DNBThreadResumeAction *thread_action = thread_actions.GetActionForThread (thread->ThreadID(), true); - // There must always be a thread action for every thread. - assert (thread_action); - bool others_stopped = false; - if (solo_thread == thread->ThreadID()) - others_stopped = true; - thread->ThreadWillResume (thread_action, others_stopped); - } - } - - if (new_threads.size()) - { - for (uint32_t idx = 0; idx < num_new_threads; ++idx) - { - DNBLogThreadedIf (LOG_THREAD, "NativeThreadListDarwin::ProcessWillResume (pid = %4.4x) stop-id=%u, resuming newly discovered thread: 0x%8.8" PRIx64 ", thread-is-user-ready=%i)", - process->ProcessID(), - process->StopCount(), - new_threads[idx]->ThreadID(), - new_threads[idx]->IsUserReady()); - } - } -#endif -} - -uint32_t NativeThreadListDarwin::ProcessDidStop(NativeProcessDarwin &process) { - std::lock_guard locker(m_threads_mutex); - - // Update our thread list. 
- UpdateThreadList(process, true); - - for (auto thread_sp : m_threads) { - if (thread_sp) - thread_sp->ThreadDidStop(); - } - return (uint32_t)m_threads.size(); -} - -// Check each thread in our thread list to see if we should notify our client -// of the current halt in execution. -// -// Breakpoints can have callback functions associated with them that can return -// true to stop, or false to continue executing the inferior. -// -// RETURNS -// true if we should stop and notify our clients -// false if we should resume our child process and skip notification -bool NativeThreadListDarwin::ShouldStop(bool &step_more) { - std::lock_guard locker(m_threads_mutex); - for (auto thread_sp : m_threads) { - if (thread_sp && thread_sp->ShouldStop(step_more)) - return true; - } - return false; -} - -// Implement. -#if 0 - -void -NativeThreadListDarwin::NotifyBreakpointChanged (const DNBBreakpoint *bp) -{ - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; idx < num_threads; ++idx) - { - m_threads[idx]->NotifyBreakpointChanged(bp); - } -} - - -uint32_t -NativeThreadListDarwin::EnableHardwareBreakpoint (const DNBBreakpoint* bp) const -{ - if (bp != NULL) - { - const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; idx < num_threads; ++idx) - m_threads[idx]->EnableHardwareBreakpoint(bp); - } - return INVALID_NUB_HW_INDEX; -} - -bool -NativeThreadListDarwin::DisableHardwareBreakpoint (const DNBBreakpoint* bp) const -{ - if (bp != NULL) - { - const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; idx < num_threads; ++idx) - m_threads[idx]->DisableHardwareBreakpoint(bp); - } - return false; -} - -// DNBWatchpointSet() -> MachProcess::CreateWatchpoint() -> -// MachProcess::EnableWatchpoint() -> -// NativeThreadListDarwin::EnableHardwareWatchpoint(). -uint32_t -NativeThreadListDarwin::EnableHardwareWatchpoint (const DNBBreakpoint* wp) const -{ - uint32_t hw_index = INVALID_NUB_HW_INDEX; - if (wp != NULL) - { - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - const size_t num_threads = m_threads.size(); - // On Mac OS X we have to prime the control registers for new threads. - // We do this using the control register data for the first thread, for - // lack of a better way of choosing. - bool also_set_on_task = true; - for (uint32_t idx = 0; idx < num_threads; ++idx) - { - if ((hw_index = m_threads[idx]->EnableHardwareWatchpoint(wp, also_set_on_task)) == INVALID_NUB_HW_INDEX) - { - // We know that idx failed for some reason. Let's rollback the - // transaction for [0, idx). - for (uint32_t i = 0; i < idx; ++i) - m_threads[i]->RollbackTransForHWP(); - return INVALID_NUB_HW_INDEX; - } - also_set_on_task = false; - } - // Notify each thread to commit the pending transaction. - for (uint32_t idx = 0; idx < num_threads; ++idx) - m_threads[idx]->FinishTransForHWP(); - - } - return hw_index; -} - -bool -NativeThreadListDarwin::DisableHardwareWatchpoint (const DNBBreakpoint* wp) const -{ - if (wp != NULL) - { - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - const size_t num_threads = m_threads.size(); - - // On Mac OS X we have to prime the control registers for new threads. - // We do this using the control register data for the first thread, for - // lack of a better way of choosing. - bool also_set_on_task = true; - for (uint32_t idx = 0; idx < num_threads; ++idx) - { - if (!m_threads[idx]->DisableHardwareWatchpoint(wp, also_set_on_task)) - { - // We know that idx failed for some reason.
Let's rollback the - // transaction for [0, idx). - for (uint32_t i = 0; i < idx; ++i) - m_threads[i]->RollbackTransForHWP(); - return false; - } - also_set_on_task = false; - } - // Notify each thread to commit the pending transaction. - for (uint32_t idx = 0; idx < num_threads; ++idx) - m_threads[idx]->FinishTransForHWP(); - - return true; - } - return false; -} - -uint32_t -NativeThreadListDarwin::NumSupportedHardwareWatchpoints () const -{ - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - const size_t num_threads = m_threads.size(); - // Use an arbitrary thread to retrieve the number of supported hardware - // watchpoints. - if (num_threads) - return m_threads[0]->NumSupportedHardwareWatchpoints(); - return 0; -} - -uint32_t -NativeThreadListDarwin::GetThreadIndexForThreadStoppedWithSignal (const int signo) const -{ - PTHREAD_MUTEX_LOCKER (locker, m_threads_mutex); - uint32_t should_stop = false; - const size_t num_threads = m_threads.size(); - for (uint32_t idx = 0; !should_stop && idx < num_threads; ++idx) - { - if (m_threads[idx]->GetStopException().SoftSignal () == signo) - return idx; - } - return UINT32_MAX; -} - -#endif diff --git a/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.h b/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.h deleted file mode 100644 index 9ab0a7c8c8023..0000000000000 --- a/lldb/source/Plugins/Process/Darwin/NativeThreadListDarwin.h +++ /dev/null @@ -1,138 +0,0 @@ -//===-- NativeThreadListDarwin.h --------------------------------------*- C++ -//-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Created by Greg Clayton on 6/19/07. 
-// -//===----------------------------------------------------------------------===// - -#ifndef __NativeThreadListDarwin_h__ -#define __NativeThreadListDarwin_h__ - -#include -#include -#include - -#include "lldb/lldb-private-forward.h" -#include "lldb/lldb-types.h" - -#include "MachException.h" - -// #include "ThreadInfo.h" - -namespace lldb_private { -namespace process_darwin { - -class NativeBreakpointDarwin; -class NativeProcessDarwin; - -class NativeThreadDarwin; -using NativeThreadDarwinSP = std::shared_ptr; - -class NativeThreadListDarwin { -public: - NativeThreadListDarwin(); - ~NativeThreadListDarwin(); - - void Clear(); - - void Dump(Stream &stream) const; - -// These methods will be accessed directly from NativeThreadDarwin -#if 0 - bool GetRegisterValue (nub_thread_t tid, uint32_t set, uint32_t reg, DNBRegisterValue *reg_value) const; - bool SetRegisterValue (nub_thread_t tid, uint32_t set, uint32_t reg, const DNBRegisterValue *reg_value) const; - nub_size_t GetRegisterContext (nub_thread_t tid, void *buf, size_t buf_len); - nub_size_t SetRegisterContext (nub_thread_t tid, const void *buf, size_t buf_len); - uint32_t SaveRegisterState (nub_thread_t tid); - bool RestoreRegisterState (nub_thread_t tid, uint32_t save_id); -#endif - - const char *GetThreadInfo(lldb::tid_t tid) const; - - void ProcessWillResume(NativeProcessDarwin &process, - const ResumeActionList &thread_actions); - - uint32_t ProcessDidStop(NativeProcessDarwin &process); - - bool NotifyException(MachException::Data &exc); - - bool ShouldStop(bool &step_more); - -// These methods will be accessed directly from NativeThreadDarwin -#if 0 - const char * GetName (nub_thread_t tid); - nub_state_t GetState (nub_thread_t tid); - nub_thread_t SetCurrentThread (nub_thread_t tid); -#endif - -// TODO: figure out if we need to add this to NativeThreadDarwin yet. 
-#if 0 - ThreadInfo::QoS GetRequestedQoS (nub_thread_t tid, nub_addr_t tsd, uint64_t dti_qos_class_index); - nub_addr_t GetPThreadT (nub_thread_t tid); - nub_addr_t GetDispatchQueueT (nub_thread_t tid); - nub_addr_t GetTSDAddressForThread (nub_thread_t tid, uint64_t plo_pthread_tsd_base_address_offset, uint64_t plo_pthread_tsd_base_offset, uint64_t plo_pthread_tsd_entry_size); -#endif - -// These methods will be accessed directly from NativeThreadDarwin -#if 0 - bool GetThreadStoppedReason (nub_thread_t tid, struct DNBThreadStopInfo *stop_info) const; - void DumpThreadStoppedReason (nub_thread_t tid) const; - bool GetIdentifierInfo (nub_thread_t tid, thread_identifier_info_data_t *ident_info); -#endif - - size_t GetNumberOfThreads() const; - - lldb::tid_t ThreadIDAtIndex(size_t idx) const; - - lldb::tid_t GetCurrentThreadID(); - - NativeThreadDarwinSP GetCurrentThread(); - - void NotifyBreakpointChanged(const NativeBreakpointDarwin *bp); - - uint32_t EnableHardwareBreakpoint(const NativeBreakpointDarwin *bp) const; - - bool DisableHardwareBreakpoint(const NativeBreakpointDarwin *bp) const; - - uint32_t EnableHardwareWatchpoint(const NativeBreakpointDarwin *wp) const; - - bool DisableHardwareWatchpoint(const NativeBreakpointDarwin *wp) const; - - uint32_t GetNumberOfSupportedHardwareWatchpoints() const; - - size_t GetThreadIndexForThreadStoppedWithSignal(const int signo) const; - - NativeThreadDarwinSP GetThreadByID(lldb::tid_t tid) const; - - NativeThreadDarwinSP - GetThreadByMachPortNumber(::thread_t mach_port_number) const; - - lldb::tid_t GetThreadIDByMachPortNumber(::thread_t mach_port_number) const; - - thread_t GetMachPortNumberByThreadID(lldb::tid_t globally_unique_id) const; - -protected: - typedef std::vector collection; - typedef collection::iterator iterator; - typedef collection::const_iterator const_iterator; - - // Consider having this return an lldb_private::Status. 
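The hardware-watchpoint methods in the removed implementation above follow a small transaction protocol: stage the watchpoint on every thread, roll the range [0, idx) back as soon as thread idx fails, and commit only once every thread succeeded. A minimal sketch of that protocol, with a hypothetical per-thread interface standing in for EnableHardwareWatchpoint / RollbackTransForHWP / FinishTransForHWP (the stub bodies always succeed; the real ones talk to the debug registers):

#include <cstddef>
#include <vector>

// Hypothetical per-thread transaction hooks.
struct HwDebugThread {
  bool staged = false;
  bool Stage() { staged = true; return true; } // try to arm; may fail
  void Rollback() { staged = false; }          // undo a staged watchpoint
  void Commit() { /* make the staged watchpoint live */ }
};

// All-or-nothing enable across every thread in the list.
bool EnableOnAllThreads(std::vector<HwDebugThread> &threads) {
  for (std::size_t idx = 0; idx < threads.size(); ++idx) {
    if (!threads[idx].Stage()) {
      // Thread idx failed: roll back the transaction for [0, idx).
      for (std::size_t i = 0; i < idx; ++i)
        threads[i].Rollback();
      return false;
    }
  }
  for (HwDebugThread &t : threads) // every stage succeeded; commit
    t.Commit();
  return true;
}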
- uint32_t UpdateThreadList(NativeProcessDarwin &process, bool update, - collection *num_threads = nullptr); - - collection m_threads; - mutable std::recursive_mutex m_threads_mutex; - NativeThreadDarwinSP m_current_thread; - bool m_is_64_bit; -}; - -} // namespace process_darwin -} // namespace lldb_private - -#endif // #ifndef __NativeThreadListDarwin_h__ diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp index 310c5e142ef3c..b76f26a584c04 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp @@ -44,10 +44,12 @@ bool RegisterContextCorePOSIX_arm::WriteFPR() { bool RegisterContextCorePOSIX_arm::ReadRegister(const RegisterInfo *reg_info, RegisterValue &value) { lldb::offset_t offset = reg_info->byte_offset; - uint64_t v = m_gpr.GetMaxU64(&offset, reg_info->byte_size); - if (offset == reg_info->byte_offset + reg_info->byte_size) { - value = v; - return true; + if (offset + reg_info->byte_size <= GetGPRSize()) { + uint64_t v = m_gpr.GetMaxU64(&offset, reg_info->byte_size); + if (offset == reg_info->byte_offset + reg_info->byte_size) { + value = v; + return true; + } } return false; } diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp index 3e176b6eeff96..95c419b053fa9 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp @@ -24,6 +24,9 @@ RegisterContextCorePOSIX_arm64::RegisterContextCorePOSIX_arm64( gpregset.GetByteSize()); m_gpr.SetData(m_gpr_buffer); m_gpr.SetByteOrder(gpregset.GetByteOrder()); + + m_fpregset = getRegset( + notes, register_info->GetTargetArchitecture().GetTriple(), FPR_Desc); } RegisterContextCorePOSIX_arm64::~RegisterContextCorePOSIX_arm64() {} @@ -45,11 +48,26 @@ bool RegisterContextCorePOSIX_arm64::WriteFPR() { bool RegisterContextCorePOSIX_arm64::ReadRegister(const RegisterInfo *reg_info, RegisterValue &value) { lldb::offset_t offset = reg_info->byte_offset; - uint64_t v = m_gpr.GetMaxU64(&offset, reg_info->byte_size); - if (offset == reg_info->byte_offset + reg_info->byte_size) { - value = v; - return true; + if (offset + reg_info->byte_size <= GetGPRSize()) { + uint64_t v = m_gpr.GetMaxU64(&offset, reg_info->byte_size); + if (offset == reg_info->byte_offset + reg_info->byte_size) { + value = v; + return true; + } } + + const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; + if (reg == LLDB_INVALID_REGNUM) + return false; + + offset -= GetGPRSize(); + if (IsFPR(reg) && offset + reg_info->byte_size <= sizeof(FPU)) { + Status error; + value.SetFromMemoryData(reg_info, m_fpregset.GetDataStart() + offset, + reg_info->byte_size, lldb::eByteOrderLittle, error); + return error.Success(); + } + return false; } diff --git a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h index 7c35d89c4f137..5bbcdf5677f6f 100644 --- a/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h +++ b/lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h @@ -48,6 +48,7 @@ class RegisterContextCorePOSIX_arm64 : public RegisterContextPOSIX_arm64 { private: lldb::DataBufferSP m_gpr_buffer; lldb_private::DataExtractor m_gpr; + 
lldb_private::DataExtractor m_fpregset; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_ELF_CORE_REGISTERCONTEXTPOSIXCORE_ARM64_H diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 7d6cb2a3484b9..ae2f4bd041c9b 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -2040,7 +2040,7 @@ GDBRemoteCommunicationServerLLGS::Handle_P(StringExtractorGDBRemote &packet) { packet, "P packet missing '=' char after register number"); // Parse out the value. - uint8_t reg_bytes[32]; // big enough to support up to 256 bit ymmN register + uint8_t reg_bytes[RegisterValue::kMaxRegisterByteSize]; size_t reg_size = packet.GetHexBytesAvail(reg_bytes); // Get the thread to use. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index 3b5224cae7b2e..7b96c15bf3f9d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -395,145 +395,6 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( return !ranges.IsEmpty(); } -// Dump -// -// Dumps a debug information entry and all of its attributes to the specified -// stream. -void DWARFDebugInfoEntry::Dump(const DWARFUnit *cu, Stream &s, - uint32_t recurse_depth) const { - const DWARFDataExtractor &data = cu->GetData(); - lldb::offset_t offset = m_offset; - - if (data.ValidOffset(offset)) { - dw_uleb128_t abbrCode = data.GetULEB128(&offset); - - s.Printf("\n0x%8.8x: ", m_offset); - s.Indent(); - if (abbrCode != m_abbr_idx) { - s.Printf("error: DWARF has been modified\n"); - } else if (abbrCode) { - const auto *abbrevDecl = GetAbbreviationDeclarationPtr(cu); - if (abbrevDecl) { - s.PutCString(DW_TAG_value_to_name(abbrevDecl->Tag())); - s.Printf(" [%u] %c\n", abbrCode, abbrevDecl->HasChildren() ? '*' : ' '); - - // Dump all data in the .debug_info/.debug_types for the attributes - const uint32_t numAttributes = abbrevDecl->NumAttributes(); - for (uint32_t i = 0; i < numAttributes; ++i) { - DWARFFormValue form_value(cu); - dw_attr_t attr; - abbrevDecl->GetAttrAndFormValueByIndex(i, attr, form_value); - - DumpAttribute(cu, data, &offset, s, attr, form_value); - } - - const DWARFDebugInfoEntry *child = GetFirstChild(); - if (recurse_depth > 0 && child) { - s.IndentMore(); - - while (child) { - child->Dump(cu, s, recurse_depth - 1); - child = child->GetSibling(); - } - s.IndentLess(); - } - } else - s.Printf("Abbreviation code not found in 'debug_abbrev' class for " - "code: %u\n", - abbrCode); - } else { - s.Printf("NULL\n"); - } - } -} - -// DumpAttribute -// -// Dumps a debug information entry attribute along with its form. Any special -// display of attributes is done (disassemble location lists, show enumeration -// values for attributes, etc).
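The removed Dump above walks a DIE's children in the usual first-child / next-sibling encoding, with recurse_depth as the remaining budget. That traversal isolated, using a hypothetical node type:

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical DIE-like node in first-child/next-sibling form.
struct DieNode {
  const char *tag = "DW_TAG_unknown";
  const DieNode *first_child = nullptr;
  const DieNode *sibling = nullptr;
};

// Depth-limited dump: recurse_depth == 0 prints only this node, matching
// the convention of the removed DWARFDebugInfoEntry::Dump.
void DumpDie(const DieNode &die, unsigned indent, uint32_t recurse_depth) {
  std::cout << std::string(indent, ' ') << die.tag << '\n';
  if (recurse_depth == 0)
    return;
  for (const DieNode *child = die.first_child; child; child = child->sibling)
    DumpDie(*child, indent + 2, recurse_depth - 1);
}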
-void DWARFDebugInfoEntry::DumpAttribute( - const DWARFUnit *cu, const DWARFDataExtractor &data, - lldb::offset_t *offset_ptr, Stream &s, dw_attr_t attr, - DWARFFormValue &form_value) { - bool show_form = s.GetFlags().Test(DWARFDebugInfo::eDumpFlag_ShowForm); - - s.Printf(" "); - s.Indent(DW_AT_value_to_name(attr)); - - if (show_form) { - s.Printf("[%s", DW_FORM_value_to_name(form_value.Form())); - } - - if (!form_value.ExtractValue(data, offset_ptr)) - return; - - if (show_form) { - if (form_value.Form() == DW_FORM_indirect) { - s.Printf(" [%s]", DW_FORM_value_to_name(form_value.Form())); - } - - s.PutCString("] "); - } - - s.PutCString("( "); - - // Check to see if we have any special attribute formatters - switch (attr) { - case DW_AT_stmt_list: - s.Printf("0x%8.8" PRIx64, form_value.Unsigned()); - break; - - case DW_AT_language: - s.PutCString(DW_LANG_value_to_name(form_value.Unsigned())); - break; - - case DW_AT_encoding: - s.PutCString(DW_ATE_value_to_name(form_value.Unsigned())); - break; - - case DW_AT_frame_base: - case DW_AT_location: - case DW_AT_data_member_location: { - const uint8_t *blockData = form_value.BlockData(); - if (blockData) { - // Location description is inlined in data in the form value - DWARFDataExtractor locationData(data, - (*offset_ptr) - form_value.Unsigned(), - form_value.Unsigned()); - DWARFExpression::PrintDWARFExpression( - s, locationData, DWARFUnit::GetAddressByteSize(cu), 4, false); - } else { - // We have a location list offset as the value that is the offset into - // the .debug_loc section that describes the value over its lifetime - uint64_t debug_loc_offset = form_value.Unsigned(); - DWARFExpression::PrintDWARFLocationList(s, cu, cu->GetLocationData(), - debug_loc_offset); - } - } break; - - case DW_AT_abstract_origin: - case DW_AT_specification: { - DWARFDIE abstract_die = form_value.Reference(); - form_value.Dump(s); - // *ostrm_ptr << HEX32 << abstract_die.GetOffset() << " ( "; - abstract_die.GetName(s); - } break; - - case DW_AT_type: { - DWARFDIE type_die = form_value.Reference(); - s.PutCString(" ( "); - type_die.AppendTypeName(s); - s.PutCString(" )"); - } break; - - default: - break; - } - - s.PutCString(" )\n"); -} - // Get all attribute values for a given DIE, including following any // specification or abstract origin attributes and including those in the results.
Any duplicate attributes will have the first instance take diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index ca2dbd8a6bc0c..3fb9c9135e85f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -99,15 +99,6 @@ class DWARFDebugInfoEntry { const char *GetQualifiedName(DWARFUnit *cu, const DWARFAttributes &attributes, std::string &storage) const; - void Dump(const DWARFUnit *cu, lldb_private::Stream &s, - uint32_t recurse_depth) const; - - static void - DumpAttribute(const DWARFUnit *cu, - const lldb_private::DWARFDataExtractor &data, - lldb::offset_t *offset_ptr, lldb_private::Stream &s, - dw_attr_t attr, DWARFFormValue &form_value); - bool GetDIENamesAndRanges( DWARFUnit *cu, const char *&name, const char *&mangled, DWARFRangeList &rangeList, int &decl_file, int &decl_line, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 5eebc9657905f..ca7710d8ab143 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1450,17 +1450,20 @@ Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die, CompileUnit * SymbolFileDWARF::GetCompUnitForDWARFCompUnit(DWARFCompileUnit &dwarf_cu) { - DWARFCompileUnit *non_dwo_cu = - dwarf_cu.IsDWOUnit() - ? static_cast(dwarf_cu.GetUserData()) - : &dwarf_cu; + if (dwarf_cu.IsDWOUnit()) { + DWARFCompileUnit *non_dwo_cu = + static_cast(dwarf_cu.GetUserData()); + assert(non_dwo_cu); + return non_dwo_cu->GetSymbolFileDWARF().GetCompUnitForDWARFCompUnit( + *non_dwo_cu); + } // Check if the symbol vendor already knows about this compile unit? - if (non_dwo_cu->GetUserData() == nullptr) { + if (dwarf_cu.GetUserData() == nullptr) { // The symbol vendor doesn't know about this compile unit, we need to parse // and add it to the symbol vendor object. - return ParseCompileUnit(*non_dwo_cu).get(); + return ParseCompileUnit(dwarf_cu).get(); } - return static_cast(non_dwo_cu->GetUserData()); + return static_cast(dwarf_cu.GetUserData()); } void SymbolFileDWARF::GetObjCMethods( diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 7acdad2924fbb..de1ce9d36b040 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -1563,15 +1563,7 @@ TypeSystemClang::CreateClassTemplateSpecializationDecl( ast.getTypeDeclType(class_template_specialization_decl, nullptr); class_template_specialization_decl->setDeclName( class_template_decl->getDeclName()); - // FIXME: Turning this on breaks the libcxx data formatter tests. - // SetOwningModule marks the Decl as external, which prevents a - // LookupPtr from being built. Template instantiations can also not - // be found by ExternalASTSource::FindExternalVisibleDeclsByName(), - // nor can we lazily build a LookupPtr later, because template - // specializations are supposed to be hidden so - // makeDeclVisibleInContextWithFlags() is a noop, as well. 
- // - // SetOwningModule(class_template_specialization_decl, owning_module); + SetOwningModule(class_template_specialization_decl, owning_module); decl_ctx->addDecl(class_template_specialization_decl); class_template_specialization_decl->setSpecializationKind( diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp index 2dabcccf4ce1a..b1a8a9517f3fc 100644 --- a/lldb/source/Target/Language.cpp +++ b/lldb/source/Target/Language.cpp @@ -139,13 +139,6 @@ Language::GetPossibleFormattersMatches(ValueObject &valobj, return {}; } -lldb_private::formatters::StringPrinter::EscapingHelper -Language::GetStringPrinterEscapingHelper( - lldb_private::formatters::StringPrinter::GetPrintableElementType - elem_type) { - return StringPrinter::GetDefaultEscapingHelper(elem_type); -} - struct language_name_pair { const char *name; LanguageType type; diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp index bb56ade716630..91f4025c923cd 100644 --- a/lldb/source/Utility/RegisterValue.cpp +++ b/lldb/source/Utility/RegisterValue.cpp @@ -810,7 +810,7 @@ bool RegisterValue::operator==(const RegisterValue &rhs) const { if (buffer.length != rhs.buffer.length) return false; else { - uint8_t length = buffer.length; + uint16_t length = buffer.length; if (length > kMaxRegisterByteSize) length = kMaxRegisterByteSize; return memcmp(buffer.bytes, rhs.buffer.bytes, length) == 0; diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py index 2a5978c9844b3..2704e0ed25ad1 100644 --- a/lldb/test/API/commands/target/basic/TestTargetCommand.py +++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py @@ -326,7 +326,7 @@ def test_target_create_multiple_args(self): @no_debug_info_test def test_target_create_nonexistent_core_file(self): self.expect("target create -c doesntexist", error=True, - substrs=["core file 'doesntexist' doesn't exist"]) + patterns=["Cannot open 'doesntexist'", ": (No such file or directory|The system cannot find the file specified)"]) # Write only files don't seem to be supported on Windows. 
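The RegisterValue::operator== change above fixes a plain integer-truncation bug: a register buffer length can apparently exceed 255, so copying it into a uint8_t wraps the value before the kMaxRegisterByteSize clamp runs, and memcmp then compares too few bytes. A minimal illustration of the wrap:

#include <cstdint>
#include <cstdio>

int main() {
  uint16_t stored_length = 300; // e.g. a wide vector register
  uint8_t narrow = static_cast<uint8_t>(stored_length); // wraps to 44
  uint16_t wide = stored_length;                        // preserved: 300
  std::printf("narrow=%u wide=%u\n", (unsigned)narrow, (unsigned)wide);
  return 0;
}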
@skipIfWindows @@ -335,12 +335,12 @@ def test_target_create_unreadable_core_file(self): tf = tempfile.NamedTemporaryFile() os.chmod(tf.name, stat.S_IWRITE) self.expect("target create -c '" + tf.name + "'", error=True, - substrs=["core file '", "' is not readable"]) + substrs=["Cannot open '", "': Permission denied"]) @no_debug_info_test def test_target_create_nonexistent_sym_file(self): self.expect("target create -s doesntexist doesntexisteither", error=True, - substrs=["invalid symbol file path 'doesntexist'"]) + patterns=["Cannot open '", ": (No such file or directory|The system cannot find the file specified)"]) @skipIfWindows @no_debug_info_test @@ -357,7 +357,7 @@ def test_target_create_unreadable_sym_file(self): tf = tempfile.NamedTemporaryFile() os.chmod(tf.name, stat.S_IWRITE) self.expect("target create -s '" + tf.name + "' no_exe", error=True, - substrs=["symbol file '", "' is not readable"]) + substrs=["Cannot open '", "': Permission denied"]) @no_debug_info_test def test_target_delete_all(self): diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py index 52170082238a9..67a5dff6c2d52 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSContainer.py @@ -29,6 +29,8 @@ def nscontainers_data_formatter_commands(self): ' 2 key/value pairs', '(NSDictionary *) newDictionary = ', ' 12 key/value pairs', + '(NSDictionary *) nscfDictionary = ', + ' 4 key/value pairs', '(CFDictionaryRef) cfDictionaryRef = ', ' 3 key/value pairs', '(NSDictionary *) newMutableDictionary = ', @@ -39,6 +41,36 @@ def nscontainers_data_formatter_commands(self): ' @"11 elements"', ]) + self.expect( + 'frame variable -d run-target *nscfDictionary', + patterns=[ + '\(__NSCFDictionary\) \*nscfDictionary =', + 'key = 0x.* @"foo"', + 'value = 0x.* @"foo"', + 'key = 0x.* @"bar"', + 'value = 0x.* @"bar"', + 'key = 0x.* @"baz"', + 'value = 0x.* @"baz"', + 'key = 0x.* @"quux"', + 'value = 0x.* @"quux"', + ]) + + + self.expect( + 'frame var nscfSet', + substrs=[ + '(NSSet *) nscfSet = ', + '2 elements', + ]) + + self.expect( + 'frame variable -d run-target *nscfSet', + patterns=[ + '\(__NSCFSet\) \*nscfSet =', + '\[0\] = 0x.* @".*"', + '\[1\] = 0x.* @".*"', + ]) + self.expect( 'frame variable iset1 iset2 imset', substrs=['4 indexes', '512 indexes', '10 indexes']) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m index 92a616f191219..d3507608ac9d6 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/main.m @@ -376,13 +376,19 @@ int main (int argc, const char * argv[]) [newMutableDictionary setObject:@"foo" forKey:@"bar19"]; [newMutableDictionary setObject:@"foo" forKey:@"bar20"]; - id cfKeys[2] = { @"foo", @"bar", @"baz", @"quux" }; - id cfValues[2] = { @"foo", @"bar", @"baz", @"quux" }; - NSDictionary *nsDictionary = CFBridgingRelease(CFDictionaryCreate(nil, (void *)cfKeys, (void *)cfValues, 2, nil, nil)); - CFDictionaryRef cfDictionaryRef = CFDictionaryCreate(nil, (void *)cfKeys, (void *)cfValues, 3, nil, nil); - - NSAttributedString* attrString = [[NSAttributedString alloc] initWithString:@"hello world 
from foo" attributes:newDictionary]; - [attrString isEqual:nil]; + id cfKeys[4] = {@"foo", @"bar", @"baz", @"quux"}; + id cfValues[4] = {@"foo", @"bar", @"baz", @"quux"}; + NSDictionary *nsDictionary = CFBridgingRelease(CFDictionaryCreate( + nil, (void *)cfKeys, (void *)cfValues, 2, nil, nil)); + NSDictionary *nscfDictionary = CFBridgingRelease(CFDictionaryCreate( + nil, (void *)cfKeys, (void *)cfValues, 4, nil, nil)); + CFDictionaryRef cfDictionaryRef = CFDictionaryCreate( + nil, (void *)cfKeys, (void *)cfValues, 3, nil, nil); + + NSAttributedString *attrString = [[NSAttributedString alloc] + initWithString:@"hello world from foo" + attributes:newDictionary]; + [attrString isEqual:nil]; NSAttributedString* mutableAttrString = [[NSMutableAttributedString alloc] initWithString:@"hello world from foo" attributes:newDictionary]; [mutableAttrString isEqual:nil]; @@ -411,9 +417,11 @@ int main (int argc, const char * argv[]) NSSet* nsset = [[NSSet alloc] initWithObjects:str1,str2,str3,nil]; NSSet *nsmutableset = [[NSMutableSet alloc] initWithObjects:str1,str2,str3,nil]; - [nsmutableset addObject:str4]; + [nsmutableset addObject:str4]; + NSSet *nscfSet = + CFBridgingRelease(CFSetCreate(nil, (void *)cfValues, 2, nil)); - CFDataRef data_ref = CFDataCreate(kCFAllocatorDefault, [immutableData bytes], 5); + CFDataRef data_ref = CFDataCreate(kCFAllocatorDefault, [immutableData bytes], 5); CFMutableDataRef mutable_data_ref = CFDataCreateMutable(kCFAllocatorDefault, 8); CFDataAppendBytes(mutable_data_ref, [mutableData bytes], 5); diff --git a/lldb/test/API/functionalities/exec/main.cpp b/lldb/test/API/functionalities/exec/main.cpp index bec470fd13ef1..51c67d5f232dc 100644 --- a/lldb/test/API/functionalities/exec/main.cpp +++ b/lldb/test/API/functionalities/exec/main.cpp @@ -12,7 +12,8 @@ int main(int argc, char const **argv) { std::string directory_name(::dirname(buf)); std::string other_program = directory_name + "/secondprog"; - execve(other_program.c_str(), const_cast(argv), nullptr); + argv[0] = other_program.c_str(); + execv(argv[0], const_cast(argv)); perror("execve"); abort(); } diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 435d3358b0308..63bb02e5eb60f 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -19,6 +19,7 @@ class LinuxCoreTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) + _aarch64_pid = 37688 _i386_pid = 32306 _x86_64_pid = 32259 _s390x_pid = 1045 @@ -27,12 +28,20 @@ class LinuxCoreTestCase(TestBase): _mips_o32_pid = 3532 _ppc64le_pid = 28147 + _aarch64_regions = 4 _i386_regions = 4 _x86_64_regions = 5 _s390x_regions = 2 _mips_regions = 5 _ppc64le_regions = 2 + + @skipIf(triple='^mips') + @skipIfLLVMTargetMissing("AArch64") + def test_aarch64(self): + """Test that lldb can read the process information from an aarch64 linux core file.""" + self.do_test("linux-aarch64", self._aarch64_pid, self._aarch64_regions, "a.out") + @skipIf(triple='^mips') @skipIfLLVMTargetMissing("X86") def test_i386(self): @@ -247,6 +256,61 @@ def test_x86_64_sysroot(self): self.dbg.DeleteTarget(target) + @skipIf(triple='^mips') + @skipIfLLVMTargetMissing("AArch64") + def test_aarch64_regs(self): + # check 64 bit ARM core files + target = self.dbg.CreateTarget(None) + self.assertTrue(target, VALID_TARGET) + process = target.LoadCore("linux-aarch64-neon.core") + + values = {} + values["x1"] = 
"0x000000000000002f" + values["w1"] = "0x0000002f" + values["fp"] = "0x0000007fc5dd7f20" + values["lr"] = "0x0000000000400180" + values["sp"] = "0x0000007fc5dd7f00" + values["pc"] = "0x000000000040014c" + values["v0"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0xe0 0x3f 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v1"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0xf8 0x3f 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v2"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0x04 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v3"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0x0c 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v4"] = "{0x00 0x00 0x90 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v5"] = "{0x00 0x00 0xb0 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v6"] = "{0x00 0x00 0xd0 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v7"] = "{0x00 0x00 0xf0 0x40 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v8"] = "{0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11}" + values["v27"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v28"] = "{0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00}" + values["v31"] = "{0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30 0x30}" + values["s2"] = "0" + values["s3"] = "0" + values["s4"] = "4.5" + values["s5"] = "5.5" + values["s6"] = "6.5" + values["s7"] = "7.5" + values["s8"] = "1.14437e-28" + values["s30"] = "0" + values["s31"] = "6.40969e-10" + values["d0"] = "0.5" + values["d1"] = "1.5" + values["d2"] = "2.5" + values["d3"] = "3.5" + values["d4"] = "5.35161536149201e-315" + values["d5"] = "5.36197666906508e-315" + values["d6"] = "5.37233797663815e-315" + values["d7"] = "5.38269928421123e-315" + values["d8"] = "1.80107573659442e-226" + values["d30"] = "0" + values["d31"] = "1.39804328609529e-76" + values["fpsr"] = "0x00000000" + values["fpcr"] = "0x00000000" + + for regname, value in values.items(): + self.expect("register read {}".format(regname), substrs=["{} = {}".format(regname, value)]) + + self.expect("register read --all") + @skipIf(triple='^mips') @skipIfLLVMTargetMissing("ARM") def test_arm_core(self): @@ -276,6 +340,8 @@ def test_arm_core(self): for regname, value in values.items(): self.expect("register read {}".format(regname), substrs=["{} = {}".format(regname, value)]) + self.expect("register read --all") + def check_memory_regions(self, process, region_count): region_list = process.GetMemoryRegions() self.assertEqual(region_list.GetSize(), region_count) diff --git a/lldb/test/API/functionalities/postmortem/elf-core/aarch64-neon.c b/lldb/test/API/functionalities/postmortem/elf-core/aarch64-neon.c new file mode 100644 index 0000000000000..e5742d2e44b72 --- /dev/null +++ b/lldb/test/API/functionalities/postmortem/elf-core/aarch64-neon.c @@ -0,0 +1,28 @@ +// compile with -march=armv8-a+sve on compatible aarch64 compiler +// linux-aarch64-sve.core was generated by: aarch64-linux-gnu-gcc-8 +// commandline: -march=armv8-a+sve -nostdlib -static -g linux-aarch64-sve.c +static void bar(char *boom) { + char F = 'b'; + asm volatile("fmov d0, #0.5\n\t"); + asm volatile("fmov d1, #1.5\n\t"); + asm volatile("fmov d2, #2.5\n\t"); + asm volatile("fmov d3, #3.5\n\t"); + asm volatile("fmov s4, #4.5\n\t"); + asm volatile("fmov s5, #5.5\n\t"); + asm volatile("fmov s6, #6.5\n\t"); + asm volatile("fmov s7, #7.5\n\t"); + asm 
volatile("movi v8.16b, #0x11\n\t"); + asm volatile("movi v31.16b, #0x30\n\t"); + + *boom = 47; // Frame bar +} + +static void foo(char *boom, void (*boomer)(char *)) { + char F = 'f'; + boomer(boom); // Frame foo +} + +void _start(void) { + char F = '_'; + foo(0, bar); // Frame _start +} diff --git a/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64-neon.core b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64-neon.core new file mode 100644 index 0000000000000..5444c53a896a4 Binary files /dev/null and b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64-neon.core differ diff --git a/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.core b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.core new file mode 100644 index 0000000000000..f9211cab27789 Binary files /dev/null and b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.core differ diff --git a/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.out b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.out new file mode 100755 index 0000000000000..78304a97a826f Binary files /dev/null and b/lldb/test/API/functionalities/postmortem/elf-core/linux-aarch64.out differ diff --git a/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py b/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py index 2d9632eb2dd6f..eb2d264ec2e3c 100644 --- a/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py +++ b/lldb/test/API/functionalities/thread/step_out/TestThreadStepOut.py @@ -70,9 +70,9 @@ def setUp(self): self.bkpt_string = '// Set breakpoint here' self.breakpoint = line_number('main.cpp', self.bkpt_string) - if "gcc" in self.getCompiler() or self.isIntelCompiler() or self.getArchitecture() in ['arm64', 'arm64e']: + if "gcc" in self.getCompiler() or self.isIntelCompiler(): self.step_out_destination = line_number( - 'main.cpp', '// Expect to stop here after step-out (icc and gcc; arm64)') + 'main.cpp', '// Expect to stop here after step-out (icc and gcc)') else: self.step_out_destination = line_number( 'main.cpp', '// Expect to stop here after step-out (clang)') diff --git a/lldb/test/API/functionalities/thread/step_out/main.cpp b/lldb/test/API/functionalities/thread/step_out/main.cpp index 76818fff925aa..14d84010de8aa 100644 --- a/lldb/test/API/functionalities/thread/step_out/main.cpp +++ b/lldb/test/API/functionalities/thread/step_out/main.cpp @@ -22,7 +22,7 @@ thread_func () step_out_of_here(); // Expect to stop here after step-out (clang) // Return - return NULL; // Expect to stop here after step-out (icc and gcc; arm64) + return NULL; // Expect to stop here after step-out (icc and gcc) } int main () diff --git a/lldb/test/API/lang/cpp/operators/main.cpp b/lldb/test/API/lang/cpp/operators/main.cpp index 7afea1e957cea..c52ef1c8cac47 100644 --- a/lldb/test/API/lang/cpp/operators/main.cpp +++ b/lldb/test/API/lang/cpp/operators/main.cpp @@ -4,12 +4,11 @@ int side_effect = 0; struct B { int dummy = 2324; }; struct C { - void *operator new(std::size_t size) { C* r = ::new C; r->custom_new = true; return r; } - void *operator new[](std::size_t size) { C* r = static_cast(std::malloc(size)); r->custom_new = true; return r; } + void *operator new(std::size_t size) { void *p = ::operator new(size); side_effect = 3; return p; } + void *operator new[](std::size_t size) { void *p = ::operator new(size); side_effect = 4; return p; } void operator delete(void *p) { std::free(p); side_effect = 1; } void operator 
delete[](void *p) { std::free(p); side_effect = 2; } - bool custom_new = false; B b; B* operator->() { return &b; } int operator->*(int) { return 2; } @@ -171,8 +170,8 @@ int main(int argc, char **argv) { //% self.expect("expr static_cast(c)", endstr=" 12\n") //% self.expect("expr c.operatorint()", endstr=" 13\n") //% self.expect("expr c.operatornew()", endstr=" 14\n") - //% self.expect("expr (new struct C)->custom_new", endstr=" true\n") - //% self.expect("expr (new struct C[1])->custom_new", endstr=" true\n") + //% self.expect("expr (new struct C); side_effect", endstr=" = 3\n") + //% self.expect("expr (new struct C[1]); side_effect", endstr=" = 4\n") //% self.expect("expr delete c2; side_effect", endstr=" = 1\n") //% self.expect("expr delete[] c3; side_effect", endstr=" = 2\n") delete c2; diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py b/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py new file mode 100644 index 0000000000000..55efd91d827a6 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py @@ -0,0 +1,43 @@ +""" +Test lldb-vscode coreFile attaching +""" + + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase +import os + + +class TestVSCode_coreFile(lldbvscode_testcase.VSCodeTestCaseBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIfWindows + @skipIfRemote + @skipIfLLVMTargetMissing("X86") + def test_core_file(self): + current_dir = os.path.dirname(os.path.realpath(__file__)) + exe_file = os.path.join(current_dir, "linux-x86_64.out") + core_file = os.path.join(current_dir, "linux-x86_64.core") + + self.create_debug_adaptor() + self.attach(exe_file, coreFile=core_file) + + expected_frames = [ + {'column': 0, 'id': 524288, 'line': 4, 'name': 'bar', 'source': {'name': 'main.c', 'path': '/home/labath/test/main.c'}}, + {'column': 0, 'id': 524289, 'line': 10, 'name': 'foo', 'source': {'name': 'main.c', 'path': '/home/labath/test/main.c'}}, + {'column': 0, 'id': 524290, 'line': 16, 'name': '_start', 'source': {'name': 'main.c', 'path': '/home/labath/test/main.c'}}, + ] + + self.assertEquals(self.get_stackFrames(), expected_frames) + + # Resuming should have no effect and keep the process stopped + self.continue_to_next_stop() + self.assertEquals(self.get_stackFrames(), expected_frames) + + self.vscode.request_next(threadId=32259) + self.assertEquals(self.get_stackFrames(), expected_frames) diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.core b/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.core new file mode 100644 index 0000000000000..2675eadd6a7f6 Binary files /dev/null and b/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.core differ diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.out b/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.out new file mode 100755 index 0000000000000..842402fd519d2 Binary files /dev/null and b/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.out differ diff --git a/lldb/test/Shell/Commands/command-source.test b/lldb/test/Shell/Commands/command-source.test index d8218850c32c1..fa389f2a12889 100644 --- a/lldb/test/Shell/Commands/command-source.test +++ b/lldb/test/Shell/Commands/command-source.test @@ -1,8 +1,8 @@ # Check that stop command source on error. 
-# RUN: %lldb -x -b -o "command source -e 1 %s" 2>&1 | FileCheck %s --check-prefix STOP +# RUN: not %lldb -x -b -o "command source -e 1 %s" 2>&1 | FileCheck %s --check-prefix STOP # RUN: %lldb -x -b -o "command source -e 0 %s" 2>&1 | FileCheck %s --check-prefix CONTINUE -# RUN: %lldb -x -b -o 'settings set interpreter.stop-command-source-on-error true' -o "command source %s" 2>&1 | FileCheck %s --check-prefix STOP +# RUN: not %lldb -x -b -o 'settings set interpreter.stop-command-source-on-error true' -o "command source %s" 2>&1 | FileCheck %s --check-prefix STOP # RUN: %lldb -x -b -o 'settings set interpreter.stop-command-source-on-error false' -o "command source %s" 2>&1 | FileCheck %s --check-prefix CONTINUE bogus diff --git a/lldb/test/Shell/Driver/TestProcessAttach.test b/lldb/test/Shell/Driver/TestProcessAttach.test index 4e24ebb161b6e..ab75814e21ce0 100644 --- a/lldb/test/Shell/Driver/TestProcessAttach.test +++ b/lldb/test/Shell/Driver/TestProcessAttach.test @@ -1,2 +1,2 @@ -# RUN: %lldb -x -b -S %S/Inputs/process_attach_pid.in 2>&1 | FileCheck %s +# RUN: not %lldb -x -b -S %S/Inputs/process_attach_pid.in 2>&1 | FileCheck %s # CHECK: last option requires an argument diff --git a/lldb/test/Shell/Host/TestCustomShell.test b/lldb/test/Shell/Host/TestCustomShell.test index fd97b4c2b06e2..75114c5544934 100644 --- a/lldb/test/Shell/Host/TestCustomShell.test +++ b/lldb/test/Shell/Host/TestCustomShell.test @@ -8,7 +8,7 @@ # XFAIL: system-openbsd # RUN: %clang_host %S/Inputs/simple.c -g -o %t.out -# RUN: SHELL=bogus %lldb %t.out -b -o 'run' 2>&1 | FileCheck %s --check-prefix ERROR +# RUN: SHELL=bogus not %lldb %t.out -b -o 'run' 2>&1 | FileCheck %s --check-prefix ERROR # RUN: env -i %lldb %t.out -b -o 'run' 2>&1 | FileCheck %s # ERROR: error: shell expansion failed diff --git a/lldb/test/Shell/Quit/TestQuitExitCodeNonInt.test b/lldb/test/Shell/Quit/TestQuitExitCodeNonInt.test index 87c0bd41bb05f..1747ddd669b60 100644 --- a/lldb/test/Shell/Quit/TestQuitExitCodeNonInt.test +++ b/lldb/test/Shell/Quit/TestQuitExitCodeNonInt.test @@ -1,4 +1,4 @@ # UNSUPPORTED: system-windows -# RUN: %lldb -b -s %s 2>&1 | FileCheck %s +# RUN: not %lldb -b -s %s 2>&1 | FileCheck %s q str // CHECK: Couldn't parse 'str' diff --git a/lldb/test/Shell/Quit/TestQuitExitCodeTooManyArgs.test b/lldb/test/Shell/Quit/TestQuitExitCodeTooManyArgs.test index a67669451e992..315adf02af4d1 100644 --- a/lldb/test/Shell/Quit/TestQuitExitCodeTooManyArgs.test +++ b/lldb/test/Shell/Quit/TestQuitExitCodeTooManyArgs.test @@ -1,4 +1,4 @@ # UNSUPPORTED: system-windows -# RUN: %lldb -b -s %s 2>&1 | FileCheck %s +# RUN: not %lldb -b -s %s 2>&1 | FileCheck %s q 1 2 // CHECK: Too many arguments for 'quit' diff --git a/lldb/test/Shell/Reproducer/TestDiscard.test b/lldb/test/Shell/Reproducer/TestDiscard.test index 829aabbe2b03b..aee56f77c06f7 100644 --- a/lldb/test/Shell/Reproducer/TestDiscard.test +++ b/lldb/test/Shell/Reproducer/TestDiscard.test @@ -6,7 +6,7 @@ # RUN: %clang_host %S/Inputs/simple.c -g -o %t/reproducer.out # Capture but don't generate the reproducer. -# RUN: %lldb -x -b -s %S/Inputs/Discard.in --capture --capture-path %t.repro %t/reproducer.out +# RUN: not %lldb -x -b -s %S/Inputs/Discard.in --capture --capture-path %t.repro %t/reproducer.out # Make sure the directory doesn't exist. 
# RUN: mkdir %t.repro diff --git a/lldb/test/Shell/Reproducer/TestDump.test b/lldb/test/Shell/Reproducer/TestDump.test index 8300a97004bbf..cf2c89c938b7d 100644 --- a/lldb/test/Shell/Reproducer/TestDump.test +++ b/lldb/test/Shell/Reproducer/TestDump.test @@ -25,9 +25,9 @@ # RUN: %lldb --replay %t.repro | FileCheck %s --check-prefix FILES # RUN: rm %t.repro/gdb-remote.yaml -# RUN: %lldb -b -o 'reproducer dump -p gdb -f %t.repro' 2>&1 | FileCheck %s --check-prefix GDB-ERROR +# RUN: not %lldb -b -o 'reproducer dump -p gdb -f %t.repro' 2>&1 | FileCheck %s --check-prefix GDB-ERROR # GDB-ERROR: error: Unable to create GDB loader. # RUN: rm %t.repro/command-interpreter.yaml -# RUN: %lldb -b -o 'reproducer dump -p commands -f %t.repro' 2>&1 | FileCheck %s --check-prefix COMMANDS-ERROR +# RUN: not %lldb -b -o 'reproducer dump -p commands -f %t.repro' 2>&1 | FileCheck %s --check-prefix COMMANDS-ERROR # COMMANDS-ERROR: error: Unable to create command loader. diff --git a/lldb/test/Shell/Settings/TestSettingsSet.test b/lldb/test/Shell/Settings/TestSettingsSet.test index 0def3faaadbb2..3006a694a16b2 100644 --- a/lldb/test/Shell/Settings/TestSettingsSet.test +++ b/lldb/test/Shell/Settings/TestSettingsSet.test @@ -1,7 +1,7 @@ # This tests setting setting values. # Check that setting an empty value with -f(orce) clears the value. -# RUN: %lldb -b -s %s 2>&1 | FileCheck %s +# RUN: not %lldb -b -s %s 2>&1 | FileCheck %s settings set tab-size 16 settings show tab-size diff --git a/lldb/test/Shell/Settings/TestStopCommandSourceOnError.test b/lldb/test/Shell/Settings/TestStopCommandSourceOnError.test index a53dc2cd6868d..d734a0940a2d7 100644 --- a/lldb/test/Shell/Settings/TestStopCommandSourceOnError.test +++ b/lldb/test/Shell/Settings/TestStopCommandSourceOnError.test @@ -12,13 +12,13 @@ # RUN: %lldb -b -o 'settings set interpreter.stop-command-source-on-error false' -s %S/Inputs/StopCommandSource.in | FileCheck %s --check-prefix CONTINUE # FIXME: Should continue -# RUN: %lldb -b -s %S/Inputs/DontStopCommandSource.in -o 'bogus' -o 'print 111100000 + 11111' | FileCheck %s --check-prefix STOP +# RUN: not %lldb -b -s %S/Inputs/DontStopCommandSource.in -o 'bogus' -o 'print 111100000 + 11111' | FileCheck %s --check-prefix STOP # FIXME: Should continue -# RUN: %lldb -b -o 'settings set interpreter.stop-command-source-on-error false' -o 'bogus' -o 'print 123400000 + 56789' | FileCheck %s --check-prefix STOP +# RUN: not %lldb -b -o 'settings set interpreter.stop-command-source-on-error false' -o 'bogus' -o 'print 123400000 + 56789' | FileCheck %s --check-prefix STOP # FIXME: Should continue -# RUN: %lldb -b -s %S/Inputs/DontStopCommandSource.in | FileCheck %s --check-prefix STOP +# RUN: not %lldb -b -s %S/Inputs/DontStopCommandSource.in | FileCheck %s --check-prefix STOP # FIXME: Should continue -# RUN: %lldb -b -o 'settings set interpreter.stop-command-source-on-error true' -s %S/Inputs/DontStopCommandSource.in | FileCheck %s --check-prefix STOP +# RUN: not %lldb -b -o 'settings set interpreter.stop-command-source-on-error true' -s %S/Inputs/DontStopCommandSource.in | FileCheck %s --check-prefix STOP diff --git a/lldb/test/Shell/SymbolFile/DWARF/debug-types-missing-signature.test b/lldb/test/Shell/SymbolFile/DWARF/debug-types-missing-signature.test index f9c02061fc863..8f2ef7135afc4 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/debug-types-missing-signature.test +++ b/lldb/test/Shell/SymbolFile/DWARF/debug-types-missing-signature.test @@ -14,10 +14,10 @@ LOOKUPE: no type was found matching 'E' RUN: %lldb %t -b -o 
"type lookup EC" | FileCheck --check-prefix=LOOKUPEC %s LOOKUPEC: no type was found matching 'EC' -RUN: %lldb %t -b -o "print (E) 1" 2>&1 | FileCheck --check-prefix=PRINTE %s +RUN: not %lldb %t -b -o "print (E) 1" 2>&1 | FileCheck --check-prefix=PRINTE %s PRINTE: use of undeclared identifier 'E' -RUN: %lldb %t -b -o "print (EC) 1" 2>&1 | FileCheck --check-prefix=PRINTEC %s +RUN: not %lldb %t -b -o "print (EC) 1" 2>&1 | FileCheck --check-prefix=PRINTEC %s PRINTEC: use of undeclared identifier 'EC' RUN: %lldb %t -b -o "target variable a e ec" | FileCheck --check-prefix=VARS %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm b/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm index f6522e1a808c9..f675c3aa8204f 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm +++ b/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm @@ -1,6 +1,7 @@ // RUN: %clang --target=x86_64-apple-macosx -g -gmodules \ // RUN: -fmodules -fmodules-cache-path=%t.cache \ // RUN: -c -o %t.o %s -I%S/Inputs +// RUN: lldb-test symbols -dump-clang-ast %t.o | FileCheck %s // Verify that the owning module information from DWARF is preserved in the AST. @import A; @@ -51,10 +52,10 @@ @implementation SomeClass { // Template specializations are not yet supported, so they lack the ownership info: Template t2; -// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} struct Template +// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} imported in A struct Template Namespace::InNamespace t3; -// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} struct InNamespace +// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} imported in A struct InNamespace Namespace::AlsoInNamespace t4; -// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} struct AlsoInNamespace +// CHECK-DAG: ClassTemplateSpecializationDecl {{.*}} imported in A.B struct AlsoInNamespace diff --git a/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp new file mode 100644 index 0000000000000..dcdc81b3e7eb5 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp @@ -0,0 +1,37 @@ +// This tests a crash which occured under very specific circumstances. 
The +// interesting aspects of this test are: +// - we print a global variable from one compile unit +// - we are stopped in a member function of a class in a namespace +// - that namespace is also present in a third file, which also has a global +// variable + +// UNSUPPORTED: system-darwin, system-windows + +// RUN: %clang_host -c -gsplit-dwarf %s -o %t1.o -DONE +// RUN: %clang_host -c -gsplit-dwarf %s -o %t2.o -DTWO +// RUN: %clang_host -c -gsplit-dwarf %s -o %t3.o -DTHREE +// RUN: %clang_host %t1.o %t2.o %t3.o -o %t +// RUN: %lldb %t -o "br set -n foo" -o run -o "p bool_in_first_cu" -o exit \ +// RUN: | FileCheck %s + +// CHECK: (lldb) p bool_in_first_cu +// CHECK: (bool) $0 = true + + +#if defined(ONE) +bool bool_in_first_cu = true; +#elif defined(TWO) +bool bool_in_second_cu = true; + +namespace NS { +void f() {} +} +#elif defined(THREE) +namespace NS { +struct S { + void foo() {} +}; +} + +int main() { NS::S().foo(); } +#endif diff --git a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test index e748b4e5c73c3..682b0e5332b1c 100644 --- a/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test +++ b/lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test @@ -5,7 +5,7 @@ # UNSUPPORTED: system-windows # RUN: %clang_host %p/Inputs/call-asm.c -x assembler-with-cpp %p/Inputs/thread-step-out-ret-addr-check.s -o %t -# RUN: %lldb %t -s %s -b 2>&1 | FileCheck %s +# RUN: not %lldb %t -s %s -b 2>&1 | FileCheck %s breakpoint set -n nonstandard_stub # CHECK: Breakpoint 1: where = {{.*}}`nonstandard_stub diff --git a/lldb/tools/debugserver/CMakeLists.txt b/lldb/tools/debugserver/CMakeLists.txt index 8bd0ecc7d1ff2..fc23cf3c7e206 100644 --- a/lldb/tools/debugserver/CMakeLists.txt +++ b/lldb/tools/debugserver/CMakeLists.txt @@ -13,13 +13,13 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(debugserverConfig) include(AddLLDB) - # debugserver contains ObjC++ code, so let's disable Clang modules - # in this subdirectory to avoid building ObjC++ modules (which often - # doesn't properly work). - remove_module_flags() - set(LLDB_SOURCE_DIR "${CMAKE_SOURCE_DIR}/../../") include_directories(${LLDB_SOURCE_DIR}/include) endif() +# debugserver contains ObjC++ code, so let's disable Clang modules +# in this subdirectory to avoid building ObjC++ modules (which often +# doesn't properly work). +remove_module_flags() + add_subdirectory(source) diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index 56f181597b182..b38423b285590 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -9,6 +9,7 @@ #include "Driver.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBCommandReturnObject.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBFile.h" @@ -587,10 +588,7 @@ int Driver::MainLoop() { const char *commands_data = commands_stream.GetData(); const size_t commands_size = commands_stream.GetSize(); - // The command file might have requested that we quit, this variable will - // track that. - bool quit_requested = false; - bool stopped_for_crash = false; + bool go_interactive = true; if ((commands_data != nullptr) && (commands_size != 0u)) { FILE *commands_file = PrepareCommandsForSourcing(commands_data, commands_size); @@ -602,23 +600,33 @@ int Driver::MainLoop() { m_debugger.SetInputFileHandle(commands_file, true); - // Set the debugger into Sync mode when running the command file. 
- // Otherwise command files - // that run the target won't run in a sensible way. + // Set the debugger into Sync mode when running the command file. Otherwise + // command files that run the target won't run in a sensible way. bool old_async = m_debugger.GetAsync(); m_debugger.SetAsync(false); - int num_errors = 0; SBCommandInterpreterRunOptions options; + options.SetAutoHandleEvents(true); + options.SetSpawnThread(false); options.SetStopOnError(true); - if (m_option_data.m_batch) - options.SetStopOnCrash(true); - - m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, - num_errors, quit_requested, - stopped_for_crash); + options.SetStopOnCrash(m_option_data.m_batch); + + SBCommandInterpreterRunResult results = + m_debugger.RunCommandInterpreter(options); + if (results.GetResult() == lldb::eCommandInterpreterResultQuitRequested) + go_interactive = false; + if (m_option_data.m_batch && + results.GetResult() != lldb::eCommandInterpreterResultInferiorCrash) + go_interactive = false; + + // When running in batch mode and stopped because of an error, exit with a + // non-zero exit status. + if (m_option_data.m_batch && + results.GetResult() == lldb::eCommandInterpreterResultCommandError) + exit(1); - if (m_option_data.m_batch && stopped_for_crash && + if (m_option_data.m_batch && + results.GetResult() == lldb::eCommandInterpreterResultInferiorCrash && !m_option_data.m_after_crash_commands.empty()) { SBStream crash_commands_stream; WriteCommandsForSourcing(eCommandPlacementAfterCrash, @@ -628,30 +636,27 @@ int Driver::MainLoop() { commands_file = PrepareCommandsForSourcing(crash_commands_data, crash_commands_size); if (commands_file != nullptr) { - bool local_quit_requested; - bool local_stopped_for_crash; m_debugger.SetInputFileHandle(commands_file, true); - - m_debugger.RunCommandInterpreter(handle_events, spawn_thread, options, - num_errors, local_quit_requested, - local_stopped_for_crash); - if (local_quit_requested) - quit_requested = true; + SBCommandInterpreterRunResult local_results = + m_debugger.RunCommandInterpreter(options); + if (local_results.GetResult() == + lldb::eCommandInterpreterResultQuitRequested) + go_interactive = false; + + // When running in batch mode and an error occurred while sourcing + // the crash commands, exit with a non-zero exit status. + if (m_option_data.m_batch && + local_results.GetResult() == + lldb::eCommandInterpreterResultCommandError) + exit(1); } } m_debugger.SetAsync(old_async); } - // Now set the input file handle to STDIN and run the command - // interpreter again in interactive mode or repl mode and let the debugger - // take ownership of stdin - - bool go_interactive = true; - if (quit_requested) - go_interactive = false; - else if (m_option_data.m_batch && !stopped_for_crash) - go_interactive = false; - + // Now set the input file handle to STDIN and run the command interpreter + // again in interactive mode or repl mode and let the debugger take ownership + // of stdin. 
if (go_interactive) { m_debugger.SetInputFileHandle(stdin, true); diff --git a/lldb/tools/lldb-test/CMakeLists.txt b/lldb/tools/lldb-test/CMakeLists.txt index f3530fd7b8590..60b4a7ca8f70a 100644 --- a/lldb/tools/lldb-test/CMakeLists.txt +++ b/lldb/tools/lldb-test/CMakeLists.txt @@ -24,5 +24,9 @@ add_lldb_tool(lldb-test Support ) +if(PYTHON_RPATH) + set_property(TARGET lldb-test APPEND PROPERTY INSTALL_RPATH "${PYTHON_RPATH}") +endif() + target_include_directories(lldb-test PRIVATE ${LLDB_SOURCE_DIR}/source) target_include_directories(lldb-test PRIVATE ${LLDB_BINARY_DIR}/source) diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 2e7a57ee397e8..8fcf179b29aad 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -9,9 +9,9 @@ #include #include "llvm/ADT/Optional.h" - #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ScopedPrinter.h" #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBBreakpointLocation.h" @@ -41,8 +41,8 @@ llvm::StringRef GetAsString(const llvm::json::Value &value) { // Gets a string from a JSON object using the key, or returns an empty string. llvm::StringRef GetString(const llvm::json::Object &obj, llvm::StringRef key) { - if (auto value = obj.getString(key)) - return GetAsString(*value); + if (llvm::Optional value = obj.getString(key)) + return *value; return llvm::StringRef(); } @@ -114,13 +114,9 @@ std::vector GetStrings(const llvm::json::Object *obj, strs.push_back(value.getAsString()->str()); break; case llvm::json::Value::Number: - case llvm::json::Value::Boolean: { - std::string s; - llvm::raw_string_ostream strm(s); - strm << value; - strs.push_back(strm.str()); + case llvm::json::Value::Boolean: + strs.push_back(llvm::to_string(value)); break; - } case llvm::json::Value::Null: case llvm::json::Value::Object: case llvm::json::Value::Array: diff --git a/lldb/tools/lldb-vscode/README.md b/lldb/tools/lldb-vscode/README.md index 2294659fc2947..20c713164c293 100644 --- a/lldb/tools/lldb-vscode/README.md +++ b/lldb/tools/lldb-vscode/README.md @@ -181,15 +181,15 @@ for processes. Currently MacOS is the only platform that supports this. ### Loading a Core File -Loading a core file can use the `"attach"` request along with the -`"attachCommands"` to implement a custom attach: +This loads the coredump file `/cores/123.core` associated with the program +`/tmp/a.out`: ```javascript { - "name": "Attach to Name (wait)", + "name": "Load coredump", "type": "lldb-vscode", "request": "attach", - "attachCommands": ["target create -c /path/to/123.core /path/to/executable"], - "stopOnEntry": false + "coreFile": "/cores/123.core", + "program": "/tmp/a.out" } ``` diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 04a2125e3456e..764eded8ce8dc 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -530,16 +530,17 @@ void request_attach(const llvm::json::Object &request) { g_vsc.stop_commands = GetStrings(arguments, "stopCommands"); g_vsc.exit_commands = GetStrings(arguments, "exitCommands"); auto attachCommands = GetStrings(arguments, "attachCommands"); - g_vsc.stop_at_entry = GetBoolean(arguments, "stopOnEntry", false); - const auto debuggerRoot = GetString(arguments, "debuggerRoot"); + llvm::StringRef core_file = GetString(arguments, "coreFile"); + g_vsc.stop_at_entry = + core_file.empty() ? 
GetBoolean(arguments, "stopOnEntry", false) : true; + const llvm::StringRef debuggerRoot = GetString(arguments, "debuggerRoot"); // This is a hack for loading DWARF in .o files on Mac where the .o files // in the debug map of the main executable have relative paths which require // the lldb-vscode binary to have its working directory set to that relative // root for the .o files in order to be able to load debug info. - if (!debuggerRoot.empty()) { - llvm::sys::fs::set_current_path(debuggerRoot.data()); - } + if (!debuggerRoot.empty()) + llvm::sys::fs::set_current_path(debuggerRoot); // Run any initialize LLDB commands the user specified in the launch.json g_vsc.RunInitCommands(); @@ -569,7 +570,10 @@ void request_attach(const llvm::json::Object &request) { // Disable async events so the attach will be successful when we return from // the launch call and the launch will happen synchronously g_vsc.debugger.SetAsync(false); - g_vsc.target.Attach(attach_info, error); + if (core_file.empty()) + g_vsc.target.Attach(attach_info, error); + else + g_vsc.target.LoadCore(core_file.data(), error); // Reenable async events g_vsc.debugger.SetAsync(true); } else { @@ -584,7 +588,7 @@ void request_attach(const llvm::json::Object &request) { SetSourceMapFromArguments(*arguments); - if (error.Success()) { + if (error.Success() && core_file.empty()) { auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); if (attached_pid == LLDB_INVALID_PROCESS_ID) { if (attachCommands.empty()) @@ -1363,15 +1367,14 @@ void request_launch(const llvm::json::Object &request) { g_vsc.exit_commands = GetStrings(arguments, "exitCommands"); auto launchCommands = GetStrings(arguments, "launchCommands"); g_vsc.stop_at_entry = GetBoolean(arguments, "stopOnEntry", false); - const auto debuggerRoot = GetString(arguments, "debuggerRoot"); + const llvm::StringRef debuggerRoot = GetString(arguments, "debuggerRoot"); // This is a hack for loading DWARF in .o files on Mac where the .o files // in the debug map of the main executable have relative paths which require // the lldb-vscode binary to have its working directory set to that relative // root for the .o files in order to be able to load debug info. - if (!debuggerRoot.empty()) { - llvm::sys::fs::set_current_path(debuggerRoot.data()); - } + if (!debuggerRoot.empty()) + llvm::sys::fs::set_current_path(debuggerRoot); // Run any initialize LLDB commands the user specified in the launch.json. // This is run before target is created, so commands can't do anything with diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 8dabf3de1ec27..1df16d0dfe35d 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -218,8 +218,12 @@ }, "exitCommands": { "type": "array", - "description": "Commands executed at the end of debugging session.", - "default": [] + "description": "Commands executed at the end of debugging session.", + "default": [] + }, + "coreFile": { + "type": "string", + "description": "Path to the core file to debug." 
} } } diff --git a/lldb/unittests/API/CMakeLists.txt b/lldb/unittests/API/CMakeLists.txt index 6f71165a8a702..308249b63add1 100644 --- a/lldb/unittests/API/CMakeLists.txt +++ b/lldb/unittests/API/CMakeLists.txt @@ -4,3 +4,7 @@ add_lldb_unittest(APITests LINK_LIBS liblldb ) + +if(PYTHON_RPATH) + set_property(TARGET APITests APPEND PROPERTY BUILD_RPATH "${PYTHON_RPATH}") +endif() diff --git a/lldb/unittests/DataFormatter/CMakeLists.txt b/lldb/unittests/DataFormatter/CMakeLists.txt index fc60bff058798..45011c56b0b0f 100644 --- a/lldb/unittests/DataFormatter/CMakeLists.txt +++ b/lldb/unittests/DataFormatter/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_unittest(LLDBFormatterTests FormatManagerTests.cpp + StringPrinterTests.cpp LINK_LIBS lldbCore diff --git a/lldb/unittests/DataFormatter/StringPrinterTests.cpp b/lldb/unittests/DataFormatter/StringPrinterTests.cpp new file mode 100644 index 0000000000000..180b13772af53 --- /dev/null +++ b/lldb/unittests/DataFormatter/StringPrinterTests.cpp @@ -0,0 +1,159 @@ +//===-- StringPrinterTests.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/DataFormatters/StringPrinter.h" +#include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/Endian.h" +#include "lldb/Utility/StreamString.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" +#include + +using namespace lldb; +using namespace lldb_private; +using lldb_private::formatters::StringPrinter; +using llvm::Optional; +using llvm::StringRef; + +#define QUOTE(x) std::string("\"" x "\"") + +/// Format \p input according to the specified string encoding and special char +/// escape style. +template +static Optional format(StringRef input, + StringPrinter::EscapeStyle escape_style) { + StreamString out; + StringPrinter::ReadBufferAndDumpToStreamOptions opts; + opts.SetStream(&out); + opts.SetSourceSize(input.size()); + opts.SetNeedsZeroTermination(true); + opts.SetEscapeNonPrintables(true); + opts.SetIgnoreMaxLength(false); + opts.SetEscapeStyle(escape_style); + DataExtractor extractor(input.data(), input.size(), + endian::InlHostByteOrder(), sizeof(void *)); + opts.SetData(extractor); + const bool success = StringPrinter::ReadBufferAndDumpToStream(opts); + if (!success) + return llvm::None; + return out.GetString().str(); +} + +// Test ASCII formatting for C++. This behaves exactly like UTF8 formatting for +// C++, although that's questionable (see FIXME in StringPrinter.cpp). +TEST(StringPrinterTests, CxxASCII) { + auto fmt = [](StringRef str) { + return format( + str, StringPrinter::EscapeStyle::CXX); + }; + + // Special escapes. + EXPECT_EQ(fmt({"\0", 1}), QUOTE("")); + EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)")); + EXPECT_EQ(fmt("\b"), QUOTE(R"(\b)")); + EXPECT_EQ(fmt("\f"), QUOTE(R"(\f)")); + EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)")); + EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)")); + EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)")); + EXPECT_EQ(fmt("\v"), QUOTE(R"(\v)")); + EXPECT_EQ(fmt("\""), QUOTE(R"(\")")); + EXPECT_EQ(fmt("\'"), QUOTE(R"(')")); + EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)")); + + // Printable characters. 
+ EXPECT_EQ(fmt("'"), QUOTE("'")); + EXPECT_EQ(fmt("a"), QUOTE("a")); + EXPECT_EQ(fmt("Z"), QUOTE("Z")); + EXPECT_EQ(fmt("🥑"), QUOTE("🥑")); + + // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn). + EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C")); + EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348")); + + // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds + // that these are not valid utf8 sequences, but that's OK, the raw values + // should still be printed out. + EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal. + EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal. +} + +// Test UTF8 formatting for C++. +TEST(StringPrinterTests, CxxUTF8) { + auto fmt = [](StringRef str) { + return format( + str, StringPrinter::EscapeStyle::CXX); + }; + + // Special escapes. + EXPECT_EQ(fmt({"\0", 1}), QUOTE("")); + EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)")); + EXPECT_EQ(fmt("\b"), QUOTE(R"(\b)")); + EXPECT_EQ(fmt("\f"), QUOTE(R"(\f)")); + EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)")); + EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)")); + EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)")); + EXPECT_EQ(fmt("\v"), QUOTE(R"(\v)")); + EXPECT_EQ(fmt("\""), QUOTE(R"(\")")); + EXPECT_EQ(fmt("\'"), QUOTE(R"(')")); + EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)")); + + // Printable characters. + EXPECT_EQ(fmt("'"), QUOTE("'")); + EXPECT_EQ(fmt("a"), QUOTE("a")); + EXPECT_EQ(fmt("Z"), QUOTE("Z")); + EXPECT_EQ(fmt("🥑"), QUOTE("🥑")); + + // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn). + EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C")); + EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348")); + + // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds + // that these are not valid utf8 sequences, but that's OK, the raw values + // should still be printed out. + EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal. + EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal. +} + +// Test UTF8 formatting for Swift. +TEST(StringPrinterTests, SwiftUTF8) { + auto fmt = [](StringRef str) { + return format( + str, StringPrinter::EscapeStyle::Swift); + }; + + // Special escapes. + EXPECT_EQ(fmt({"\0", 1}), QUOTE("")); + EXPECT_EQ(fmt("\a"), QUOTE(R"(\a)")); + EXPECT_EQ(fmt("\b"), QUOTE(R"(\u{8})")); + EXPECT_EQ(fmt("\f"), QUOTE(R"(\u{c})")); + EXPECT_EQ(fmt("\n"), QUOTE(R"(\n)")); + EXPECT_EQ(fmt("\r"), QUOTE(R"(\r)")); + EXPECT_EQ(fmt("\t"), QUOTE(R"(\t)")); + EXPECT_EQ(fmt("\v"), QUOTE(R"(\u{b})")); + EXPECT_EQ(fmt("\""), QUOTE(R"(\")")); + EXPECT_EQ(fmt("\'"), QUOTE(R"(\')")); + EXPECT_EQ(fmt("\\"), QUOTE(R"(\\)")); + + // Printable characters. + EXPECT_EQ(fmt("'"), QUOTE(R"(\')")); + EXPECT_EQ(fmt("a"), QUOTE("a")); + EXPECT_EQ(fmt("Z"), QUOTE("Z")); + EXPECT_EQ(fmt("🥑"), QUOTE("🥑")); + + // Octal (\nnn), hex (\xnn), extended octal (\unnnn or \Unnnnnnnn). + EXPECT_EQ(fmt("\uD55C"), QUOTE("\uD55C")); + EXPECT_EQ(fmt("\U00010348"), QUOTE("\U00010348")); + + // FIXME: These strings are all rejected, but shouldn't be AFAICT. LLDB finds + // that these are not valid utf8 sequences, but that's OK, the raw values + // should still be printed out. + EXPECT_NE(fmt("\376"), QUOTE(R"(\xfe)")); // \376 is 254 in decimal. + EXPECT_NE(fmt("\xfe"), QUOTE(R"(\xfe)")); // \xfe is 254 in decimal. 
+} diff --git a/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp b/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp index 655febc11d14d..76c54a96b22e7 100644 --- a/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp +++ b/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp @@ -22,11 +22,6 @@ class ConnectionFileDescriptorTest : public testing::Test { void TestGetURI(std::string ip) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - if (!IsAddressFamilySupported(ip)) { - GTEST_LOG_(WARNING) << "Skipping test due to missing IPv" - << (IsIPv4(ip) ? "4" : "6") << " support."; - return; - } CreateTCPConnectedSockets(ip, &socket_a_up, &socket_b_up); auto socket = socket_a_up.release(); ConnectionFileDescriptor connection_file_descriptor(socket); @@ -42,6 +37,14 @@ class ConnectionFileDescriptorTest : public testing::Test { } }; -TEST_F(ConnectionFileDescriptorTest, TCPGetURIv4) { TestGetURI("127.0.0.1"); } - -TEST_F(ConnectionFileDescriptorTest, TCPGetURIv6) { TestGetURI("::1"); } +TEST_F(ConnectionFileDescriptorTest, TCPGetURIv4) { + if (!HostSupportsIPv4()) + return; + TestGetURI("127.0.0.1"); +} + +TEST_F(ConnectionFileDescriptorTest, TCPGetURIv6) { + if (!HostSupportsIPv6()) + return; + TestGetURI("::1"); +} diff --git a/lldb/unittests/Host/SocketTest.cpp b/lldb/unittests/Host/SocketTest.cpp index 54548d36956ce..c53d2660f0c81 100644 --- a/lldb/unittests/Host/SocketTest.cpp +++ b/lldb/unittests/Host/SocketTest.cpp @@ -111,10 +111,8 @@ TEST_F(SocketTest, TCPListen0ConnectAccept) { TEST_F(SocketTest, TCPGetAddress) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - if (!IsAddressFamilySupported("127.0.0.1")) { - GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support."; + if (!HostSupportsIPv4()) return; - } CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up); EXPECT_EQ(socket_a_up->GetLocalPortNumber(), @@ -148,10 +146,8 @@ TEST_F(SocketTest, TCPListen0GetPort) { TEST_F(SocketTest, TCPGetConnectURI) { std::unique_ptr socket_a_up; std::unique_ptr socket_b_up; - if (!IsAddressFamilySupported("127.0.0.1")) { - GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support."; + if (!HostSupportsIPv4()) return; - } CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up); llvm::StringRef scheme; @@ -165,10 +161,8 @@ TEST_F(SocketTest, TCPGetConnectURI) { } TEST_F(SocketTest, UDPGetConnectURI) { - if (!IsAddressFamilySupported("127.0.0.1")) { - GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support."; + if (!HostSupportsIPv4()) return; - } llvm::Expected> socket = UDPSocket::Connect("127.0.0.1:0", /*child_processes_inherit=*/false); ASSERT_THAT_EXPECTED(socket, llvm::Succeeded()); diff --git a/lldb/unittests/Host/SocketTestUtilities.cpp b/lldb/unittests/Host/SocketTestUtilities.cpp index ab883531bdf20..e2006b85115db 100644 --- a/lldb/unittests/Host/SocketTestUtilities.cpp +++ b/lldb/unittests/Host/SocketTestUtilities.cpp @@ -91,13 +91,37 @@ void lldb_private::CreateDomainConnectedSockets( } #endif -bool lldb_private::IsAddressFamilySupported(std::string ip) { - auto addresses = lldb_private::SocketAddress::GetAddressInfo( - ip.c_str(), NULL, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP); - return addresses.size() > 0; +static bool CheckIPSupport(llvm::StringRef Proto, llvm::StringRef Addr) { + llvm::Expected> Sock = Socket::TcpListen( + Addr, /*child_processes_inherit=*/false, /*predicate=*/nullptr); + if (Sock) + return true; + llvm::Error Err = Sock.takeError(); + GTEST_LOG_(WARNING) << llvm::formatv( + "Creating a canary {0} 
TCP socket failed: {1}.", + Proto, Err) + .str(); + bool HasAddrNotAvail = false; + handleAllErrors(std::move(Err), [&](std::unique_ptr ECErr) { + if (ECErr->convertToErrorCode() == + std::make_error_code(std::errc::address_not_available)) + HasAddrNotAvail = true; + }); + if (HasAddrNotAvail) { + GTEST_LOG_(WARNING) + << llvm::formatv( + "Assuming the host does not support {0}. Skipping test.", Proto) + .str(); + return false; + } + GTEST_LOG_(WARNING) << "Continuing anyway. The test will probably fail."; + return true; +} + +bool lldb_private::HostSupportsIPv4() { + return CheckIPSupport("IPv4", "127.0.0.1:0"); } -bool lldb_private::IsIPv4(std::string ip) { - struct sockaddr_in sock_addr; - return inet_pton(AF_INET, ip.c_str(), &(sock_addr.sin_addr)) != 0; +bool lldb_private::HostSupportsIPv6() { + return CheckIPSupport("IPv6", "[::1]:0"); } diff --git a/lldb/unittests/Host/SocketTestUtilities.h b/lldb/unittests/Host/SocketTestUtilities.h index 3562782475417..943d98a96be08 100644 --- a/lldb/unittests/Host/SocketTestUtilities.h +++ b/lldb/unittests/Host/SocketTestUtilities.h @@ -40,8 +40,8 @@ void CreateDomainConnectedSockets(llvm::StringRef path, std::unique_ptr *b_up); #endif -bool IsAddressFamilySupported(std::string ip); -bool IsIPv4(std::string ip); +bool HostSupportsIPv6(); +bool HostSupportsIPv4(); } // namespace lldb_private -#endif \ No newline at end of file +#endif diff --git a/lldb/unittests/ScriptInterpreter/Python/CMakeLists.txt b/lldb/unittests/ScriptInterpreter/Python/CMakeLists.txt index 471f4c6dbc354..913bd629526d7 100644 --- a/lldb/unittests/ScriptInterpreter/Python/CMakeLists.txt +++ b/lldb/unittests/ScriptInterpreter/Python/CMakeLists.txt @@ -8,4 +8,8 @@ add_lldb_unittest(ScriptInterpreterPythonTests LLVMTestingSupport LINK_COMPONENTS Support - ) \ No newline at end of file + ) + +if(PYTHON_RPATH) + set_property(TARGET ScriptInterpreterPythonTests APPEND PROPERTY BUILD_RPATH "${PYTHON_RPATH}") +endif() \ No newline at end of file diff --git a/lldb/unittests/Utility/FileSpecTest.cpp b/lldb/unittests/Utility/FileSpecTest.cpp index 690c5ae331ee2..ad2e328ce82fe 100644 --- a/lldb/unittests/Utility/FileSpecTest.cpp +++ b/lldb/unittests/Utility/FileSpecTest.cpp @@ -246,13 +246,11 @@ TEST(FileSpecTest, GetPath) { {R"(\\net)", R"(\\net)"}, {R"(c:\..)", R"(c:\)"}, {R"(c:\.)", R"(c:\)"}, - // TODO: fix llvm::sys::path::remove_dots() to return "\" below. - {R"(\..)", R"(\..)"}, + {R"(\..)", R"(\)"}, // {R"(c:..)", R"(c:..)"}, {R"(..)", R"(..)"}, {R"(.)", R"(.)"}, - // TODO: fix llvm::sys::path::remove_dots() to return "c:\" below. - {R"(c:..\..)", R"(c:\..\..)"}, + {R"(c:..\..)", R"(c:)"}, {R"(..\..)", R"(..\..)"}, {R"(foo\..)", R"(.)"}, {R"(foo\..\bar)", R"(bar)"}, diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp index 81bc578853709..d15345d665171 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp @@ -123,31 +123,51 @@ void SPIRVLowerConstExpr::visit(Module *M) { auto FBegin = I.begin(); while (!WorkList.empty()) { auto II = WorkList.front(); - WorkList.pop_front(); - for (unsigned OI = 0, OE = II->getNumOperands(); OI != OE; ++OI) { - auto Op = II->getOperand(OI); - if (auto CE = dyn_cast(Op)) { - SPIRVDBG(dbgs() << "[lowerConstantExpressions] " << *CE;) - auto ReplInst = CE->getAsInstruction(); - auto InsPoint = II->getParent() == &*FBegin ? 
II : &FBegin->back(); - ReplInst->insertBefore(InsPoint); - SPIRVDBG(dbgs() << " -> " << *ReplInst << '\n';) - WorkList.push_front(ReplInst); - std::vector Users; - // Do not replace use during iteration of use. Do it in another loop - for (auto U : CE->users()) { - SPIRVDBG(dbgs() - << "[lowerConstantExpressions] Use: " << *U << '\n';) - if (auto InstUser = dyn_cast(U)) { - // Only replace users in scope of current function - if (InstUser->getParent()->getParent() == &I) - Users.push_back(InstUser); - } + auto LowerOp = [&II, &FBegin, &I](ConstantExpr *CE) { + SPIRVDBG(dbgs() << "[lowerConstantExpressions] " << *CE;) + auto ReplInst = CE->getAsInstruction(); + auto InsPoint = II->getParent() == &*FBegin ? II : &FBegin->back(); + ReplInst->insertBefore(InsPoint); + SPIRVDBG(dbgs() << " -> " << *ReplInst << '\n';) + std::vector Users; + // Do not replace use during iteration of use. Do it in another loop + for (auto U : CE->users()) { + SPIRVDBG(dbgs() << "[lowerConstantExpressions] Use: " << *U << '\n';) + if (auto InstUser = dyn_cast(U)) { + // Only replace users in scope of current function + if (InstUser->getParent()->getParent() == &I) + Users.push_back(InstUser); } - for (auto &User : Users) - User->replaceUsesOfWith(CE, ReplInst); } + for (auto &User : Users) + User->replaceUsesOfWith(CE, ReplInst); + return ReplInst; + }; + + WorkList.pop_front(); + for (unsigned OI = 0, OE = II->getNumOperands(); OI != OE; ++OI) { + auto Op = II->getOperand(OI); + auto *Vec = dyn_cast(Op); + if (Vec && std::all_of(Vec->op_begin(), Vec->op_end(), + [](Value *V) { return isa(V); })) { + // Expand a vector of constexprs and construct it back with series of + // insertelement instructions + std::list ReplList; + std::transform( + Vec->op_begin(), Vec->op_end(), std::back_inserter(ReplList), + [LowerOp](Value *V) { return LowerOp(cast(V)); }); + Value *Repl = nullptr; + unsigned Idx = 0; + for (auto V : ReplList) + Repl = InsertElementInst::Create( + (Repl ? 
Repl : UndefValue::get(Vec->getType())), V, + ConstantInt::get(Type::getInt32Ty(M->getContext()), Idx++), "", + II); + II->replaceUsesOfWith(Op, Repl); + WorkList.splice(WorkList.begin(), ReplList); + } else if (auto CE = dyn_cast(Op)) + WorkList.push_front(LowerOp(CE)); } } } diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp index 3458d320609d1..61783dd161370 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp @@ -267,7 +267,7 @@ class SPIRVLowerSPIRBlocks : public ModulePass { LLVM_DEBUG(dbgs() << "[lowerGetBlockInvoke] " << *CallInv); // Handle ret = block_func_ptr(context_ptr, args) auto CI = cast(CallInv); - auto F = CI->getCalledValue(); + auto F = CI->getCalledOperand(); if (InvokeF == nullptr) { getBlockInvokeFuncAndContext(CallGetBlkInvoke->getArgOperand(0), &InvokeF, nullptr); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index 510c3e0443e6a..1c09fc76a5ef4 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -47,11 +47,6 @@ namespace SPIRV { -static cl::opt - MangledAtomicTypeNamePrefix("spirv-atomic-prefix", - cl::desc("Mangled atomic type name prefix"), - cl::init("U7_Atomic")); - void SPIRVToOCL::visitCallInst(CallInst &CI) { LLVM_DEBUG(dbgs() << "[visistCallInst] " << CI << '\n'); auto F = CI.getCalledFunction(); @@ -362,23 +357,6 @@ void SPIRVToOCL::visitCallSPIRVBuiltin(CallInst *CI, Op OC) { &Attrs); } -void SPIRVToOCL::translateMangledAtomicTypeName() { - for (auto &I : M->functions()) { - if (!I.hasName()) - continue; - std::string MangledName{I.getName()}; - StringRef DemangledName; - if (!oclIsBuiltin(MangledName, DemangledName) || - DemangledName.find(kOCLBuiltinName::AtomPrefix) != 0) - continue; - auto Loc = MangledName.find(kOCLBuiltinName::AtomPrefix); - Loc = MangledName.find(kMangledName::AtomicPrefixInternal, Loc); - MangledName.replace(Loc, strlen(kMangledName::AtomicPrefixInternal), - MangledAtomicTypeNamePrefix); - I.setName(MangledName); - } -} - std::string SPIRVToOCL::getGroupBuiltinPrefix(CallInst *CI) { std::string Prefix; auto ES = getArgAsScope(CI, 0); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h index 920a4aebb8b24..0e3af6240dfaf 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h @@ -97,10 +97,6 @@ class SPIRVToOCL : public ModulePass, public InstVisitor { /// No change with arguments. void visitCallSPIRVBuiltin(CallInst *CI, Op OC); - /// Translate mangled atomic type name: "atomic_" => - /// MangledAtomicTypeNamePrefix - void translateMangledAtomicTypeName(); - /// Get prefix work_/sub_ for OCL group builtin functions. /// Assuming the first argument of \param CI is a constant integer for /// workgroup/subgroup scope enums. 
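The SPIRVLowerConstExpr change above lowers each constant-expression element of a vector operand and then rebuilds the vector one lane at a time, chaining one insertelement per element starting from undef. A minimal sketch of that same chain shape, written against LLVM's IRBuilder rather than the pass's raw InsertElementInst::Create calls (the helper name and signature here are illustrative, not part of the patch):

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Build a vector value lane by lane: start from undef and thread each
// partially filled vector through the next insertelement.
static Value *buildVectorFromScalars(IRBuilder<> &Builder,
                                     ArrayRef<Value *> Scalars,
                                     VectorType *VecTy) {
  Value *Vec = UndefValue::get(VecTy);
  for (unsigned I = 0, E = Scalars.size(); I != E; ++I)
    Vec = Builder.CreateInsertElement(Vec, Scalars[I], Builder.getInt32(I));
  return Vec; // the final insertelement yields the fully populated vector
}
```

The constexpr_vector.ll test added below checks exactly this shape: one CompositeInsert per lane, each consuming the result of the previous insert.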
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp index 16d3a53a3f1a1..439d25af48cf2 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp @@ -92,8 +92,6 @@ bool SPIRVToOCL20::runOnModule(Module &Module) { Ctx = &M->getContext(); visit(*M); - translateMangledAtomicTypeName(); - eraseUselessFunctions(&Module); LLVM_DEBUG(dbgs() << "After SPIRVToOCL20:\n" << *M); diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index dacc2dfe14821..9dd1010eb1770 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -1525,7 +1525,7 @@ bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM) { if ((!Ty->isFloatTy() && !Ty->isDoubleTy()) || ((NumElems > 4) && (NumElems != 8) && (NumElems != 16))) { BM->getErrorLog().checkError(false, SPIRVEC_InvalidFunctionCall, - II->getCalledValue()->getName().str(), "", + II->getCalledOperand()->getName().str(), "", __FILE__, __LINE__); return false; } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index dbd876bf95538..2b0d7bac55e0c 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1871,7 +1871,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, // Other LLVM intrinsics shouldn't get to SPIRV, because they // can't be represented in SPIRV or aren't implemented yet. BM->getErrorLog().checkError(false, SPIRVEC_InvalidFunctionCall, - II->getCalledValue()->getName().str(), "", + II->getCalledOperand()->getName().str(), "", __FILE__, __LINE__); } return nullptr; @@ -1931,7 +1931,7 @@ SPIRVValue *LLVMToSPIRV::transIndirectCallInst(CallInst *CI, return nullptr; return BM->addIndirectCallInst( - transValue(CI->getCalledValue(), BB), transType(CI->getType()), + transValue(CI->getCalledOperand(), BB), transType(CI->getType()), transArguments(CI, BB, SPIRVEntry::createUnique(OpFunctionCall).get()), BB); } diff --git a/llvm-spirv/test/constexpr_vector.ll b/llvm-spirv/test/constexpr_vector.ll new file mode 100644 index 0000000000000..0d85be8e35d36 --- /dev/null +++ b/llvm-spirv/test/constexpr_vector.ll @@ -0,0 +1,115 @@ +; RUN: llvm-as < %s | llvm-spirv -s | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-LLVM: define dllexport void @vadd() { +; CHECK-LLVM-NEXT: entry: +; CHECK-LLVM-NEXT: %Funcs = alloca <16 x i8>, align 16 +; CHECK-LLVM-NEXT: %0 = ptrtoint i32 (i32)* @_Z2f1u2CMvb32_j to i64 +; CHECK-LLVM-NEXT: %1 = bitcast i64 %0 to <8 x i8> +; CHECK-LLVM-NEXT: %2 = extractelement <8 x i8> %1, i32 0 +; CHECK-LLVM-NEXT: %3 = extractelement <8 x i8> %1, i32 1 +; CHECK-LLVM-NEXT: %4 = extractelement <8 x i8> %1, i32 2 +; CHECK-LLVM-NEXT: %5 = extractelement <8 x i8> %1, i32 3 +; CHECK-LLVM-NEXT: %6 = extractelement <8 x i8> %1, i32 4 +; CHECK-LLVM-NEXT: %7 = extractelement <8 x i8> %1, i32 5 +; CHECK-LLVM-NEXT: %8 = extractelement <8 x i8> %1, i32 6 +; CHECK-LLVM-NEXT: %9 = extractelement <8 x i8> %1, i32 7 +; CHECK-LLVM-NEXT: %10 = ptrtoint i32 (i32)* @_Z2f2u2CMvb32_j to i64 +; CHECK-LLVM-NEXT: %11 = bitcast i64 %10 to <8 x i8> +; CHECK-LLVM-NEXT: %12 = extractelement <8 x i8> %11, i32 0 +; CHECK-LLVM-NEXT: %13 = extractelement <8 x i8> %11, i32 1 +; CHECK-LLVM-NEXT: %14 = extractelement <8 x i8> %11, i32 2 +; CHECK-LLVM-NEXT: %15 = extractelement <8 x i8> %11, i32 3 +; CHECK-LLVM-NEXT: %16 = extractelement <8 x i8> %11, i32 4 +; CHECK-LLVM-NEXT: %17 = extractelement <8 x i8> %11, i32 5 +; 
CHECK-LLVM-NEXT: %18 = extractelement <8 x i8> %11, i32 6 +; CHECK-LLVM-NEXT: %19 = extractelement <8 x i8> %11, i32 7 +; CHECK-LLVM-NEXT: %20 = insertelement <16 x i8> undef, i8 %2, i32 0 +; CHECK-LLVM-NEXT: %21 = insertelement <16 x i8> %20, i8 %3, i32 1 +; CHECK-LLVM-NEXT: %22 = insertelement <16 x i8> %21, i8 %4, i32 2 +; CHECK-LLVM-NEXT: %23 = insertelement <16 x i8> %22, i8 %5, i32 3 +; CHECK-LLVM-NEXT: %24 = insertelement <16 x i8> %23, i8 %6, i32 4 +; CHECK-LLVM-NEXT: %25 = insertelement <16 x i8> %24, i8 %7, i32 5 +; CHECK-LLVM-NEXT: %26 = insertelement <16 x i8> %25, i8 %8, i32 6 +; CHECK-LLVM-NEXT: %27 = insertelement <16 x i8> %26, i8 %9, i32 7 +; CHECK-LLVM-NEXT: %28 = insertelement <16 x i8> %27, i8 %12, i32 8 +; CHECK-LLVM-NEXT: %29 = insertelement <16 x i8> %28, i8 %13, i32 9 +; CHECK-LLVM-NEXT: %30 = insertelement <16 x i8> %29, i8 %14, i32 10 +; CHECK-LLVM-NEXT: %31 = insertelement <16 x i8> %30, i8 %15, i32 11 +; CHECK-LLVM-NEXT: %32 = insertelement <16 x i8> %31, i8 %16, i32 12 +; CHECK-LLVM-NEXT: %33 = insertelement <16 x i8> %32, i8 %17, i32 13 +; CHECK-LLVM-NEXT: %34 = insertelement <16 x i8> %33, i8 %18, i32 14 +; CHECK-LLVM-NEXT: %35 = insertelement <16 x i8> %34, i8 %19, i32 15 +; CHECK-LLVM-NEXT: store <16 x i8> %35, <16 x i8>* %Funcs, align 16 +; CHECK-LLVM-NEXT: ret void +; CHECK-LLVM-NEXT: } + +; RUN: llvm-as < %s | llvm-spirv -spirv-text | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: 4 Name [[Funcs:[0-9]+]] "Funcs" + +; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt8:[0-9]+]] 8 0 +; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt32:[0-9]+]] 32 0 +; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt64:[0-9]+]] 64 0 +; CHECK-SPIRV-DAG: 4 TypeVector [[TypeVec16:[0-9]+]] [[TypeInt8]] 16 +; CHECK-SPIRV-DAG: 4 TypeVector [[TypeVec8:[0-9]+]] [[TypeInt8]] 8 +; CHECK-SPIRV-DAG: 4 TypePointer [[StorePtr:[0-9]+]] 7 [[TypeVec16]] +; CHECK-SPIRV-DAG: 3 Undef [[TypeVec16]] [[TypeUndef:[0-9]+]] + +; CHECK-SPIRV: 4 ConvertPtrToU [[TypeInt64]] [[Ptr1:[0-9]+]] {{[0-9]+}} +; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec1:[0-9]+]] [[Ptr1]] +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v00:[0-9]+]] [[Vec1]] 0 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v01:[0-9]+]] [[Vec1]] 1 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v02:[0-9]+]] [[Vec1]] 2 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v03:[0-9]+]] [[Vec1]] 3 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v04:[0-9]+]] [[Vec1]] 4 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v05:[0-9]+]] [[Vec1]] 5 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v06:[0-9]+]] [[Vec1]] 6 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v07:[0-9]+]] [[Vec1]] 7 +; CHECK-SPIRV-NEXT: 4 ConvertPtrToU [[TypeInt64]] [[Ptr2:[0-9]+]] {{[0-9]+}} +; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec2:[0-9]+]] [[Ptr2]] +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v10:[0-9]+]] [[Vec2]] 0 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v11:[0-9]+]] [[Vec2]] 1 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v12:[0-9]+]] [[Vec2]] 2 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v13:[0-9]+]] [[Vec2]] 3 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v14:[0-9]+]] [[Vec2]] 4 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v15:[0-9]+]] [[Vec2]] 5 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v16:[0-9]+]] [[Vec2]] 6 +; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v17:[0-9]+]] [[Vec2]] 7 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec0:[0-9]+]] 
[[v00]] [[TypeUndef]] 0 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec1:[0-9]+]] [[v01]] [[NewVec0]] 1 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec2:[0-9]+]] [[v02]] [[NewVec1]] 2 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec3:[0-9]+]] [[v03]] [[NewVec2]] 3 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec4:[0-9]+]] [[v04]] [[NewVec3]] 4 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec5:[0-9]+]] [[v05]] [[NewVec4]] 5 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec6:[0-9]+]] [[v06]] [[NewVec5]] 6 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec7:[0-9]+]] [[v07]] [[NewVec6]] 7 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec8:[0-9]+]] [[v10]] [[NewVec7]] 8 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec9:[0-9]+]] [[v11]] [[NewVec8]] 9 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec10:[0-9]+]] [[v12]] [[NewVec9]] 10 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec11:[0-9]+]] [[v13]] [[NewVec10]] 11 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec12:[0-9]+]] [[v14]] [[NewVec11]] 12 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec13:[0-9]+]] [[v15]] [[NewVec12]] 13 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec14:[0-9]+]] [[v16]] [[NewVec13]] 14 +; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec15:[0-9]+]] [[v17]] [[NewVec14]] 15 +; CHECK-SPIRV-NEXT: 5 Store [[Funcs]] [[NewVec15]] [[TypeInt32]] [[StorePtr]] + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: noinline norecurse nounwind readnone +define internal i32 @_Z2f1u2CMvb32_j(i32 %x) { +entry: + ret i32 %x +} +; Function Attrs: noinline norecurse nounwind readnone +define internal i32 @_Z2f2u2CMvb32_j(i32 %x) { +entry: + ret i32 %x +} +; Function Attrs: noinline nounwind +define dllexport void @vadd() { +entry: + %Funcs = alloca <16 x i8>, align 16 + store <16 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 7), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) 
to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 7)>, <16 x i8>* %Funcs, align 16 + ret void +} diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 755dd560a6f1d..fd3e6f53d70b1 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -672,16 +672,38 @@ option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default include(HandleLLVMOptions) -include(FindPythonInterp) -if( NOT PYTHONINTERP_FOUND ) - message(FATAL_ERROR -"Unable to find Python interpreter, required for builds and testing. +if(CMAKE_VERSION VERSION_LESS 3.12) + include(FindPythonInterp) + if( NOT PYTHONINTERP_FOUND ) + message(FATAL_ERROR + "Unable to find Python interpreter, required for builds and testing. -Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") -endif() + Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") + endif() + + if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) + message(FATAL_ERROR "Python 2.7 or newer is required") + endif() -if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) + set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) +else() + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") + endif() + + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) + endif() endif() ###### @@ -717,7 +739,7 @@ endif (LLVM_USE_PERF) message(STATUS "Constructing LLVMBuild project information") execute_process( - COMMAND ${PYTHON_EXECUTABLE} -B ${LLVMBUILDTOOL} + COMMAND "${Python3_EXECUTABLE}" -B ${LLVMBUILDTOOL} --native-target "${LLVM_NATIVE_ARCH}" --enable-targets "${LLVM_TARGETS_TO_BUILD}" --enable-optional-components "${LLVMOPTIONALCOMPONENTS}" diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index d9624c42da666..777cfd628a846 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -639,7 +639,7 @@ function(find_python_module module) return() endif() - execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${module}" + execute_process(COMMAND "${Python3_EXECUTABLE}" "-c" "import ${module}" RESULT_VARIABLE status ERROR_QUIET) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index ea4967dc38d33..b4bc15e16e8e1 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -117,7 +117,7 @@ function(add_llvm_symbol_exports target_name export_file) set(native_export_file "${target_name}.def") add_custom_command(OUTPUT ${native_export_file} - COMMAND ${PYTHON_EXECUTABLE} -c "import sys;print(''.join(['EXPORTS\\n']+sys.stdin.readlines(),))" + COMMAND "${Python3_EXECUTABLE}" -c "import sys;print(''.join(['EXPORTS\\n']+sys.stdin.readlines(),))" < ${export_file} > ${native_export_file} DEPENDS ${export_file} VERBATIM @@ -467,6 +467,21 @@ function(llvm_add_library 
name) if(ARG_DEPENDS) add_dependencies(${obj_name} ${ARG_DEPENDS}) endif() + # Treat link libraries like PUBLIC dependencies. LINK_LIBS might + # result in generating header files. Add a dependency so that + # the generated header is created before this object library. + if(ARG_LINK_LIBS) + cmake_parse_arguments(LINK_LIBS_ARG + "" + "" + "PUBLIC;PRIVATE" + ${ARG_LINK_LIBS}) + foreach(link_lib ${LINK_LIBS_ARG_PUBLIC}) + if(TARGET ${link_lib}) + add_dependencies(${obj_name} ${link_lib}) + endif() + endforeach() + endif() endif() if(ARG_SHARED AND ARG_STATIC) @@ -873,11 +888,17 @@ function(add_llvm_pass_plugin name) endif() option(LLVM_${name_upper}_LINK_INTO_TOOLS "Statically link ${name} into tools (if available)" ${link_into_tools_default}) + # If we statically link the plugin, don't use llvm dylib because we're going + # to be part of it. + if(LLVM_${name_upper}_LINK_INTO_TOOLS) + list(APPEND ARG_UNPARSED_ARGUMENTS DISABLE_LLVM_LINK_LLVM_DYLIB) + endif() + if(LLVM_${name_upper}_LINK_INTO_TOOLS) list(REMOVE_ITEM ARG_UNPARSED_ARGUMENTS BUILDTREE_ONLY) # process_llvm_pass_plugins takes care of the actual linking, just create an # object library as of now - add_llvm_component_library(${name} OBJECT ${ARG_UNPARSED_ARGUMENTS}) + add_llvm_library(${name} OBJECT ${ARG_UNPARSED_ARGUMENTS}) target_compile_definitions(${name} PRIVATE LLVM_${name_upper}_LINK_INTO_TOOLS) set_property(TARGET ${name} APPEND PROPERTY COMPILE_DEFINITIONS LLVM_LINK_INTO_TOOLS) if (TARGET intrinsics_gen) @@ -971,12 +992,12 @@ function(process_llvm_pass_plugins) const char* Name;\n\ const char* RequiredLibraries[1 + 1 + ${llvm_plugin_max_deps_length}];\n\ };\n\ - std::array AvailableExtensions{\n") + std::array AvailableExtensions{\n") foreach(llvm_extension ${LLVM_STATIC_EXTENSIONS}) get_property(llvm_plugin_deps TARGET ${llvm_extension} PROPERTY LINK_LIBRARIES) - file(APPEND "${ExtensionDeps}.tmp" "{\"${llvm_extension}\", {") + file(APPEND "${ExtensionDeps}.tmp" "{{\"${llvm_extension}\", {") foreach(llvm_plugin_dep ${llvm_plugin_deps}) # Turn library dependency back to component name, if possible. # That way llvm-config can avoid redundant dependencies. @@ -990,7 +1011,7 @@ function(process_llvm_pass_plugins) endforeach() # Self + mandatory trailing null, because the number of RequiredLibraries differs between extensions. - file(APPEND "${ExtensionDeps}.tmp" \"${llvm_extension}\", "nullptr}},\n") + file(APPEND "${ExtensionDeps}.tmp" \"${llvm_extension}\", "nullptr}}},\n") endforeach() file(APPEND "${ExtensionDeps}.tmp" "};\n") @@ -1046,7 +1067,7 @@ function(export_executable_symbols target) set(mangling itanium) endif() add_custom_command(OUTPUT ${exported_symbol_file} - COMMAND ${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py --mangling=${mangling} ${static_libs} -o ${exported_symbol_file} + COMMAND "${Python3_EXECUTABLE}" ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py --mangling=${mangling} ${static_libs} -o ${exported_symbol_file} WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR} DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py ${static_libs} VERBATIM @@ -1447,7 +1468,7 @@ function(make_paths_relative out_pathlist basedir pathlist) # empty list entries. So escape the ;s in the list and do the splitting # ourselves. cmake has no relpath function, so use Python for that.
string(REPLACE ";" "\\;" pathlist_escaped "${pathlist}") - execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "\n + execute_process(COMMAND "${Python3_EXECUTABLE}" "-c" "\n import os, sys\n base = sys.argv[1] def haslink(p):\n @@ -1522,7 +1543,6 @@ function(configure_lit_site_cfg site_in site_out) # SHLIBDIR points the build tree. string(REPLACE "${CMAKE_CFG_INTDIR}" "${LLVM_BUILD_MODE}" SHLIBDIR "${LLVM_SHLIB_OUTPUT_INTDIR}") - set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) # FIXME: "ENABLE_SHARED" doesn't make sense, since it is used just for # plugins. We may rename it. if(LLVM_ENABLE_PLUGINS) @@ -1680,7 +1700,7 @@ function(add_lit_target target comment) ALLOW_EXTERNAL ) - set(LIT_COMMAND "${PYTHON_EXECUTABLE};${lit_base_dir}/${lit_file_name}") + set(LIT_COMMAND "${Python3_EXECUTABLE};${lit_base_dir}/${lit_file_name}") list(APPEND LIT_COMMAND ${LIT_ARGS}) foreach(param ${ARG_PARAMS}) list(APPEND LIT_COMMAND --param ${param}) diff --git a/llvm/cmake/modules/CMakeLists.txt b/llvm/cmake/modules/CMakeLists.txt index 4bb03b63122bb..1cef87a5e18de 100644 --- a/llvm/cmake/modules/CMakeLists.txt +++ b/llvm/cmake/modules/CMakeLists.txt @@ -56,7 +56,11 @@ set(LLVM_CONFIG_TOOLS_BINARY_DIR "${LLVM_TOOLS_BINARY_DIR}") # Generate a default location for lit if (LLVM_BUILD_UTILS) - set(LLVM_CONFIG_DEFAULT_EXTERNAL_LIT "${LLVM_CONFIG_TOOLS_BINARY_DIR}/llvm-lit") + if (CMAKE_HOST_WIN32 AND NOT CYGWIN) + set(LLVM_CONFIG_DEFAULT_EXTERNAL_LIT "${LLVM_CONFIG_TOOLS_BINARY_DIR}/llvm-lit.py") + else() + set(LLVM_CONFIG_DEFAULT_EXTERNAL_LIT "${LLVM_CONFIG_TOOLS_BINARY_DIR}/llvm-lit") + endif() endif() if (LLVM_LINK_LLVM_DYLIB) diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake index 8a6e880c4e213..01cd37124841e 100644 --- a/llvm/cmake/modules/CrossCompile.cmake +++ b/llvm/cmake/modules/CrossCompile.cmake @@ -6,7 +6,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) if(NOT DEFINED ${project_name}_${target_name}_BUILD) set(${project_name}_${target_name}_BUILD - "${CMAKE_BINARY_DIR}/${target_name}") + "${CMAKE_CURRENT_BINARY_DIR}/${target_name}") set(${project_name}_${target_name}_BUILD ${${project_name}_${target_name}_BUILD} PARENT_SCOPE) message(STATUS "Setting native build dir to " ${${project_name}_${target_name}_BUILD}) @@ -68,7 +68,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) add_custom_command(OUTPUT ${${project_name}_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}" - ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} + ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_CURRENT_SOURCE_DIR} ${CROSS_TOOLCHAIN_FLAGS_${project_name}_${target_name}} -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE -DLLVM_TARGETS_TO_BUILD="${targets_to_build_arg}" @@ -99,17 +99,17 @@ function(build_native_tool target output_path_var) cmake_parse_arguments(ARG "" "" "DEPENDS" ${ARGN}) if(CMAKE_CONFIGURATION_TYPES) - set(output_path "${${CMAKE_PROJECT_NAME}_NATIVE_BUILD}/Release/bin/${target}") + set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/Release/bin/${target}") else() - set(output_path "${${CMAKE_PROJECT_NAME}_NATIVE_BUILD}/bin/${target}") + set(output_path "${${PROJECT_NAME}_NATIVE_BUILD}/bin/${target}") endif() - llvm_ExternalProject_BuildCmd(build_cmd ${target} ${${CMAKE_PROJECT_NAME}_NATIVE_BUILD} + llvm_ExternalProject_BuildCmd(build_cmd ${target} ${${PROJECT_NAME}_NATIVE_BUILD} CONFIGURATION Release) add_custom_command(OUTPUT 
"${output_path}" COMMAND ${build_cmd} - DEPENDS CONFIGURE_${CMAKE_PROJECT_NAME}_NATIVE ${ARG_DEPENDS} - WORKING_DIRECTORY "${${CMAKE_PROJECT_NAME}_NATIVE_BUILD}" + DEPENDS CONFIGURE_${PROJECT_NAME}_NATIVE ${ARG_DEPENDS} + WORKING_DIRECTORY "${${PROJECT_NAME}_NATIVE_BUILD}" COMMENT "Building native ${target}..." USES_TERMINAL) set(${output_path_var} "${output_path}" PARENT_SCOPE) diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake index b703566962987..a5f4b5b5bb14e 100644 --- a/llvm/cmake/modules/FindGRPC.cmake +++ b/llvm/cmake/modules/FindGRPC.cmake @@ -45,6 +45,7 @@ function(generate_grpc_protos LibraryName ProtoFile) "${ProtoSourceAbsolutePath}" DEPENDS "${ProtoSourceAbsolutePath}") - add_library(${LibraryName} ${GeneratedProtoSource} ${GeneratedGRPCSource}) - target_link_libraries(${LibraryName} grpc++ protobuf) + add_clang_library(${LibraryName} ${GeneratedProtoSource} ${GeneratedGRPCSource} + PARTIAL_SOURCES_INTENDED + LINK_LIBS grpc++ protobuf) endfunction() diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 91133d00782d2..b76300d8d2724 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -886,6 +886,27 @@ if (LLVM_BUILD_INSTRUMENTED) endif() endif() +# When using clang-cl with an instrumentation-based tool, add clang's library +# resource directory to the library search path. Because cmake invokes the +# linker directly, it isn't sufficient to pass -fsanitize=* to the linker. +if (CLANG_CL AND (LLVM_BUILD_INSTRUMENTED OR LLVM_USE_SANITIZER)) + execute_process( + COMMAND ${CMAKE_CXX_COMPILER} /clang:-print-resource-dir + OUTPUT_VARIABLE clang_resource_dir + ERROR_VARIABLE clang_cl_stderr + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE clang_cl_exit_code) + if (NOT "${clang_cl_exit_code}" STREQUAL "0") + message(FATAL_ERROR + "Unable to invoke clang-cl to find resource dir: ${clang_cl_stderr}") + endif() + file(TO_CMAKE_PATH "${clang_resource_dir}" clang_resource_dir) + append("/libpath:${clang_resource_dir}/lib/windows" + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) +endif() + if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) append("-fprofile-instr-use=\"${LLVM_PROFDATA_FILE}\"" diff --git a/llvm/cmake/modules/LLVMProcessSources.cmake b/llvm/cmake/modules/LLVMProcessSources.cmake index d0be0e8b3ba3d..ba8dca313c86f 100644 --- a/llvm/cmake/modules/LLVMProcessSources.cmake +++ b/llvm/cmake/modules/LLVMProcessSources.cmake @@ -57,10 +57,12 @@ endfunction(find_all_header_files) function(llvm_process_sources OUT_VAR) - cmake_parse_arguments(ARG "" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN}) + cmake_parse_arguments(ARG "PARTIAL_SOURCES_INTENDED" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN}) set(sources ${ARG_UNPARSED_ARGUMENTS}) - llvm_check_source_file_list( ${sources} ) - + if (NOT ARG_PARTIAL_SOURCES_INTENDED) + llvm_check_source_file_list(${sources}) + endif() + # This adds .td and .h files to the Visual Studio solution: add_td_sources(sources) find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}") diff --git a/llvm/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.rst b/llvm/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.rst index 537359fec55c2..e9094935e0db8 100644 --- a/llvm/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.rst +++ b/llvm/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.rst @@ -1,20 +1,20 @@ .. 
_amdgpu-dwarf-6-proposal-for-heterogeneous-debugging: -==================================================== +**************************************************** DWARF Version 6 Proposal For Heterogeneous Debugging -==================================================== +**************************************************** .. contents:: :local: .. warning:: - This section describes a **provisional proposal** for DWARF Version 6 + This document describes a **provisional proposal** for DWARF Version 6 [:ref:`DWARF <amdgpu-dwarf-DWARF>`] to support heterogeneous debugging. It is not currently fully implemented and is subject to change. Introduction ------------- +============ This document proposes a set of backwards compatible extensions to DWARF Version 5 [:ref:`DWARF <amdgpu-dwarf-DWARF>`] for consideration of inclusion into a @@ -100,7 +100,7 @@ denote the current lane, much like ``DW_OP_push_object_address`` denotes the current object. The ``DW_OP_*piece`` operations only allow literal indices. Therefore, a way to use a computed offset of an arbitrary location description (such as a vector register) is required. See ``DW_OP_LLVM_push_lane``, -``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_constu``, and +``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and ``DW_OP_LLVM_bit_offset``. If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner @@ -173,16 +173,26 @@ allowing current DWARF expressions to remain legal. See The ``DW_OP_plus`` and ``DW_OP_minus`` can be defined to operate on a memory location description in the default target architecture specific address space -and a generic type value to produce an updated memory location description. -This allows them to continue to be used to offset an address. To generalize +and a generic type value to produce an updated memory location description. This +allows them to continue to be used to offset an address. To generalize offsetting to any location description, including location descriptions that -describe when bytes are in registers, are implicit, or a composite of these, -the ``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_constu`` and -``DW_OP_LLVM_bit_offset`` operations are added. These do not perform wrapping -which would be hard to define for location descriptions of non-memory kinds. -This allows ``DW_OP_push_object_address`` to push a location description that -may be in a register, or be an implicit value, and the DWARF expression of -``DW_TAG_ptr_to_member_type`` can contain ``DW_OP_LLVM_offset`` to offset +describe when bytes are in registers, are implicit, or a composite of these, the +``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and +``DW_OP_LLVM_bit_offset`` offset operations are added. Unlike ``DW_OP_plus``, +``DW_OP_plus_uconst``, and ``DW_OP_minus`` arithmetic operations, these do not +define that integer overflow causes wrap-around. The offset operations can +operate on location storage of any size. For example, implicit location storage +could be any number of bits in size. It is simpler to define offsets that exceed +the size of the location storage as being ill-formed, than having to force an +implementation to support potentially infinite precision offsets to allow it to +correctly track a series of positive and negative offsets that may transiently +overflow or underflow, but end up in range. This is simple for the arithmetic +operations as they are defined in terms of two's complement arithmetic on a base +type of a fixed size.
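The distinction drawn above, wrapping arithmetic operations versus ill-formed out-of-range offsets, can be made concrete with a small consumer-side model. The following C++ sketch is purely illustrative and not part of the proposal; ``LocationDesc`` and ``applyBitOffset`` are hypothetical names:

  #include <cstdint>
  #include <optional>

  // Hypothetical model of a location description: an offset into some
  // location storage (memory, a register, or implicit bytes) of known size.
  struct LocationDesc {
    uint64_t BitOffset;   // current offset into the location storage
    uint64_t StorageBits; // total size of the location storage in bits
  };

  // Models the proposal's offset rule: an offset that leaves the bounds of
  // the location storage makes the expression ill-formed (here: nullopt)
  // instead of wrapping, since wrap-around has no meaning for non-memory
  // location storage.
  std::optional<LocationDesc> applyBitOffset(LocationDesc L, int64_t Bits) {
    int64_t NewOffset = static_cast<int64_t>(L.BitOffset) + Bits;
    if (NewOffset < 0 || static_cast<uint64_t>(NewOffset) > L.StorageBits)
      return std::nullopt; // the DWARF expression is ill-formed
    L.BitOffset = static_cast<uint64_t>(NewOffset);
    return L;
  }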
+ +Having the offset operations allows ``DW_OP_push_object_address`` to push a +location description that may be in a register, or be an implicit value, and the +DWARF expression of ``DW_TAG_ptr_to_member_type`` can contain them to offset within it. ``DW_OP_LLVM_bit_offset`` generalizes DWARF to work with bit fields which is not possible in DWARF Version 5. @@ -340,10 +350,17 @@ implemented as an LLVM vendor extension to DWARF Version 5. If accepted these names would not include the "\ ``LLVM``\ " and would not use encodings in the vendor range. -The proposal is organized to follow the section ordering of DWARF Version 5. -It includes notes to indicate the corresponding DWARF Version 5 sections to -which they pertain. Other notes describe additional changes that may be worth -considering, and to raise questions. +The proposal is described in +:ref:`amdgpu-dwarf-proposed-changes-relative-to-dwarf-version-5` and is +organized to follow the section ordering of DWARF Version 5. It includes notes +to indicate the corresponding DWARF Version 5 sections to which they pertain. +Other notes describe additional changes that may be worth considering, and to +raise questions. + +.. _amdgpu-dwarf-proposed-changes-relative-to-dwarf-version-5: + +Proposed Changes Relative to DWARF Version 5 +============================================ General Description ------------------- @@ -648,7 +665,9 @@ stack assume that the top of the stack (most recently added entry) has index 0. They allow the stack entries to be either a value or location description. If any stack entry accessed by a stack operation is an incomplete composite -location description, then the DWARF expression is ill-formed. +location description (see +:ref:`amdgpu-dwarf-composite-location-description-operations`), then the DWARF +expression is ill-formed. .. note:: @@ -753,14 +772,21 @@ expression. unsigned offset, respectively, of a debugging information entry D in the current compilation unit. - ``DW_OP_LLVM_call_ref`` has one operand that is a 4-byte unsigned value in - the 32-bit DWARF format, or an 8-byte unsigned value in the 64-bit DWARF - format, that represents an offset of a debugging information entry D in a + ``DW_OP_call_ref`` has one operand that is a 4-byte unsigned value in the + 32-bit DWARF format, or an 8-byte unsigned value in the 64-bit DWARF format, + that represents an offset of a debugging information entry D in a ``.debug_info`` section, which may be contained in an executable or shared - object file other than that containing the operation. For references from one - executable or shared object file to another, the relocation must be + object file other than that containing the operation. For references from + one executable or shared object file to another, the relocation must be performed by the consumer. + .. note:: + + It is unclear how crossing from one executable or shared object file to + another can work. How would a consumer know which executable or shared + object file is being referenced? In an ELF file the DWARF is in a + non-ALLOC segment so standard dynamic relocations cannot be used. + *Operand interpretation of* ``DW_OP_call2``\ *,* ``DW_OP_call4``\ *, and* ``DW_OP_call_ref`` *is exactly like that for* ``DW_FORM_ref2``\ *,* ``DW_FORM_ref4``\ *, and* ``DW_FORM_ref_addr``\ *, respectively.* @@ -1161,7 +1187,7 @@ There are these special value operations currently defined: on the stack with the generic type.
*This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` - operation can be used and provides greater expressiveness.* + *operation can be used and provides greater expressiveness.* 6. ``DW_OP_xderef_size`` *Deprecated* @@ -1177,7 +1203,7 @@ There are these special value operations currently defined: value V retrieved is left on the stack with the generic type. *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` - operation can be used and provides greater expressiveness.* + *operation can be used and provides greater expressiveness.* 7. ``DW_OP_xderef_type`` *Deprecated* @@ -1195,7 +1221,7 @@ There are these special value operations currently defined: retrieved is left on the stack with the type D. *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` - operation can be used and provides greater expressiveness.* + *operation can be used and provides greater expressiveness.* 8. ``DW_OP_entry_value`` *Deprecated* @@ -1304,9 +1330,9 @@ General Location Description Operations to the size of the location storage specified by SL, then the DWARF expression is ill-formed. -2. ``DW_OP_LLVM_offset_constu`` *New* +2. ``DW_OP_LLVM_offset_uconst`` *New* - ``DW_OP_LLVM_offset_constu`` has a single unsigned LEB128 integer operand + ``DW_OP_LLVM_offset_uconst`` has a single unsigned LEB128 integer operand that represents a byte displacement B. The operation is equivalent to performing ``DW_OP_constu B; @@ -1316,6 +1342,12 @@ General Location Description Operations displacements in two bytes than can be done with* ``DW_OP_lit*; DW_OP_LLVM_offset``\ *.* + .. note:: + + Should this be named ``DW_OP_LLVM_offset_uconst`` to match + ``DW_OP_plus_uconst``, or ``DW_OP_LLVM_offset_constu`` to match + ``DW_OP_constu``? + 3. ``DW_OP_LLVM_bit_offset`` *New* ``DW_OP_LLVM_bit_offset`` pops two stack entries. The first must be an @@ -1582,7 +1614,7 @@ type. entry corresponding to the current subprogram as described in :ref:`amdgpu-dwarf-debugging-information-entry-attributes`. - The location description L is updated as if the ``DW_OP_LLVM_offset_constu + The location description L is updated as if the ``DW_OP_LLVM_offset_uconst B`` operation was applied. The updated L is pushed on the stack. 7. ``DW_OP_breg0``, ``DW_OP_breg1``, ..., ``DW_OP_breg31`` @@ -1818,7 +1850,7 @@ implicit storage value starting at the bit offset. * Otherwise the DWARF expression is ill-formed. - The bit offset of RL is updated as if the ``DW_OP_LLVM_offset_constu B`` + The bit offset of RL is updated as if the ``DW_OP_LLVM_offset_uconst B`` operation was applied. If a ``DW_OP_stack_value`` operation pops a value that is the same as IPV, @@ -1868,6 +1900,8 @@ reconstruct the value of the object when asked to dereference the pointer described by E*\ :sub:`1` *which contains the* ``DW_OP_implicit_pointer`` or ``DW_OP_LLVM_aspace_implicit_pointer`` *operation.* +.. 
_amdgpu-dwarf-composite-location-description-operations: + Composite Location Description Operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2773,7 +2807,7 @@ Debugging Information Entry Attributes The format for the augmentation string is: - | ``[``\ *vendor*\ ``v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y version number of the extensions used, and *options* is an optional string @@ -2861,7 +2895,7 @@ A null-terminated UTF-8 vendor specific augmentation string, which provides additional information about the contents of this index. If provided, the recommended format for augmentation string is: - | ``[``\ *vendor*\ ``v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y version number of the extensions used in the DWARF of the compilation unit, and @@ -3089,7 +3123,7 @@ Description Entries. There is at least one CIE in every non-empty The recommended format for the augmentation string is: - | ``[``\ *vendor*\ ``v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y version number of the extensions used, and *options* is an optional string @@ -3541,15 +3575,15 @@ entry attributes. .. table:: Attribute encodings :name: amdgpu-dwarf-attribute-encodings-table - ================================== ===== ==================================== - Attribute Name Value Classes - ================================== ===== ==================================== - DW_AT_LLVM_active_lane *TBD* exprloc, loclist - DW_AT_LLVM_augmentation *TBD* string - DW_AT_LLVM_lanes *TBD* constant - DW_AT_LLVM_lane_pc *TBD* exprloc, loclist - DW_AT_LLVM_vector_size *TBD* constant - ================================== ===== ==================================== + ================================== ====== =================================== + Attribute Name Value Classes + ================================== ====== =================================== + DW_AT_LLVM_active_lane 0x3e08 exprloc, loclist + DW_AT_LLVM_augmentation 0x3e09 string + DW_AT_LLVM_lanes 0x3e0a constant + DW_AT_LLVM_lane_pc 0x3e0b exprloc, loclist + DW_AT_LLVM_vector_size 0x3e0c constant + ================================== ====== =================================== DWARF Expressions ~~~~~~~~~~~~~~~~~ @@ -3583,7 +3617,7 @@ operations. DW_OP_LLVM_form_aspace_address 0xe1 0 DW_OP_LLVM_push_lane 0xe2 0 DW_OP_LLVM_offset 0xe3 0 - DW_OP_LLVM_offset_constu 0xe4 1 ULEB128 byte displacement + DW_OP_LLVM_offset_uconst 0xe4 1 ULEB128 byte displacement DW_OP_LLVM_bit_offset 0xe5 0 DW_OP_LLVM_call_frame_entry_reg 0xe6 1 ULEB128 register number DW_OP_LLVM_undefined 0xe7 0 @@ -3686,8 +3720,8 @@ instructions. 
Instruction High 2 Low 6 Operand 1 Operand 2 Operand 3 Bits Bits ======================== ====== ====== ================ ================ ================ - DW_CFA_def_aspace_cfa 0 0x2f ULEB128 register ULEB128 offset ULEB128 address space - DW_CFA_def_aspace_cfa_sf 0 0x30 ULEB128 register SLEB128 offset ULEB128 address space + DW_CFA_def_aspace_cfa 0 0x30 ULEB128 register ULEB128 offset ULEB128 address space + DW_CFA_def_aspace_cfa_sf 0 0x31 ULEB128 register SLEB128 offset ULEB128 address space ======================== ====== ====== ================ ================ ================ Attributes by Tag Value (Informative) @@ -3780,4 +3814,4 @@ References .. _amdgpu-dwarf-SEMVER: -15. [SEMVER] `Semantic Versioning <https://semver.org/>`__ \ No newline at end of file +15. [SEMVER] `Semantic Versioning <https://semver.org/>`__ diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index a58bc32dba0e1..01e0c65f98b8d 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -6507,11 +6507,27 @@ On exit from a function: * FLAT_SCRATCH * EXEC * GFX6-8: M0 - * All SGPR and VGPR registers except the clobbered registers of SGPR4-31 and - VGPR0-31. + * All SGPR registers except the clobbered registers of SGPR4-31. + * VGPR40-47 + VGPR56-63 + VGPR72-79 + VGPR88-95 + VGPR104-111 + VGPR120-127 + VGPR136-143 + VGPR152-159 + VGPR168-175 + VGPR184-191 + VGPR200-207 + VGPR216-223 + VGPR232-239 + VGPR248-255 + *Except for the argument registers, the clobbered and the preserved + VGPRs are intermixed at regular intervals in order to + get better occupancy.* For the AMDGPU backend, an inter-procedural register allocation (IPRA) - optimization may mark some of clobbered SGPR4-31 and VGPR0-31 registers as + optimization may mark some of the clobbered SGPR and VGPR registers as preserved if it can be determined that the called function does not change their value. diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst index 49db57161810d..55fc3f0cabfe5 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -634,7 +634,8 @@ For example: ; CHECK: load r[[#REG:]], [r0] ; CHECK: load r[[#REG+1]], [r1] - ; CHECK: Loading from 0x[[#%x,ADDR:] to 0x[[#ADDR + 7]] + ; CHECK: Loading from 0x[[#%x,ADDR:]] + ; CHECK-SAME: to 0x[[#ADDR + 7]] The above example would match the text: diff --git a/llvm/docs/CommandGuide/llvm-cxxfilt.rst b/llvm/docs/CommandGuide/llvm-cxxfilt.rst index 5d257b98895e2..b50252aea1e1b 100644 --- a/llvm/docs/CommandGuide/llvm-cxxfilt.rst +++ b/llvm/docs/CommandGuide/llvm-cxxfilt.rst @@ -46,16 +46,21 @@ OPTIONS .. option:: --help, -h - Print a summary of command line options. + Print a summary of command line options. .. option:: --help-list - Print an uncategorized summary of command line options. + Print an uncategorized summary of command line options. + +.. option:: --no-strip-underscore, -n + + Do not strip a leading underscore. This is the default for all platforms + except Mach-O based hosts. .. option:: --strip-underscore, -_ - Discard a single leading underscore, if present, from each input name before - demangling. + Strip a single leading underscore, if present, from each input name before + demangling. On by default on Mach-O based platforms. .. option:: --types, -t diff --git a/llvm/docs/CommandGuide/llvm-dwarfdump.rst b/llvm/docs/CommandGuide/llvm-dwarfdump.rst index 840c93345d296..fcd22299ec080 100644 --- a/llvm/docs/CommandGuide/llvm-dwarfdump.rst +++ b/llvm/docs/CommandGuide/llvm-dwarfdump.rst @@ -112,7 +112,8 @@ OPTIONS ..
option:: --statistics Collect debug info quality metrics and print the results - as machine-readable single-line JSON output. + as machine-readable single-line JSON output. The output + format is described in the section below (:ref:`stats-format`). .. option:: --summarize-types @@ -162,6 +163,30 @@ OPTIONS Read command-line options from ``<file>``. +.. _stats-format: + +FORMAT OF STATISTICS OUTPUT +--------------------------- + +The :option:`--statistics` option generates single-line JSON output +representing quality metrics of the processed debug info. These metrics are +useful to compare changes between two compilers, particularly for judging +the effect that a change to the compiler has on the debug info quality. + +The output is formatted as key-value pairs. The first pair contains a version +number. The following naming scheme is used for the keys: + + - `variables` ==> local variables and parameters + - `local vars` ==> local variables + - `params` ==> formal parameters + +For aggregated values, the following keys are used: + + - `sum_of_all_variables(...)` ==> the sum applied to all variables + - `#bytes` ==> the number of bytes + - `#variables - entry values ...` ==> the number of variables excluding + the entry values etc. + EXIT STATUS ----------- diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst index 14e08d7641d54..c5cff2ee2563d 100644 --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -417,7 +417,8 @@ them. Write the output as the specified format. See `SUPPORTED FORMATS`_ for a list of valid ``<format>`` values. If unspecified, the output format is assumed to - be the same as the input file's format. + be the same as the value specified for :option:`--input-target` or the input + file's format if that option is also unspecified. .. option:: --prefix-alloc-sections diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8d86c0610a2b3..cddb5700a8399 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1056,6 +1056,26 @@ Currently, only the following parameter attributes are defined: form and the known alignment of the pointer specified to the call site. If the alignment is not specified, then the code generator makes a target-specific assumption. +``preallocated(<ty>)`` + This indicates that the pointer parameter should really be passed by + value to the function, and that the pointer parameter's pointee has + already been initialized before the call instruction. This attribute + is only valid on LLVM pointer arguments. The argument must be the value + returned by the appropriate + :ref:`llvm.call.preallocated.arg <int_call_preallocated_arg>`, although it is + ignored during codegen. + + Any function call with a ``preallocated`` attribute in any parameter + must have a ``"preallocated"`` operand bundle. + + The preallocated attribute requires a type argument, which must be + the same as the pointee type of the argument. + + The preallocated attribute also supports specifying an alignment with the + align attribute. It indicates the alignment of the stack slot to + form and the known alignment of the pointer specified to the call + site. If the alignment is not specified, then the code generator + makes a target-specific assumption. .. _attr_inalloca: @@ -1953,6 +1973,12 @@ attributes are supported: := optional, custom name of the vector function +``preallocated(<ty>)`` + This attribute is required on calls to ``llvm.call.preallocated.arg`` + and cannot be used on any other call.
See + :ref:`llvm.call.preallocated.arg <int_call_preallocated_arg>` for more + details. + .. _glattrs: Global Attributes @@ -2165,6 +2191,33 @@ benefits: simplifies and improves heuristics, e.g., for use "use-sensitive" optimizations. +.. _ob_preallocated: + +Preallocated Operand Bundles +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Preallocated operand bundles are characterized by the ``"preallocated"`` +operand bundle tag. These operand bundles allow separation of the allocation +of the call argument memory from the call site. This is necessary to pass +non-trivially copyable objects by value in a way that is compatible with MSVC +on some targets. There can be at most one ``"preallocated"`` operand bundle +attached to a call site and it must have exactly one bundle operand, which is +a token generated by ``@llvm.call.preallocated.setup``. A call with this +operand bundle should not adjust the stack before entering the function, as +that will have been done by one of the ``@llvm.call.preallocated.*`` intrinsics. + +.. code-block:: llvm + + %foo = type { i64, i32 } + + ... + + %t = call token @llvm.call.preallocated.setup(i32 1) + %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%foo) + %b = bitcast i8* %a to %foo* + ; initialize %b + call void @bar(i32 42, %foo* preallocated(%foo) %b) ["preallocated"(token %t)] + .. _moduleasm: Module-Level Inline Assembly @@ -11874,6 +11927,90 @@ call a helper function, read from an alternate memory space, or perform other operations necessary to locate the TLS area. Not all targets support this intrinsic. +'``llvm.call.preallocated.setup``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare token @llvm.call.preallocated.setup(i32 %num_args) + +Overview: +""""""""" + +The '``llvm.call.preallocated.setup``' intrinsic returns a token which can +be used with a call's ``"preallocated"`` operand bundle to indicate that +certain arguments are allocated and initialized before the call. + +Semantics: +"""""""""" + +The '``llvm.call.preallocated.setup``' intrinsic returns a token which is +associated with at most one call. The token can be passed to +'``@llvm.call.preallocated.arg``' to get a pointer to the +corresponding argument. The token must be the parameter to a +``"preallocated"`` operand bundle for the corresponding call. + +Nested calls to '``llvm.call.preallocated.setup``' are allowed, but must +be properly nested, e.g. + +.. code-block:: llvm + + %t1 = call token @llvm.call.preallocated.setup(i32 0) + %t2 = call token @llvm.call.preallocated.setup(i32 0) + call void foo() ["preallocated"(token %t2)] + call void foo() ["preallocated"(token %t1)] + +is allowed, but not + +.. code-block:: llvm + + %t1 = call token @llvm.call.preallocated.setup(i32 0) + %t2 = call token @llvm.call.preallocated.setup(i32 0) + call void foo() ["preallocated"(token %t1)] + call void foo() ["preallocated"(token %t2)] + +.. _int_call_preallocated_arg: + +'``llvm.call.preallocated.arg``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.call.preallocated.arg(token %setup_token, i32 %arg_index) + +Overview: +""""""""" + +The '``llvm.call.preallocated.arg``' intrinsic returns a pointer to the +corresponding preallocated argument for the preallocated call.
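For orientation, the setup intrinsic, the arg intrinsic, and the operand bundle are typically emitted together. The following C++ sketch shows one plausible way to build that pattern with ``IRBuilder``; it is a hedged illustration, not part of this change: ``emitPreallocatedCall`` and its arguments are hypothetical, and the required ``preallocated(<ty>)`` call-site attributes are omitted for brevity. It assumes the ``Intrinsic::call_preallocated_setup``/``call_preallocated_arg`` declarations that accompany this change.

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"

  using namespace llvm;

  void emitPreallocatedCall(IRBuilder<> &B, Module &M, FunctionCallee Callee) {
    Function *Setup =
        Intrinsic::getDeclaration(&M, Intrinsic::call_preallocated_setup);
    Function *Arg =
        Intrinsic::getDeclaration(&M, Intrinsic::call_preallocated_arg);

    // %t = call token @llvm.call.preallocated.setup(i32 1)
    Value *Token = B.CreateCall(Setup, {B.getInt32(1)});
    // %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
    Value *ArgPtr = B.CreateCall(Arg, {Token, B.getInt32(0)});
    // ... initialize the argument memory through ArgPtr here ...

    // The actual call carries the setup token in a "preallocated" bundle.
    OperandBundleDef Bundle("preallocated", {Token});
    B.CreateCall(Callee, {ArgPtr}, {Bundle});
  }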
+ +Semantics: +"""""""""" + +The '``llvm.call.preallocated.arg``' intrinsic returns a pointer to the +``%arg_index``th argument with the ``preallocated`` attribute for +the call associated with the ``%setup_token``, which must be from +'``llvm.call.preallocated.setup``'. + +A call to '``llvm.call.preallocated.arg``' must have a call site +``preallocated`` attribute. The type of the ``preallocated`` attribute must +match the type used by the ``preallocated`` attribute of the corresponding +argument at the preallocated call. The type is used in the case that an +``llvm.call.preallocated.setup`` does not have a corresponding call (e.g. due +to DCE), where otherwise we cannot know how large the arguments are. + +It is undefined behavior if this is called with a token from an +'``llvm.call.preallocated.setup``' after another +'``llvm.call.preallocated.setup``' has already been called, or after the +preallocated call corresponding to the '``llvm.call.preallocated.setup``' +has already been called. + Standard C Library Intrinsics ----------------------------- diff --git a/llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener/LLJITWithGDBRegistrationListener.cpp b/llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener/LLJITWithGDBRegistrationListener.cpp index 5cf7cd00ffc53..493d1214c6025 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener/LLJITWithGDBRegistrationListener.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener/LLJITWithGDBRegistrationListener.cpp @@ -69,8 +69,14 @@ int main(int argc, char *argv[]) { auto ObjLinkingLayer = std::make_unique<RTDyldObjectLinkingLayer>( ES, std::move(GetMemMgr)); + + // Register the event listener. ObjLinkingLayer->registerJITEventListener( *JITEventListener::createGDBRegistrationListener()); + + // Make sure the debug info sections aren't stripped. + ObjLinkingLayer->setProcessAllSections(true); + return ObjLinkingLayer; }) .create()); diff --git a/llvm/examples/OrcV2Examples/LLJITWithLazyReexports/LLJITWithLazyReexports.cpp b/llvm/examples/OrcV2Examples/LLJITWithLazyReexports/LLJITWithLazyReexports.cpp index b75d5610f9e64..5d4a27c432436 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithLazyReexports/LLJITWithLazyReexports.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithLazyReexports/LLJITWithLazyReexports.cpp @@ -10,7 +10,7 @@ // IR modules. We will do this in seven steps: // // 1. Create an LLJIT instance. -// 2. Install a transform so that we is being compiled. +// 2. Install a transform so that we can see what is being compiled. // 3. Create an indirect stubs manager and lazy call-through manager. // 4. Add two modules that will be conditionally compiled, plus a main module. // 5. Add lazy-reexports of the symbols in the conditionally compiled modules. diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 2d6490dcbcf1d..25802edc99828 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -3252,8 +3252,8 @@ LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef C); * This expects an LLVMValueRef that corresponds to a llvm::CallInst or * llvm::InvokeInst.
* - * @see llvm::CallInst::getCalledValue() - * @see llvm::InvokeInst::getCalledValue() + * @see llvm::CallInst::getCalledOperand() + * @see llvm::InvokeInst::getCalledOperand() */ LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr); diff --git a/llvm/include/llvm/ADT/CachedHashString.h b/llvm/include/llvm/ADT/CachedHashString.h index 80144fb87e0e4..6233d0fc08fd6 100644 --- a/llvm/include/llvm/ADT/CachedHashString.h +++ b/llvm/include/llvm/ADT/CachedHashString.h @@ -19,9 +19,8 @@ #ifndef LLVM_ADT_CACHED_HASH_STRING_H #define LLVM_ADT_CACHED_HASH_STRING_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h index 9afb715ae1db5..07edc3d8e4ec7 100644 --- a/llvm/include/llvm/ADT/DenseSet.h +++ b/llvm/include/llvm/ADT/DenseSet.h @@ -66,6 +66,12 @@ class DenseSetImpl { explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {} + template <typename InputIt> + DenseSetImpl(const InputIt &I, const InputIt &E) + : DenseSetImpl(PowerOf2Ceil(std::distance(I, E))) { + insert(I, E); + } + DenseSetImpl(std::initializer_list<ValueT> Elems) : DenseSetImpl(PowerOf2Ceil(Elems.size())) { insert(Elems.begin(), Elems.end()); diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index d9a1d0cce2c55..71ad4fc1fe2b8 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1108,7 +1108,7 @@ class indexed_accessor_range_base { }; indexed_accessor_range_base(iterator begin, iterator end) - : base(DerivedT::offset_base(begin.getBase(), begin.getIndex())), + : base(offset_base(begin.getBase(), begin.getIndex())), count(end.getIndex() - begin.getIndex()) {} indexed_accessor_range_base(const iterator_range<iterator> &range) : indexed_accessor_range_base(range.begin(), range.end()) {} @@ -1141,7 +1141,7 @@ class indexed_accessor_range_base { /// Drop the first N elements, and keep M elements. DerivedT slice(size_t n, size_t m) const { assert(n + m <= size() && "invalid size specifiers"); - return DerivedT(DerivedT::offset_base(base, n), m); + return DerivedT(offset_base(base, n), m); } /// Drop the first n elements. @@ -1174,6 +1174,15 @@ class indexed_accessor_range_base { return RangeT(iterator_range(*this)); } + /// Returns the base of this range. + const BaseT &getBase() const { return base; } + +private: + /// Offset the given base by the given amount. + static BaseT offset_base(const BaseT &base, size_t n) { + return n == 0 ? base : DerivedT::offset_base(base, n); + } + protected: indexed_accessor_range_base(const indexed_accessor_range_base &) = default; indexed_accessor_range_base(indexed_accessor_range_base &&) = default; @@ -1450,24 +1459,6 @@ inline void sort(Container &&C, Compare Comp) { // Extra additions to <algorithm> //===----------------------------------------------------------------------===// -/// For a container of pointers, deletes the pointers and then clears the -/// container. -template <typename Container> -void DeleteContainerPointers(Container &C) { - for (auto V : C) - delete V; - C.clear(); -} - -/// In a container of pairs (usually a map) whose second element is a pointer, -/// deletes the second elements and then clears the container. -template <typename Container> -void DeleteContainerSeconds(Container &C) { - for (auto &V : C) - delete V.second; - C.clear(); -} - /// Get the size of a range. This is a wrapper function around std::distance /// which is only enabled when the operation is O(1).
template <typename R> diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 5656a283681f6..6313fc563beae 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -16,10 +16,10 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemAlloc.h" #include "llvm/Support/type_traits.h" -#include "llvm/Support/ErrorHandling.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -27,6 +27,7 @@ #include <cstring> #include <initializer_list> #include <iterator> +#include <limits> #include <memory> #include <new> #include <type_traits> @@ -34,11 +35,23 @@ namespace llvm { -/// This is all the non-templated stuff common to all SmallVectors. -class SmallVectorBase { +/// This is all the stuff common to all SmallVectors. +/// +/// The template parameter specifies the type which should be used to hold the +/// Size and Capacity of the SmallVector, so it can be adjusted. +/// Using 32 bit size is desirable to shrink the size of the SmallVector. +/// Using 64 bit size is desirable for cases like SmallVector<char>, where a +/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for +/// buffering bitcode output - which can exceed 4GB. +template <class Size_T> class SmallVectorBase { protected: void *BeginX; - unsigned Size = 0, Capacity; + Size_T Size = 0, Capacity; + + /// The maximum value of the Size_T used. + static constexpr size_t SizeTypeMax() { + return std::numeric_limits<Size_T>::max(); + } SmallVectorBase() = delete; SmallVectorBase(void *FirstEl, size_t TotalCapacity) @@ -70,9 +83,14 @@ class SmallVectorBase { } }; +template <class T> +using SmallVectorSizeType = + typename std::conditional<sizeof(T) < 8 && sizeof(void *) >= 8, uint64_t, + uint32_t>::type; + /// Figure out the offset of the first element. template <class T, typename = void> struct SmallVectorAlignmentAndSize { - AlignedCharArrayUnion<SmallVectorBase> Base; + AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base; AlignedCharArrayUnion<T> FirstEl; }; @@ -80,7 +98,10 @@ template <class T, typename = void> struct SmallVectorAlignmentAndSize { /// the type T is a POD. The extra dummy template argument is used by ArrayRef /// to avoid unnecessarily requiring T to be complete. template <typename T, typename = void> -class SmallVectorTemplateCommon : public SmallVectorBase { +class SmallVectorTemplateCommon + : public SmallVectorBase<SmallVectorSizeType<T>> { + using Base = SmallVectorBase<SmallVectorSizeType<T>>; + /// Find the address of the first element. For this pointer math to be valid /// with small-size of 0 for T with lots of alignment, it's important that /// SmallVectorStorage is properly-aligned even for small-size of 0. @@ -92,21 +113,20 @@ class SmallVectorTemplateCommon : public SmallVectorBase { // Space after 'FirstEl' is clobbered, do not add any instance vars after it. protected: - SmallVectorTemplateCommon(size_t Size) - : SmallVectorBase(getFirstEl(), Size) {} + SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} void grow_pod(size_t MinCapacity, size_t TSize) { - SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize); + Base::grow_pod(getFirstEl(), MinCapacity, TSize); } /// Return true if this is a smallvector which has not had dynamic /// memory allocated for it. - bool isSmall() const { return BeginX == getFirstEl(); } + bool isSmall() const { return this->BeginX == getFirstEl(); } /// Put this vector in a state of being small. void resetToSmall() { - BeginX = getFirstEl(); - Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect. + this->BeginX = getFirstEl(); + this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
} public: @@ -124,6 +144,10 @@ class SmallVectorTemplateCommon : public SmallVectorBase { using pointer = T *; using const_pointer = const T *; + using Base::capacity; + using Base::empty; + using Base::size; + // forward iterator creation methods. iterator begin() { return (iterator)this->BeginX; } const_iterator begin() const { return (const_iterator)this->BeginX; } @@ -137,7 +161,9 @@ class SmallVectorTemplateCommon : public SmallVectorBase { const_reverse_iterator rend() const { return const_reverse_iterator(begin());} size_type size_in_bytes() const { return size() * sizeof(T); } - size_type max_size() const { return size_type(-1) / sizeof(T); } + size_type max_size() const { + return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T)); + } size_t capacity_in_bytes() const { return capacity() * sizeof(T); } @@ -174,9 +200,17 @@ class SmallVectorTemplateCommon : public SmallVectorBase { } }; -/// SmallVectorTemplateBase - This is where we put method -/// implementations that are designed to work with non-POD-like T's. -template ::value> +/// SmallVectorTemplateBase - This is where we put +/// method implementations that are designed to work with non-trivial T's. +/// +/// We approximate is_trivially_copyable with trivial move/copy construction and +/// trivial destruction. While the standard doesn't specify that you're allowed to +/// copy these types with memcpy, there is no way for the type to observe this. +/// This catches the important case of std::pair<POD, POD>, which is not +/// trivially assignable. +template <typename T, bool = (is_trivially_copy_constructible<T>::value) && + (is_trivially_move_constructible<T>::value) && + std::is_trivially_destructible<T>::value> class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { protected: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} @@ -232,18 +266,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { // Define this out-of-line to dissuade the C++ compiler from inlining it. template void SmallVectorTemplateBase::grow(size_t MinSize) { - if (MinSize > UINT32_MAX) + // Ensure we can fit the new capacity. + // This is only going to be applicable when the capacity is 32 bit. + if (MinSize > this->SizeTypeMax()) report_bad_alloc_error("SmallVector capacity overflow during allocation"); // Ensure we can meet the guarantee of space for at least one more element. // The above check alone will not catch the case where grow is called with a // default MinCapacity of 0, but the current capacity cannot be increased. - if (this->capacity() == size_t(UINT32_MAX)) + // This is only going to be applicable when the capacity is 32 bit. + if (this->capacity() == this->SizeTypeMax()) report_bad_alloc_error("SmallVector capacity unable to grow"); // Always grow, even from zero. size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); - NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX)); + NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax()); T *NewElts = static_cast<T *>(llvm::safe_malloc(NewCapacity*sizeof(T))); // Move the elements over. @@ -261,7 +298,9 @@ void SmallVectorTemplateBase::grow(size_t MinSize) { } /// SmallVectorTemplateBase - This is where we put -/// method implementations that are designed to work with POD-like T's. +/// method implementations that are designed to work with trivially copyable +/// T's. This allows using memcpy in place of copy/move construction and +/// skipping destruction.
template <typename T> class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { protected: diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 3f73c0f3d456c..56d5f3d05857f 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -107,6 +107,14 @@ inline bool isPrint(char C) { return (0x20 <= UC) && (UC <= 0x7E); } +/// Checks whether character \p C is whitespace in the "C" locale. +/// +/// Locale-independent version of the C standard library isspace. +inline bool isSpace(char C) { + return C == ' ' || C == '\f' || C == '\n' || C == '\r' || C == '\t' || + C == '\v'; +} + /// Returns the corresponding lowercase character if \p x is uppercase. inline char toLower(char x) { if (x >= 'A' && x <= 'Z') diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h index 19638f35d3e66..ea3aad6f1cb1b 100644 --- a/llvm/include/llvm/ADT/StringMapEntry.h +++ b/llvm/include/llvm/ADT/StringMapEntry.h @@ -125,7 +125,8 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> { // Free memory referenced by the item. size_t AllocSize = sizeof(StringMapEntry) + this->getKeyLength() + 1; this->~StringMapEntry(); - allocator.Deallocate(static_cast<void *>(this), AllocSize); + allocator.Deallocate(static_cast<void *>(this), AllocSize, + alignof(StringMapEntry)); } }; diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 8a3cfad13cb6c..4c538d82b8dad 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -55,8 +55,9 @@ class BranchProbabilityInfo { BranchProbabilityInfo() = default; BranchProbabilityInfo(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI = nullptr) { - calculate(F, LI, TLI); + const TargetLibraryInfo *TLI = nullptr, + PostDominatorTree *PDT = nullptr) { + calculate(F, LI, TLI, PDT); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) @@ -135,7 +136,7 @@ class BranchProbabilityInfo { } void calculate(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI = nullptr); + const TargetLibraryInfo *TLI, PostDominatorTree *PDT); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h index 1d1f3f4cc5c0b..eb72f2c5d14dd 100644 --- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h +++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h @@ -18,7 +18,7 @@ namespace llvm { // Visitor class that finds all indirect calls. struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> { - std::vector<Instruction *> IndirectCalls; + std::vector<CallBase *> IndirectCalls; PGOIndirectCallVisitor() {} void visitCallBase(CallBase &Call) { @@ -28,7 +28,7 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> { }; // Helper function that finds all indirect call sites.
-inline std::vector<Instruction *> findIndirectCalls(Function &F) { +inline std::vector<CallBase *> findIndirectCalls(Function &F) { PGOIndirectCallVisitor ICV; ICV.visit(F); return ICV.IndirectCalls; diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index df3bc1b87921e..0b37a99a56577 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -246,6 +246,20 @@ Optional<InlineResult> getAttributeBasedInliningDecision( CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref<const TargetLibraryInfo &(Function &)> GetTLI); +/// Get the cost estimate ignoring thresholds. This is similar to getInlineCost +/// when passed InlineParams::ComputeFullInlineCost, or a non-null ORE. It +/// uses default InlineParams otherwise. +/// Contrary to getInlineCost, which makes a threshold-based final evaluation of +/// should/shouldn't inline, captured in InlineResult, getInliningCostEstimate +/// returns: +/// - None, if the inlining cannot happen (is illegal) +/// - an integer, representing the cost. +Optional<int> getInliningCostEstimate( + CallBase &Call, TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); + /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); } // namespace llvm diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h index cae0778cd16d8..f4249f74104cf 100644 --- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -63,7 +63,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI, TLI); + BPI.calculate(*F, *LI, TLI, nullptr); Calculated = true; } return BPI; diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index dc950a9a4ea4e..26ddf92650d2f 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -324,9 +324,45 @@ class MemoryDepChecker { void mergeInStatus(VectorizationSafetyStatus S); }; +class RuntimePointerChecking; +/// A grouping of pointers. A single memcheck is required between +/// two groups. +struct RuntimeCheckingPtrGroup { + /// Create a new pointer checking group containing a single + /// pointer, with index \p Index in RtCheck. + RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck); + + /// Tries to add the pointer recorded in RtCheck at index + /// \p Index to this pointer checking group. We can only add a pointer + /// to a checking group if we will still be able to get + /// the upper and lower bounds of the check. Returns true in case + /// of success, false otherwise. + bool addPointer(unsigned Index); + + /// Constitutes the context of this pointer checking group. For each + /// pointer that is a member of this group we will retain the index + /// at which it appears in RtCheck. + RuntimePointerChecking &RtCheck; + /// The SCEV expression which represents the upper bound of all the + /// pointers in this group. + const SCEV *High; + /// The SCEV expression which represents the lower bound of all the + /// pointers in this group. + const SCEV *Low; + /// Indices of all the pointers that constitute this grouping.
+ SmallVector<unsigned, 2> Members; +}; + +/// A memcheck which is made up of a pair of grouped pointers. +typedef std::pair<const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup *> + RuntimePointerCheck; + /// Holds information about the memory runtime legality checks to verify /// that a group of pointers do not overlap. class RuntimePointerChecking { + friend struct RuntimeCheckingPtrGroup; + public: struct PointerInfo { /// Holds the pointer value that we need to check. @@ -376,59 +412,20 @@ class RuntimePointerChecking { /// No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } - /// A grouping of pointers. A single memcheck is required between - /// two groups. - struct CheckingPtrGroup { - /// Create a new pointer checking group containing a single - /// pointer, with index \p Index in RtCheck. - CheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck) - : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End), - Low(RtCheck.Pointers[Index].Start) { - Members.push_back(Index); - } - - /// Tries to add the pointer recorded in RtCheck at index - /// \p Index to this pointer checking group. We can only add a pointer - /// to a checking group if we will still be able to get - /// the upper and lower bounds of the check. Returns true in case - /// of success, false otherwise. - bool addPointer(unsigned Index); - - /// Constitutes the context of this pointer checking group. For each - /// pointer that is a member of this group we will retain the index - /// at which it appears in RtCheck. - RuntimePointerChecking &RtCheck; - /// The SCEV expression which represents the upper bound of all the - /// pointers in this group. - const SCEV *High; - /// The SCEV expression which represents the lower bound of all the - /// pointers in this group. - const SCEV *Low; - /// Indices of all the pointers that constitute this grouping. - SmallVector<unsigned, 2> Members; - }; - - /// A memcheck which made up of a pair of grouped pointers. - /// - /// These *have* to be const for now, since checks are generated from - /// CheckingPtrGroups in LAI::addRuntimeChecks which is a const member - /// function. FIXME: once check-generation is moved inside this class (after - /// the PtrPartition hack is removed), we could drop const. - typedef std::pair<const CheckingPtrGroup *, const CheckingPtrGroup *> - PointerCheck; - /// Generate the checks and store it. This also performs the grouping /// of pointers to reduce the number of memchecks necessary. void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies); /// Returns the checks that generateChecks created. - const SmallVector<PointerCheck, 4> &getChecks() const { return Checks; } + const SmallVector<RuntimePointerCheck, 4> &getChecks() const { + return Checks; + } /// Decide if we need to add a check between two groups of pointers, /// according to needsChecking. - bool needsChecking(const CheckingPtrGroup &M, - const CheckingPtrGroup &N) const; + bool needsChecking(const RuntimeCheckingPtrGroup &M, + const RuntimeCheckingPtrGroup &N) const; /// Returns the number of run-time checks required according to /// needsChecking. @@ -438,7 +435,8 @@ class RuntimePointerChecking { void print(raw_ostream &OS, unsigned Depth = 0) const; /// Print \p Checks. - void printChecks(raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + void printChecks(raw_ostream &OS, + const SmallVectorImpl<RuntimePointerCheck> &Checks, unsigned Depth = 0) const; /// This flag indicates if we need to add the runtime check. bool Need; SmallVector<PointerInfo, 2> Pointers; /// Holds a partitioning of pointers into "check groups".
- SmallVector<CheckingPtrGroup, 2> CheckingGroups; + SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups; /// Check if pointers are in the same partition /// @@ -476,15 +474,14 @@ class RuntimePointerChecking { bool UseDependencies); /// Generate the checks and return them. - SmallVector<PointerCheck, 4> - generateChecks() const; + SmallVector<RuntimePointerCheck, 4> generateChecks() const; /// Holds a pointer to the ScalarEvolution analysis. ScalarEvolution *SE; /// Set of run-time checks required to establish independence of /// otherwise may-aliasing pointers in the loop. - SmallVector<PointerCheck, 4> Checks; + SmallVector<RuntimePointerCheck, 4> Checks; }; /// Drive the analysis of memory accesses in the loop @@ -557,10 +554,9 @@ class LoopAccessInfo { /// Returns a pair of instructions where the first element is the first /// instruction generated in possibly a sequence of instructions and the /// second value is the final comparator value or NULL if no check is needed. - std::pair<Instruction *, Instruction *> - addRuntimeChecks(Instruction *Loc, - const SmallVectorImpl<RuntimePointerChecking::PointerCheck> - &PointerChecks) const; + std::pair<Instruction *, Instruction *> addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl<RuntimePointerCheck> &PointerChecks) const; /// The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index cc79b7ea776b4..24b806b2f4987 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -723,6 +723,8 @@ class MemorySSA { return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<const Value *>(BB))); } + DominatorTree &getDomTree() const { return *DT; } + void dump() const; void print(raw_ostream &) const; @@ -1179,9 +1181,9 @@ class upward_defs_iterator using BaseT = upward_defs_iterator::iterator_facade_base; public: - upward_defs_iterator(const MemoryAccessPair &Info) + upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT) : DefIterator(Info.first), Location(Info.second), - OriginalAccess(Info.first) { + OriginalAccess(Info.first), DT(DT) { CurrentPair.first = nullptr; WalkingPhi = Info.first && isa<MemoryPhi>(Info.first); @@ -1220,12 +1222,13 @@ class upward_defs_iterator const_cast<Value *>(Location.Ptr), OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), - DefIterator.getPhiArgBlock(), nullptr, - false)) + DefIterator.getPhiArgBlock(), DT, - false)) { if (Translator.getAddr() != Location.Ptr) { CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); return; } + } } CurrentPair.second = Location; } @@ -1235,17 +1238,19 @@ class upward_defs_iterator MemoryLocation Location; MemoryAccess *OriginalAccess = nullptr; bool WalkingPhi = false; + DominatorTree *DT = nullptr; }; -inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) { - return upward_defs_iterator(Pair); +inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair, + DominatorTree &DT) { + return upward_defs_iterator(Pair, &DT); } inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } inline iterator_range<upward_defs_iterator> -upward_defs(const MemoryAccessPair &Pair) { - return make_range(upward_defs_begin(Pair), upward_defs_end()); +upward_defs(const MemoryAccessPair &Pair, DominatorTree &DT) { + return make_range(upward_defs_begin(Pair, DT), upward_defs_end()); } /// Walks the defining accesses of MemoryDefs.
Stops after we hit something that diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 16da0c0a46503..181fdacad2334 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -416,11 +416,6 @@ struct MustBeExecutedContextExplorer { ExploreCFGBackward(ExploreCFGBackward), LIGetter(LIGetter), DTGetter(DTGetter), PDTGetter(PDTGetter), EndIterator(*this, nullptr) {} - /// Clean up the dynamically allocated iterators. - ~MustBeExecutedContextExplorer() { - DeleteContainerSeconds(InstructionIteratorMap); - } - /// Iterator-based interface. \see MustBeExecutedIterator. ///{ using iterator = MustBeExecutedIterator; @@ -428,15 +423,15 @@ struct MustBeExecutedContextExplorer { /// Return an iterator to explore the context around \p PP. iterator &begin(const Instruction *PP) { - auto *&It = InstructionIteratorMap[PP]; + auto &It = InstructionIteratorMap[PP]; if (!It) - It = new iterator(*this, PP); + It.reset(new iterator(*this, PP)); return *It; } /// Return an iterator to explore the cached context around \p PP. const_iterator &begin(const Instruction *PP) const { - return *InstructionIteratorMap.lookup(PP); + return *InstructionIteratorMap.find(PP)->second; } /// Return an universal end iterator. @@ -544,7 +539,7 @@ struct MustBeExecutedContextExplorer { DenseMap<const Function *, Optional<bool>> IrreducibleControlMap; /// Map from instructions to associated must be executed iterators. - DenseMap<const Instruction *, iterator *> + DenseMap<const Instruction *, std::unique_ptr<iterator>> InstructionIteratorMap; /// A unique end iterator. diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index e961f6ba8eaf2..3edcc9894cf75 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -151,9 +151,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) { return false; // Special arguments can not be a valid retainable object pointer. if (const Argument *Arg = dyn_cast<Argument>(Op)) - if (Arg->hasByValAttr() || - Arg->hasInAllocaAttr() || - Arg->hasNestAttr() || + if (Arg->hasPassPointeeByValueAttr() || Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; // Only consider values with pointer types. diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 50057a26d9064..e293d069f1f1b 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -25,7 +25,6 @@ namespace llvm { class BasicBlock; class BlockFrequencyInfo; class CallBase; -class Instruction; class ProfileSummary; /// Analysis providing profile information. /// @@ -73,6 +72,13 @@ class ProfileSummaryInfo { Summary->getKind() == ProfileSummary::PSK_Sample; } + /// Returns true if module \c M has partial-profile sample profile. + bool hasPartialSampleProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Sample && + Summary->isPartialProfile(); + } + /// Returns true if module \c M has instrumentation profile. bool hasInstrumentationProfile() { return hasProfileSummary() && @@ -97,7 +103,7 @@ class ProfileSummaryInfo { } /// Returns the profile count for \p CallInst. - Optional<uint64_t> getProfileCount(const Instruction *CallInst, + Optional<uint64_t> getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, bool AllowSynthetic = false); /// Returns true if the working set size of the code is considered huge.
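Since getProfileCount now takes a ``CallBase`` reference instead of an ``Instruction`` pointer, call sites query it roughly as follows. This is a small illustrative sketch, not part of this patch; ``isHotCallSite`` is a hypothetical helper name:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Analysis/ProfileSummaryInfo.h"
  #include "llvm/IR/InstrTypes.h"

  using namespace llvm;

  // Classify a call site as hot using the CallBase-based getProfileCount
  // declared above; BFI may be null when block counts are unavailable.
  bool isHotCallSite(const CallBase &CB, ProfileSummaryInfo &PSI,
                     BlockFrequencyInfo *BFI) {
    if (Optional<uint64_t> Count = PSI.getProfileCount(CB, BFI))
      return PSI.isHotCount(*Count);
    return false;
  }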
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 07e34821c76f8..f3e57567b6bd6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -105,6 +105,9 @@ struct HardwareLoopInfo { bool canAnalyze(LoopInfo &LI); }; +class TargetTransformInfo; +typedef TargetTransformInfo TTI; + /// This pass provides access to the codegen interfaces that are needed /// for IR-level transformations. class TargetTransformInfo { @@ -153,7 +156,8 @@ class TargetTransformInfo { enum TargetCostKind { TCK_RecipThroughput, ///< Reciprocal throughput. TCK_Latency, ///< The latency of instruction. - TCK_CodeSize ///< Instruction code size. + TCK_CodeSize, ///< Instruction code size. + TCK_SizeAndLatency ///< The weighted sum of size and latency. }; /// Query the cost of a specified instruction. @@ -172,7 +176,8 @@ class TargetTransformInfo { return getInstructionLatency(I); case TCK_CodeSize: - return getUserCost(I); + case TCK_SizeAndLatency: + return getUserCost(I, kind); } llvm_unreachable("Unknown instruction cost kind"); } @@ -203,7 +208,8 @@ class TargetTransformInfo { /// Estimate the cost of a GEP operation when lowered. int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands) const; + ArrayRef<const Value *> Operands, + TargetCostKind CostKind = TCK_SizeAndLatency) const; /// Estimate the cost of a EXT operation when lowered. int getExtCost(const Instruction *I, const Value *Src) const; @@ -231,12 +237,14 @@ class TargetTransformInfo { /// Estimate the cost of an intrinsic when lowered. int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys, - const User *U = nullptr) const; + const User *U = nullptr, + TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; /// Estimate the cost of an intrinsic when lowered. int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments, - const User *U = nullptr) const; + const User *U = nullptr, + TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; /// \return the expected cost of a memcpy, which could e.g. depend on the /// source/destination type and alignment and the number of bytes copied. @@ -263,14 +271,15 @@ class TargetTransformInfo { /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef<const Value *> Operands) const; + int getUserCost(const User *U, ArrayRef<const Value *> Operands, + TargetCostKind CostKind) const; /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. - int getUserCost(const User *U) const { + int getUserCost(const User *U, TargetCostKind CostKind) const { SmallVector<const Value *, 4> Operands(U->value_op_begin(), U->value_op_end()); - return getUserCost(U, Operands); + return getUserCost(U, Operands, CostKind); } /// Return true if branch divergence exists. @@ -608,8 +617,15 @@ class TargetTransformInfo { /// should use coldcc calling convention. bool useColdCCForColdCall(Function &F) const; - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the demanded result elements need to be inserted and/or + /// extracted from vectors.
+ unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract) const; + /// Estimate the overhead of scalarizing an instructions unique + /// non-constant operands. The types of the arguments are ordinarily + /// scalar, in which case the costs are multiplied with VF. unsigned getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF) const; @@ -692,15 +708,15 @@ class TargetTransformInfo { /// Return the expected cost of materializing for the given integer /// immediate of the specified type. - int getIntImmCost(const APInt &Imm, Type *Ty) const; + int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const; /// Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) const; + Type *Ty, TargetCostKind CostKind) const; int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const; + Type *Ty, TargetCostKind CostKind) const; /// Return the expected cost for the given integer when optimising /// for size. This is different than the other integer immediate cost @@ -866,7 +882,9 @@ class TargetTransformInfo { /// \p CxtI is the optional original context instruction, if one exists, to /// provide even more information. int getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + OperandValueKind Opd1Info = OK_AnyValue, OperandValueKind Opd2Info = OK_AnyValue, OperandValueProperties Opd1PropInfo = OP_None, OperandValueProperties Opd2PropInfo = OP_None, @@ -885,6 +903,7 @@ class TargetTransformInfo { /// zext, etc. If there is an existing instruction that holds Opcode, it /// may be passed in the 'I' parameter. int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, const Instruction *I = nullptr) const; /// \return The expected cost of a sign- or zero-extended vector extract. Use @@ -894,12 +913,14 @@ class TargetTransformInfo { /// \return The expected cost of control-flow related instructions such as /// Phi, Ret, Br. - int getCFInstrCost(unsigned Opcode) const; + int getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. @@ -909,11 +930,13 @@ class TargetTransformInfo { /// \return The cost of Load and Store instructions. int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. 
- int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + int getMaskedMemoryOpCost( + unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// \return The cost of Gather or Scatter operation /// \p Opcode - is a type of memory access Load or Store @@ -924,9 +947,10 @@ class TargetTransformInfo { /// \p Alignment - alignment of single element /// \p I - the optional original context instruction, if one exists, e.g. the /// load/store to transform or the call to the gather/scatter intrinsic - int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr) const; + int getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The cost of the interleaved memory operation. /// \p Opcode is the memory operation code @@ -938,11 +962,11 @@ class TargetTransformInfo { /// \p AddressSpace is address space of the pointer. /// \p UseMaskForCond indicates if the memory access is predicated. /// \p UseMaskForGaps indicates if gaps should be masked. - int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, - ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace, - bool UseMaskForCond = false, - bool UseMaskForGaps = false) const; + int getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + bool UseMaskForCond = false, bool UseMaskForGaps = false) const; /// Calculate the cost of performing a vector reduction. /// @@ -957,33 +981,39 @@ class TargetTransformInfo { /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm) const; - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned) const; + int getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + + int getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// \returns The cost of Intrinsic instructions. Analyses the real arguments. /// Three cases are handled: 1. scalar instruction 2. vector instruction /// 3. scalar instruction which is to be vectorized with VF. /// I is the optional original context instruction holding the call to the /// intrinsic - int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, - const Instruction *I = nullptr) const; + int getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Args, + FastMathFlags FMF, unsigned VF = 1, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \returns The cost of Intrinsic instructions. Types analysis only. /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the /// arguments and the return value will be computed based on types. 
/// I is the optional original context instruction holding the call to the /// intrinsic - int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, - FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr) const; + int getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \returns The cost of Call instructions. - int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) const; + int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; /// \returns The number of pieces into which the provided type must be /// split during legalization. Zero is returned when the answer is unknown. @@ -1156,21 +1186,25 @@ class TargetTransformInfo::Concept { virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; virtual int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) = 0; + ArrayRef Operands, + TTI::TargetCostKind CostKind) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual int getInlinerVectorBonusPercent() = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U) = 0; + ArrayRef ParamTys, const User *U, + enum TargetCostKind CostKind) = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments, - const User *U) = 0; + const User *U, + enum TargetCostKind CostKind) = 0; virtual int getMemcpyCost(const Instruction *I) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual int getUserCost(const User *U, ArrayRef Operands) = 0; + virtual int getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -1227,8 +1261,9 @@ class TargetTransformInfo::Concept { virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; - virtual unsigned getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) = 0; + virtual unsigned getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) = 0; virtual unsigned getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF) = 0; @@ -1250,11 +1285,13 @@ class TargetTransformInfo::Concept { virtual int getFPOpCost(Type *Ty) = 0; virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; + virtual int getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) = 0; + Type *Ty, TargetCostKind CostKind) = 0; virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) = 0; + const APInt &Imm, Type *Ty, + TargetCostKind CostKind) = 0; virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; @@ -1295,47 +1332,65 @@ class TargetTransformInfo::Concept { 
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI = nullptr) = 0; virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp) = 0; virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) = 0; - virtual int getCFInstrCost(unsigned Opcode) = 0; + virtual int getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I) = 0; + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) = 0; - virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr) = 0; + unsigned AddressSpace, + TTI::TargetCostKind CostKind) = 0; + virtual int getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) = 0; + virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace, bool UseMaskForCond = false, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm) = 0; + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) = 0; virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned) = 0; + bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) = 0; + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) = 0; virtual int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) = 0; + ArrayRef Tys, + TTI::TargetCostKind CostKind) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; @@ -1397,7 +1452,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { } int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) override { + ArrayRef Operands, + enum TargetTransformInfo::TargetCostKind CostKind) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } int getExtCost(const Instruction *I, const Value *Src) override { @@ 
-1411,19 +1467,22 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { } int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef ParamTys, - const User *U = nullptr) override { - return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U); + const User *U = nullptr, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { + return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); } int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments, - const User *U = nullptr) override { - return Impl.getIntrinsicCost(IID, RetTy, Arguments, U); + const User *U = nullptr, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { + return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); } int getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - int getUserCost(const User *U, ArrayRef Operands) override { - return Impl.getUserCost(U, Operands); + int getUserCost(const User *U, ArrayRef Operands, + TargetCostKind CostKind) override { + return Impl.getUserCost(U, Operands, CostKind); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool useGPUDivergenceAnalysis() override { @@ -1551,9 +1610,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.useColdCCForColdCall(F); } - unsigned getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) override { - return Impl.getScalarizationOverhead(Ty, Insert, Extract); + unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract) override { + return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } unsigned getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF) override { @@ -1601,16 +1660,17 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { Type *Ty) override { return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); } - int getIntImmCost(const APInt &Imm, Type *Ty) override { - return Impl.getIntImmCost(Imm, Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, + TargetCostKind CostKind) override { + return Impl.getIntImmCost(Imm, Ty, CostKind); } int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) override { - return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty); + Type *Ty, TargetCostKind CostKind) override { + return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind); } int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) override { - return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty); + Type *Ty, TargetCostKind CostKind) override { + return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); } unsigned getNumberOfRegisters(unsigned ClassID) const override { return Impl.getNumberOfRegisters(ClassID); @@ -1686,13 +1746,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI = nullptr) override { - return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, @@ -1700,67 +1761,84 @@ 
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) override { - return Impl.getCastInstrCost(Opcode, Dst, Src, I); + return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I); } int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) override { return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); } - int getCFInstrCost(unsigned Opcode) override { - return Impl.getCFInstrCost(Opcode); + int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override { + return Impl.getCFInstrCost(Opcode, CostKind); } int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I) override { - return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + unsigned AddressSpace, TTI::TargetCostKind CostKind, + const Instruction *I) override { + return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); } int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { - return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); - } - int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr) override { + unsigned AddressSpace, + TTI::TargetCostKind CostKind) override { + return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); + } + int getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) override { return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); } int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace, bool UseMaskForCond, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm) override { - return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) override { + return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, + CostKind); } int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned) override { - return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind) override { + return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, + CostKind); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, 
FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) override { return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } int getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) override { - return Impl.getCallInstrCost(F, RetTy, Tys); + ArrayRef Tys, + TTI::TargetCostKind CostKind) override { + return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); } unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 773608fa84c80..529cdbcb20dd0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -44,7 +44,8 @@ class TargetTransformInfoImplBase { const DataLayout &getDataLayout() const { return DL; } int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) { + ArrayRef Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -239,7 +240,8 @@ class TargetTransformInfoImplBase { bool useColdCCForColdCall(Function &F) { return false; } - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract) { return 0; } @@ -284,15 +286,19 @@ class TargetTransformInfoImplBase { return 0; } - unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } + unsigned getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + return TTI::TCC_Basic; + } unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { return TTI::TCC_Free; } unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { return TTI::TCC_Free; } @@ -365,6 +371,7 @@ class TargetTransformInfoImplBase { unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -380,6 +387,7 @@ class TargetTransformInfoImplBase { } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { switch (Opcode) { default: @@ -418,10 +426,12 @@ class TargetTransformInfoImplBase { return 1; } - unsigned getCFInstrCost(unsigned Opcode) { return 1; } + unsigned getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) { return 1; } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - const Instruction *I) { + TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } @@ -430,18 +440,21 @@ class TargetTransformInfoImplBase { } unsigned 
getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I) { + unsigned AddressSpace, TTI::TargetCostKind CostKind, + const Instruction *I) const { return 1; } unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, + TTI::TargetCostKind CostKind) { return 1; } - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr) { + unsigned getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { return 1; } @@ -449,24 +462,28 @@ class TargetTransformInfoImplBase { unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, - bool UseMaskForCond = false, - bool UseMaskForGaps = false) { + TTI::TargetCostKind CostKind, + bool UseMaskForCond, + bool UseMaskForGaps) { return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { + unsigned VF, TTI::TargetCostKind CostKind, + const Instruction *I) { return 1; } - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) { + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, + TTI::TargetCostKind CostKind) { return 1; } @@ -477,9 +494,11 @@ class TargetTransformInfoImplBase { return 0; } - unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; } + unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, + TTI::TargetCostKind) { return 1; } - unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; } + unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool, + TTI::TargetCostKind) { return 1; } unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) { return 0; } @@ -679,7 +698,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { using BaseT::getGEPCost; int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) { + ArrayRef Operands, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); // TODO: will remove this when pointers have an opaque type. assert(Ptr->getType()->getScalarType()->getPointerElementType() == @@ -737,7 +757,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U) { + ArrayRef ParamTys, const User *U, + TTI::TargetCostKind TCK_SizeAndLatency) { switch (IID) { default: // Intrinsics rarely (if ever) have normal argument setup constraints. @@ -781,7 +802,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U) { + ArrayRef Arguments, const User *U, + TTI::TargetCostKind CostKind) { // Delegate to the generic intrinsic handling code. This mostly provides an // opportunity for targets to (for example) special case the cost of // certain intrinsics based on constants used as arguments. 
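
The TargetTransformInfoImpl.h hunks above thread TTI::TargetCostKind through the CRTP base so that a generic query can re-dispatch to the most-derived implementation without losing the kind the caller asked about; defaulted parameters (e.g. TCK_SizeAndLatency on getGEPCost) keep old call sites compiling. A minimal sketch of that CRTP dispatch, with simplified names and types that are not LLVM's:

    enum class CostKind { RecipThroughput, Latency, CodeSize, SizeAndLatency };

    template <typename Derived> struct CostModelBase {
      // Generic query: forward the cost kind to the most-derived hook so a
      // target override sees exactly what the caller requested.
      unsigned getUserCost(unsigned Opcode, CostKind Kind) {
        return static_cast<Derived *>(this)->getInstrCost(Opcode, Kind);
      }
      // Default hook: one unit regardless of kind.
      unsigned getInstrCost(unsigned, CostKind) { return 1; }
    };

    struct MyTarget : CostModelBase<MyTarget> {
      // Target hook: instructions are free when measuring code size only.
      unsigned getInstrCost(unsigned Opcode, CostKind Kind) {
        (void)Opcode;
        return Kind == CostKind::CodeSize ? 0 : 1;
      }
    };

With this shape, MyTarget().getUserCost(0, CostKind::CodeSize) returns 0 while the same query with CostKind::Latency returns 1, which is the kind-sensitive behavior the CostKind plumbing in the real headers is meant to enable.
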
@@ -789,12 +811,15 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { ParamTys.reserve(Arguments.size()); for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) ParamTys.push_back(Arguments[Idx]->getType()); - return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U); + return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U, + CostKind); } - unsigned getUserCost(const User *U, ArrayRef Operands) { + unsigned getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast(this); + // FIXME: Unlikely to be true for anything but CodeSize. if (const auto *CB = dyn_cast(U)) { const Function *F = CB->getCalledFunction(); if (F) { @@ -802,7 +827,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { if (Intrinsic::ID IID = F->getIntrinsicID()) { SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(), - ParamTys, U); + ParamTys, U, CostKind); } if (!TargetTTI->isLoweredToCall(F)) @@ -841,16 +866,17 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { case Instruction::SRem: case Instruction::UDiv: case Instruction::URem: + // FIXME: Unlikely to be true for CodeSize. return TTI::TCC_Expensive; case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::Trunc: - if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free || - TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free) + if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free || + TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free) return TTI::TCC_Free; break; case Instruction::BitCast: - if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free) + if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free) return TTI::TCC_Free; break; case Instruction::FPExt: @@ -867,7 +893,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { int getInstructionLatency(const Instruction *I) { SmallVector Operands(I->value_op_begin(), I->value_op_end()); - if (getUserCost(I, Operands) == TTI::TCC_Free) + if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) return 0; if (isa(I)) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 89a2ea2922090..66c68be5b6cbb 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -96,6 +96,12 @@ struct VFShape { assert(hasValidParameterList() && "Invalid parameter list"); } + // Retrieve the VFShape that can be used to map a (scalar) function to itself, + // with VF = 1. + static VFShape getScalarShape(const CallInst &CI) { + return VFShape::get(CI, /*EC*/ {1, false}, /*HasGlobalPredicate*/ false); + } + // Retrieve the basic vectorization shape of the function, where all // parameters are mapped to VFParamKind::Vector with \p EC // lanes. Specifies whether the function has a Global Predicate @@ -186,6 +192,8 @@ void getVectorVariantNames(const CallInst &CI, class VFDatabase { /// The Module of the CallInst CI. const Module *M; + /// The CallInst instance being queried for scalar to vector mappings. + const CallInst &CI; /// List of vector functions descritors associated to the call /// instruction. const SmallVector ScalarToVectorMappings; @@ -233,13 +241,16 @@ class VFDatabase { /// Constructor, requires a CallInst instance. 
VFDatabase(CallInst &CI) - : M(CI.getModule()), ScalarToVectorMappings(VFDatabase::getMappings(CI)) { - } + : M(CI.getModule()), CI(CI), + ScalarToVectorMappings(VFDatabase::getMappings(CI)) {} /// \defgroup VFDatabase query interface. /// /// @{ /// Retrieve the Function with VFShape \p Shape. Function *getVectorizedFunction(const VFShape &Shape) const { + if (Shape == VFShape::getScalarShape(CI)) + return CI.getCalledFunction(); + for (const auto &Info : ScalarToVectorMappings) if (Info.Shape == Shape) return M->getFunction(Info.VectorName); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index dd743dbbbefa6..21c5bac2c500e 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -77,8 +77,8 @@ struct WasmInitExpr { union { int32_t Int32; int64_t Int64; - int32_t Float32; - int64_t Float64; + uint32_t Float32; + uint64_t Float64; uint32_t Global; } Value; }; diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index f09938a5788f5..130e1d8262b1e 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -13,7 +13,8 @@ #ifndef LLVM_BINARYFORMAT_XCOFF_H #define LLVM_BINARYFORMAT_XCOFF_H -#include +#include +#include namespace llvm { class StringRef; @@ -21,14 +22,13 @@ class StringRef; namespace XCOFF { // Constants used in the XCOFF definition. -enum { - FileNamePadSize = 6, - NameSize = 8, - SymbolTableEntrySize = 18, - RelocationSerializationSize32 = 10 -}; -enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; +constexpr size_t FileNamePadSize = 6; +constexpr size_t NameSize = 8; +constexpr size_t SymbolTableEntrySize = 18; +constexpr size_t RelocationSerializationSize32 = 10; + +enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. @@ -60,9 +60,10 @@ enum StorageMappingClass : uint8_t { XMC_TE = 22 ///< Symbol mapped at the end of TOC }; -// Flags for defining the section type. Used for the s_flags field of -// the section header structure. Defined in the system header `scnhdr.h`. -enum SectionTypeFlags { +// Flags for defining the section type. Masks for use with the (signed, 32-bit) +// s_flags field of the section header structure, selecting for values in the +// lower 16 bits. Defined in the system header `scnhdr.h`. +enum SectionTypeFlags : int32_t { STYP_PAD = 0x0008, STYP_DWARF = 0x0010, STYP_TEXT = 0x0020, @@ -147,7 +148,10 @@ enum StorageClass : uint8_t { C_TCSYM = 134 // Reserved }; -enum SymbolType { +// Flags for defining the symbol type. Values to be encoded into the lower 3 +// bits of the (unsigned, 8-bit) x_smtyp field of csect auxiliary symbol table +// entries. Defined in the system header `syms.h`. +enum SymbolType : uint8_t { XTY_ER = 0, ///< External reference. XTY_SD = 1, ///< Csect definition for initialized storage. XTY_LD = 2, ///< Label definition. 
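
The XCOFF.h hunk above replaces an untyped enum of mixed-purpose constants with typed constexpr variables and gives each remaining enum a fixed underlying type, which pins down the size and signedness of values that are serialized into object-file fields. A distilled illustration of why the fixed types matter (same names and values as the hunk; the asserts are added here for exposition):

    #include <cstddef>
    #include <cstdint>

    // With ': int16_t' the enumerators are guaranteed to be 16-bit signed,
    // matching the on-disk section-number field; an untyped enum would get
    // an implementation-chosen underlying type.
    enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };

    // Plain size constants need no enum at all; constexpr variables carry a
    // real type (size_t) instead of an unnamed-enum integer.
    constexpr size_t NameSize = 8;

    static_assert(sizeof(ReservedSectionNum) == 2, "16-bit encoding");
    static_assert(NameSize == 8, "fixed-width symbol name field");

The Wasm.h hunk follows the same theme from the other direction: Float32/Float64 hold raw IEEE-754 bit patterns, and storing those bits in uint32_t/uint64_t rather than signed integers avoids sign-extension surprises when the patterns are shifted or widened.
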
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 2cfd66b965028..9b2a3da4c8939 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -633,6 +633,7 @@ enum AttributeKindCodes { ATTR_KIND_NOFREE = 62, ATTR_KIND_NOSYNC = 63, ATTR_KIND_SANITIZE_MEMTAG = 64, + ATTR_KIND_PREALLOCATED = 65, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 0aca5cf3844b1..140e39d26da71 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -305,12 +305,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U) { - return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U); + ArrayRef Arguments, const User *U, + TTI::TargetCostKind CostKind) { + return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); } unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U) { + ArrayRef ParamTys, const User *U, + TTI::TargetCostKind CostKind) { if (IID == Intrinsic::cttz) { if (getTLI()->isCheapToSpeculateCttz()) return TargetTransformInfo::TCC_Basic; @@ -323,7 +325,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return TargetTransformInfo::TCC_Expensive; } - return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U); + return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); } unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, @@ -548,23 +550,36 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { - auto *VTy = cast(Ty); + /// are set if the demanded result elements need to be inserted and/or + /// extracted from vectors. + unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract) { + assert(DemandedElts.getBitWidth() == Ty->getNumElements() && + "Vector size mismatch"); + unsigned Cost = 0; - for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { + for (int i = 0, e = Ty->getNumElements(); i < e; ++i) { + if (!DemandedElts[i]) + continue; if (Insert) Cost += static_cast(this)->getVectorInstrCost( - Instruction::InsertElement, VTy, i); + Instruction::InsertElement, Ty, i); if (Extract) Cost += static_cast(this)->getVectorInstrCost( - Instruction::ExtractElement, VTy, i); + Instruction::ExtractElement, Ty, i); } return Cost; } + /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. + unsigned getScalarizationOverhead(VectorType *Ty, bool Insert, bool Extract) { + APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements()); + return static_cast(this)->getScalarizationOverhead(Ty, DemandedElts, + Insert, Extract); + } + /// Estimate the overhead of scalarizing an instructions unique /// non-constant operands. The types of the arguments are ordinarily /// scalar, in which case the costs are multiplied with VF. 
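
The BasicTTIImpl.h hunk above makes getScalarizationOverhead lane-aware: instead of charging an insert/extract for every element, it consults an APInt bitmask of demanded elements and skips undemanded lanes, with an all-ones convenience wrapper preserving the old whole-vector behavior. A standalone sketch of the accounting, using a plain 64-bit mask in place of llvm::APInt; the cost constants are illustrative:

    #include <cassert>
    #include <cstdint>

    constexpr unsigned LaneInsertCost = 1;  // toy per-lane insertelement cost
    constexpr unsigned LaneExtractCost = 1; // toy per-lane extractelement cost

    // Charge insert/extract traffic only for lanes whose bit is set in
    // DemandedElts, mirroring the DemandedElts loop in the hunk above.
    unsigned scalarizationOverhead(unsigned NumElts, uint64_t DemandedElts,
                                   bool Insert, bool Extract) {
      assert(NumElts <= 64 && "toy mask is only 64 bits wide");
      unsigned Cost = 0;
      for (unsigned I = 0; I != NumElts; ++I) {
        if (!(DemandedElts & (uint64_t(1) << I)))
          continue; // lane not demanded: no scalarization cost for it
        if (Insert)
          Cost += LaneInsertCost;
        if (Extract)
          Cost += LaneExtractCost;
      }
      return Cost;
    }

Demanding all lanes, e.g. scalarizationOverhead(4, 0xF, true, false), reproduces the old whole-vector cost, which is exactly what the APInt::getAllOnesValue wrapper in the hunk does.
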
@@ -574,11 +589,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { SmallPtrSet UniqueOperands; for (const Value *A : Args) { if (!isa(A) && UniqueOperands.insert(A).second) { - Type *VecTy = nullptr; - if (A->getType()->isVectorTy()) { - VecTy = A->getType(); + auto *VecTy = dyn_cast(A->getType()); + if (VecTy) { // If A is a vector operand, VF should be 1 or correspond to A. - assert((VF == 1 || VF == cast(VecTy)->getNumElements()) && + assert((VF == 1 || VF == VecTy->getNumElements()) && "Vector argument does not match VF"); } else @@ -591,17 +605,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } - unsigned getScalarizationOverhead(Type *VecTy, ArrayRef Args) { + unsigned getScalarizationOverhead(VectorType *Ty, ArrayRef Args) { unsigned Cost = 0; - auto *VecVTy = cast(VecTy); - Cost += getScalarizationOverhead(VecVTy, true, false); + Cost += getScalarizationOverhead(Ty, true, false); if (!Args.empty()) - Cost += getOperandsScalarizationOverhead(Args, VecVTy->getNumElements()); + Cost += getOperandsScalarizationOverhead(Args, Ty->getNumElements()); else // When no information on arguments is provided, we add the cost // associated with one argument as a heuristic. - Cost += getScalarizationOverhead(VecVTy, false, true); + Cost += getScalarizationOverhead(Ty, false, true); return Cost; } @@ -610,6 +623,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -646,7 +660,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (auto *VTy = dyn_cast(Ty)) { unsigned Num = VTy->getNumElements(); unsigned Cost = static_cast(this)->getArithmeticInstrCost( - Opcode, VTy->getScalarType()); + Opcode, VTy->getScalarType(), CostKind); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. return getScalarizationOverhead(VTy, Args) + Num * Cost; @@ -676,6 +690,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -723,13 +738,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; } + auto *SrcVTy = dyn_cast(Src); + auto *DstVTy = dyn_cast(Dst); + // If the cast is marked as legal (or promote) then assume low cost. if (SrcLT.first == DstLT.first && TLI->isOperationLegalOrPromote(ISD, DstLT.second)) return SrcLT.first; // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { + if (!SrcVTy && !DstVTy) { // Scalar bitcasts are usually free. if (Opcode == Instruction::BitCast) return 0; @@ -744,9 +762,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } // Check vector-to-vector casts. - if (Dst->isVectorTy() && Src->isVectorTy()) { - auto *SrcVTy = cast(Src); - auto *DstVTy = cast(Dst); + if (DstVTy && SrcVTy) { // If the cast is between same-sized registers, then the check is simple. 
if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { @@ -770,41 +786,48 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // of casting the original vector twice. We also need to factor in the // cost of the split itself. Count that as 1, to be consistent with // TLI->getTypeLegalizationCost(). - if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == - TargetLowering::TypeSplitVector || - TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == - TargetLowering::TypeSplitVector) && - SrcVTy->getNumElements() > 1 && DstVTy->getNumElements() > 1) { - Type *SplitDst = VectorType::get(DstVTy->getElementType(), - DstVTy->getNumElements() / 2); - Type *SplitSrc = VectorType::get(SrcVTy->getElementType(), - SrcVTy->getNumElements() / 2); + bool SplitSrc = + TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == + TargetLowering::TypeSplitVector; + bool SplitDst = + TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector; + if ((SplitSrc || SplitDst) && SrcVTy->getNumElements() > 1 && + DstVTy->getNumElements() > 1) { + Type *SplitDstTy = VectorType::get(DstVTy->getElementType(), + DstVTy->getNumElements() / 2); + Type *SplitSrcTy = VectorType::get(SrcVTy->getElementType(), + SrcVTy->getNumElements() / 2); T *TTI = static_cast(this); - return TTI->getVectorSplitCost() + - (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); + // If both types need to be split then the split is free. + unsigned SplitCost = + (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; + return SplitCost + + (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, + CostKind, I)); } // In other cases where the source or destination are illegal, assume // the operation will get scalarized. unsigned Num = DstVTy->getNumElements(); unsigned Cost = static_cast(this)->getCastInstrCost( - Opcode, Dst->getScalarType(), Src->getScalarType(), I); + Opcode, Dst->getScalarType(), Src->getScalarType(), + CostKind, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; + return getScalarizationOverhead(DstVTy, true, true) + Num * Cost; } // We already handled vector-to-vector and scalar-to-scalar conversions. // This // is where we handle bitcast between vectors and scalars. We need to assume // that the conversion is scalarized in one way or another. - if (Opcode == Instruction::BitCast) + if (Opcode == Instruction::BitCast) { // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) - : 0) + - (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) - : 0); + return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) + + (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0); + } llvm_unreachable("Unhandled cast"); } @@ -814,16 +837,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return static_cast(this)->getVectorInstrCost( Instruction::ExtractElement, VecTy, Index) + static_cast(this)->getCastInstrCost(Opcode, Dst, - VecTy->getElementType()); + VecTy->getElementType(), + TTI::TCK_RecipThroughput); } - unsigned getCFInstrCost(unsigned Opcode) { + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { // Branches are assumed to be predicted. 
return 0; } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - const Instruction *I) { + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -851,7 +876,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (CondTy) CondTy = CondTy->getScalarType(); unsigned Cost = static_cast(this)->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, I); + Opcode, ValVTy->getScalarType(), CondTy, CostKind, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -871,6 +896,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { assert(!Src->isVoidTy() && "Invalid type"); std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); @@ -893,7 +919,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. // We must account for the cost of building or decomposing the vector. - Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, + Cost += getScalarizationOverhead(cast(Src), + Opcode != Instruction::Store, Opcode == Instruction::Store); } } @@ -905,6 +932,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) { auto *VT = cast(VecTy); @@ -919,10 +947,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Cost; if (UseMaskForCond || UseMaskForGaps) Cost = static_cast(this)->getMaskedMemoryOpCost( - Opcode, VecTy, Alignment, AddressSpace); + Opcode, VecTy, Alignment, AddressSpace, CostKind); else Cost = static_cast(this)->getMemoryOpCost( - Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); + Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); // Legalize the vector type, and get the legalized and unlegalized type // sizes. @@ -1050,16 +1078,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // inside the loop. if (UseMaskForGaps) Cost += static_cast(this)->getArithmeticInstrCost( - BinaryOperator::And, MaskVT); + BinaryOperator::And, MaskVT, CostKind); return Cost; } /// Get intrinsic cost based on arguments. - unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, - const Instruction *I = nullptr) { + unsigned getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Args, + FastMathFlags FMF, unsigned VF = 1, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) { + unsigned RetVF = (RetTy->isVectorTy() ? 
cast(RetTy)->getNumElements() : 1); assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); @@ -1085,12 +1115,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (RetVF > 1 || VF > 1) { ScalarizationCost = 0; if (!RetTy->isVoidTy()) - ScalarizationCost += getScalarizationOverhead(RetTy, true, false); + ScalarizationCost += + getScalarizationOverhead(cast(RetTy), true, false); ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); } return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, - ScalarizationCost); + ScalarizationCost, CostKind, + I); } case Intrinsic::masked_scatter: { assert(VF == 1 && "Can't vectorize types here."); @@ -1099,7 +1131,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Alignment = cast(Args[2])->getZExtValue(); return ConcreteTTI->getGatherScatterOpCost(Instruction::Store, Args[0]->getType(), Args[1], - VarMask, Alignment, I); + VarMask, Alignment, CostKind, + I); } case Intrinsic::masked_gather: { assert(VF == 1 && "Can't vectorize types here."); @@ -1107,7 +1140,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { bool VarMask = !isa(Mask); unsigned Alignment = cast(Args[1])->getZExtValue(); return ConcreteTTI->getGatherScatterOpCost( - Instruction::Load, RetTy, Args[0], VarMask, Alignment, I); + Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: @@ -1122,7 +1155,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::experimental_vector_reduce_fmin: case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: - return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); + return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF, 1, + CostKind, I); case Intrinsic::fshl: case Intrinsic::fshr: { Value *X = Args[0]; @@ -1138,25 +1172,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) unsigned Cost = 0; - Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); - Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); + Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy, + CostKind); + Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, + CostKind); Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, + CostKind, OpKindX, OpKindZ, OpPropsX); Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, + CostKind, OpKindY, OpKindZ, OpPropsY); // Non-constant shift amounts requires a modulo. if (OpKindZ != TTI::OK_UniformConstantValue && OpKindZ != TTI::OK_NonUniformConstantValue) Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, + CostKind, OpKindZ, OpKindBW, OpPropsZ, OpPropsBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
if (X != Y) { Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, - CondTy, nullptr); + CondTy, CostKind); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, nullptr); + CondTy, CostKind); } return Cost; } @@ -1170,6 +1209,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getIntrinsicInstrCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = std::numeric_limits::max(), + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) { auto *ConcreteTTI = static_cast(this); auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast(Tys[0]); @@ -1182,21 +1222,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned ScalarizationCost = ScalarizationCostPassed; unsigned ScalarCalls = 1; Type *ScalarRetTy = RetTy; - if (RetTy->isVectorTy()) { + if (auto *RetVTy = dyn_cast(RetTy)) { if (ScalarizationCostPassed == std::numeric_limits::max()) - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = - std::max(ScalarCalls, cast(RetTy)->getNumElements()); + ScalarizationCost = getScalarizationOverhead(RetVTy, true, false); + ScalarCalls = std::max(ScalarCalls, RetVTy->getNumElements()); ScalarRetTy = RetTy->getScalarType(); } SmallVector ScalarTys; for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { Type *Ty = Tys[i]; - if (Ty->isVectorTy()) { + if (auto *VTy = dyn_cast(Ty)) { if (ScalarizationCostPassed == std::numeric_limits::max()) - ScalarizationCost += getScalarizationOverhead(Ty, false, true); - ScalarCalls = - std::max(ScalarCalls, cast(Ty)->getNumElements()); + ScalarizationCost += getScalarizationOverhead(VTy, false, true); + ScalarCalls = std::max(ScalarCalls, VTy->getNumElements()); Ty = Ty->getScalarType(); } ScalarTys.push_back(Ty); @@ -1205,7 +1243,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. 
unsigned ScalarCost = - ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); + ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF, + CostKind); return ScalarCalls * ScalarCost + ScalarizationCost; } @@ -1291,34 +1330,40 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return 0; case Intrinsic::masked_store: return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, - 0); + 0, CostKind); case Intrinsic::masked_load: - return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); + return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0, + CostKind); case Intrinsic::experimental_vector_reduce_add: return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case Intrinsic::experimental_vector_reduce_mul: return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case Intrinsic::experimental_vector_reduce_and: return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case Intrinsic::experimental_vector_reduce_or: return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case Intrinsic::experimental_vector_reduce_xor: return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case Intrinsic::experimental_vector_reduce_v2_fadd: + // FIXME: Add new flag for cost of strict reductions. return ConcreteTTI->getArithmeticReductionCost( Instruction::FAdd, VecOpTy, - /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict - // reductions. + /*IsPairwiseForm=*/false, CostKind); case Intrinsic::experimental_vector_reduce_v2_fmul: + // FIXME: Add new flag for cost of strict reductions. return ConcreteTTI->getArithmeticReductionCost( Instruction::FMul, VecOpTy, - /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict - // reductions. 
+ /*IsPairwiseForm=*/false, CostKind); case Intrinsic::experimental_vector_reduce_smax: case Intrinsic::experimental_vector_reduce_smin: case Intrinsic::experimental_vector_reduce_fmax: @@ -1326,13 +1371,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return ConcreteTTI->getMinMaxReductionCost( VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, - /*IsUnsigned=*/false); + /*IsUnsigned=*/false, CostKind); case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: return ConcreteTTI->getMinMaxReductionCost( VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, - /*IsUnsigned=*/true); + /*IsUnsigned=*/true, CostKind); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -1346,11 +1391,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // SatMin -> Overflow && SumDiff >= 0 unsigned Cost = 0; Cost += ConcreteTTI->getIntrinsicInstrCost( - OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); + OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed, + CostKind); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, - CondTy, nullptr); + CondTy, CostKind); Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, nullptr); + CondTy, CostKind); return Cost; } case Intrinsic::uadd_sat: @@ -1364,9 +1410,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Cost = 0; Cost += ConcreteTTI->getIntrinsicInstrCost( - OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); + OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed, + CostKind); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, nullptr); + CondTy, CostKind); return Cost; } case Intrinsic::smul_fix: @@ -1378,17 +1425,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; unsigned Cost = 0; - Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); - Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); + Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind); + Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy, + CostKind); Cost += - 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); + 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, + CostKind); Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, + CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, + CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); + Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy, + CostKind); return Cost; } case Intrinsic::sadd_with_overflow: @@ -1408,13 +1460,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) unsigned Cost = 0; - Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); + Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind); Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, nullptr); + OverflowTy, CostKind); Cost += 2 * ConcreteTTI->getCmpSelInstrCost( - BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); + BinaryOperator::ICmp, OverflowTy, OverflowTy, CostKind); Cost += - ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); + ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, + CostKind); return Cost; } case Intrinsic::uadd_with_overflow: @@ -1426,9 +1479,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { : BinaryOperator::Sub; unsigned Cost = 0; - Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); + Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, nullptr); + OverflowTy, CostKind); return Cost; } case Intrinsic::smul_with_overflow: @@ -1442,21 +1495,24 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; unsigned Cost = 0; - Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); - Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); + Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind); + Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy, + CostKind); Cost += - 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); + 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, + CostKind); Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, + CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); if (IID == Intrinsic::smul_with_overflow) Cost += ConcreteTTI->getArithmeticInstrCost( - Instruction::AShr, MulTy, TTI::OK_AnyValue, + Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, - OverflowTy, nullptr); + OverflowTy, CostKind); return Cost; } case Intrinsic::ctpop: @@ -1513,24 +1569,27 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // If we can't lower fmuladd into an FMA estimate the cost as a floating // point mul followed by an add. if (IID == Intrinsic::fmuladd) - return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + - ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); + return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy, + CostKind) + + ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy, + CostKind); if (IID == Intrinsic::experimental_constrained_fmuladd) return ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fmul, RetTy, Tys, - nullptr) + + Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr, + CostKind) + ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr); + Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr, + CostKind); // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. - if (RetTy->isVectorTy()) { + if (auto *RetVTy = dyn_cast(RetTy)) { unsigned ScalarizationCost = ((ScalarizationCostPassed != std::numeric_limits::max()) ? 
ScalarizationCostPassed - : getScalarizationOverhead(RetTy, true, false)); - unsigned ScalarCalls = cast(RetTy)->getNumElements(); + : getScalarizationOverhead(RetVTy, true, false)); + unsigned ScalarCalls = RetVTy->getNumElements(); SmallVector ScalarTys; for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { Type *Ty = Tys[i]; @@ -1539,16 +1598,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ScalarTys.push_back(Ty); } unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( - IID, RetTy->getScalarType(), ScalarTys, FMF); + IID, RetTy->getScalarType(), ScalarTys, FMF, CostKind); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { + if (auto *VTy = dyn_cast(Tys[i])) { if (ScalarizationCostPassed == std::numeric_limits::max()) - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = - std::max(ScalarCalls, cast(Tys[i])->getNumElements()); + ScalarizationCost += getScalarizationOverhead(VTy, false, true); + ScalarCalls = std::max(ScalarCalls, VTy->getNumElements()); } } - return ScalarCalls * ScalarCost + ScalarizationCost; } @@ -1567,7 +1624,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// \param RetTy Return value types. /// \param Tys Argument types. /// \returns The cost of Call instruction. - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) { + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { return 10; } @@ -1617,7 +1675,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// The cost model should take into account that the actual length of the /// vector is reduced on each iteration. unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwise) { + bool IsPairwise, + TTI::TargetCostKind CostKind) { Type *ScalarTy = Ty->getElementType(); unsigned NumVecElts = Ty->getNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); @@ -1636,7 +1695,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ShuffleCost += (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); - ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); + ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy, CostKind); Ty = SubTy; ++LongVectorCount; } @@ -1666,7 +1725,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// Try to calculate op costs for min/max reduction operations. /// \param CondTy Conditional type for the Select instruction. 
unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwise, bool) { + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { Type *ScalarTy = Ty->getElementType(); Type *ScalarCondTy = CondTy->getElementType(); unsigned NumVecElts = Ty->getNumElements(); @@ -1697,9 +1757,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); MinMaxCost += - ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + + ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, - nullptr); + CostKind); Ty = SubTy; ++LongVectorCount; } @@ -1722,9 +1782,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 0, Ty); MinMaxCost += NumReduxLevels * - (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + + (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, - nullptr)); + CostKind)); // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 02ec1d38dfb45..7662179db44de 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -127,7 +127,7 @@ class FastISel { const CallBase &Call, unsigned FixedArgs = ~0U) { RetTy = ResultTy; - Callee = Call.getCalledValue(); + Callee = Call.getCalledOperand(); Symbol = Target; IsInReg = Call.hasRetAttr(Attribute::InReg); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 6a0157f9243e3..17039a840fc2b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -50,7 +50,7 @@ class LegalizationArtifactCombiner { SmallVectorImpl &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); - Builder.setInstr(MI); + Builder.setInstrAndDebugLoc(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); @@ -100,7 +100,7 @@ class LegalizationArtifactCombiner { GISelObserverWrapper &Observer) { assert(MI.getOpcode() == TargetOpcode::G_ZEXT); - Builder.setInstr(MI); + Builder.setInstrAndDebugLoc(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 49276fb1a94d4..8b2c27e7b8882 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -43,6 +43,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -60,6 +61,7 @@ extern cl::opt SwpEnableCopyToPhi; class MachinePipeliner : public MachineFunctionPass { public: MachineFunction *MF = nullptr; + MachineOptimizationRemarkEmitter *ORE = nullptr; const MachineLoopInfo *MLI = nullptr; const MachineDominatorTree *MDT = nullptr; const InstrItineraryData *InstrItins; @@ -96,6 +98,7 @@ class 
MachinePipeliner : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 7792738f4ec97..f36385d7df797 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1316,7 +1316,7 @@ class TargetInstrInfo : public MCInstrInfo { /// Returns true if the instruction is a /// terminator instruction that has not been predicated. - virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const; + bool isUnpredicatedTerminator(const MachineInstr &MI) const; /// Returns true if MI is an unconditional tail call. virtual bool isUnconditionalTailCall(const MachineInstr &MI) const { diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h index bcb379f00d68a..e915d8a5830c4 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h @@ -35,44 +35,38 @@ struct DebugSubsectionHeader { class DebugSubsectionRecord { public: DebugSubsectionRecord(); - DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data, - CodeViewContainer Container); + DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data); - static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info, - CodeViewContainer Container); + static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info); uint32_t getRecordLength() const; DebugSubsectionKind kind() const; BinaryStreamRef getRecordData() const; private: - CodeViewContainer Container = CodeViewContainer::ObjectFile; DebugSubsectionKind Kind = DebugSubsectionKind::None; BinaryStreamRef Data; }; class DebugSubsectionRecordBuilder { public: - DebugSubsectionRecordBuilder(std::shared_ptr Subsection, - CodeViewContainer Container); + DebugSubsectionRecordBuilder(std::shared_ptr Subsection); /// Use this to copy existing subsections directly from source to destination. /// For example, line table subsections in an object file only need to be /// relocated before being copied into the PDB. - DebugSubsectionRecordBuilder(const DebugSubsectionRecord &Contents, - CodeViewContainer Container); + DebugSubsectionRecordBuilder(const DebugSubsectionRecord &Contents); - uint32_t calculateSerializedLength(); - Error commit(BinaryStreamWriter &Writer) const; + uint32_t calculateSerializedLength() const; + Error commit(BinaryStreamWriter &Writer, CodeViewContainer Container) const; private: /// The subsection to build. Will be null if Contents is non-empty. std::shared_ptr Subsection; /// The bytes of the subsection. Only non-empty if Subsection is null. + /// FIXME: Reduce the size of this. DebugSubsectionRecord Contents; - - CodeViewContainer Container; }; } // end namespace codeview @@ -83,8 +77,7 @@ template <> struct VarStreamArrayExtractor { // FIXME: We need to pass the container type through to this function. In // practice this isn't super important since the subsection header describes // its length and we can just skip it. It's more important when writing. 
- if (auto EC = codeview::DebugSubsectionRecord::initialize( - Stream, Info, codeview::CodeViewContainer::Pdb)) + if (auto EC = codeview::DebugSubsectionRecord::initialize(Stream, Info)) return EC; Length = alignTo(Info.getRecordLength(), 4); return Error::success(); diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 1fcef9dd06c83..4383534b0db28 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -350,6 +350,13 @@ class InlineSiteSym : public SymbolRecord { uint32_t RecordOffset = 0; }; +struct PublicSym32Header { + ulittle32_t Flags; + ulittle32_t Offset; + ulittle16_t Segment; + // char Name[]; +}; + // S_PUB32 class PublicSym32 : public SymbolRecord { public: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index c448c5f690a30..e58a46bc9d0ab 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -394,16 +394,17 @@ class DWARFContext : public DIContext { return getDWARFObj().getFile()->getArch(); } -private: - /// Parse a macro[.dwo] or macinfo[.dwo] section. - std::unique_ptr - parseMacroOrMacinfo(MacroSecType SectionType); - /// Return the compile unit which contains instruction with provided /// address. /// TODO: change input parameter from "uint64_t Address" /// into "SectionedAddress Address" DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address); + +private: + /// Parse a macro[.dwo] or macinfo[.dwo] section. + std::unique_ptr + parseMacroOrMacinfo(MacroSecType SectionType); + void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die, std::vector &Result); }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 158bd82edee0b..05a6056e8e21f 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -241,10 +241,22 @@ class DWARFDie { /// Returns null if no name is found. const char *getSubroutineName(DINameKind Kind) const; - /// Return the DIE name resolving DW_AT_sepcification or DW_AT_abstract_origin - /// references if necessary. Returns null if no name is found. + /// Return the DIE name resolving DW_AT_specification or DW_AT_abstract_origin + /// references if necessary. For the LinkageName case it additionally searches + /// for ShortName if LinkageName is not found. + /// Returns null if no name is found. const char *getName(DINameKind Kind) const; + /// Return the DIE short name resolving DW_AT_specification or + /// DW_AT_abstract_origin references if necessary. Returns null if no name + /// is found. + const char *getShortName() const; + + /// Return the DIE linkage name resolving DW_AT_specification or + /// DW_AT_abstract_origin references if necessary. Returns null if no name + /// is found. + const char *getLinkageName() const; + /// Returns the declaration line (start line) for a DIE, assuming it specifies /// a subprogram.
This may be fetched from specification or abstract origin /// for this subprogram by resolving DW_AT_sepcification or diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index 4f5d28bbd05aa..beaaef0c5a6c5 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -93,8 +93,7 @@ class DbiModuleDescriptorBuilder { std::vector SourceFiles; std::vector> Symbols; - std::vector> - C13Builders; + std::vector C13Builders; ModuleInfoHeader Layout; }; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h index a497956000283..c4d35438541a4 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h @@ -51,9 +51,9 @@ class GSIStreamBuilder { Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); - uint32_t getPublicsStreamIndex() const; - uint32_t getGlobalsStreamIndex() const; - uint32_t getRecordStreamIdx() const { return RecordStreamIdx; } + uint32_t getPublicsStreamIndex() const { return PublicsStreamIndex; } + uint32_t getGlobalsStreamIndex() const { return GlobalsStreamIndex; } + uint32_t getRecordStreamIndex() const { return RecordStreamIndex; } void addPublicSymbol(const codeview::PublicSym32 &Pub); @@ -69,7 +69,9 @@ class GSIStreamBuilder { Error commitPublicsHashStream(WritableBinaryStreamRef Stream); Error commitGlobalsHashStream(WritableBinaryStreamRef Stream); - uint32_t RecordStreamIdx = kInvalidStreamIndex; + uint32_t PublicsStreamIndex = kInvalidStreamIndex; + uint32_t GlobalsStreamIndex = kInvalidStreamIndex; + uint32_t RecordStreamIndex = kInvalidStreamIndex; msf::MSFBuilder &Msf; std::unique_ptr PSH; std::unique_ptr GSH; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index be5152d09ad29..0424d5043cca8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -825,47 +825,46 @@ class JITDylib { /// have been added and not yet removed). void removeGenerator(DefinitionGenerator &G); - /// Set the search order to be used when fixing up definitions in JITDylib. - /// This will replace the previous search order, and apply to any symbol + /// Set the link order to be used when fixing up definitions in JITDylib. + /// This will replace the previous link order, and apply to any symbol /// resolutions made for definitions in this JITDylib after the call to - /// setSearchOrder (even if the definition itself was added before the + /// setLinkOrder (even if the definition itself was added before the /// call). /// - /// If SearchThisJITDylibFirst is set, which by default it is, then this - /// JITDylib will add itself to the beginning of the SearchOrder (Clients - /// should *not* put this JITDylib in the list in this case, to avoid - /// redundant lookups). + /// If LinkAgainstThisJITDylibFirst is true (the default) then this JITDylib + /// will add itself to the beginning of the LinkOrder (Clients should not + /// put this JITDylib in the list in this case, to avoid redundant lookups). /// - /// If SearchThisJITDylibFirst is false then the search order will be used as - /// given. 
The main motivation for this feature is to support deliberate + /// If LinkAgainstThisJITDylibFirst is false then the link order will be used + /// as-is. The primary motivation for this feature is to support deliberate /// shadowing of symbols in this JITDylib by a facade JITDylib. For example, /// the facade may resolve function names to stubs, and the stubs may compile /// lazily by looking up symbols in this dylib. Adding the facade dylib - /// as the first in the search order (instead of this dylib) ensures that + /// as the first in the link order (instead of this dylib) ensures that /// definitions within this dylib resolve to the lazy-compiling stubs, /// rather than immediately materializing the definitions in this dylib. - void setSearchOrder(JITDylibSearchOrder NewSearchOrder, - bool SearchThisJITDylibFirst = true); + void setLinkOrder(JITDylibSearchOrder NewSearchOrder, + bool LinkAgainstThisJITDylibFirst = true); - /// Add the given JITDylib to the search order for definitions in this + /// Add the given JITDylib to the link order for definitions in this /// JITDylib. - void addToSearchOrder(JITDylib &JD, - JITDylibLookupFlags JDLookupFlags = - JITDylibLookupFlags::MatchExportedSymbolsOnly); + void addToLinkOrder(JITDylib &JD, + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); - /// Replace OldJD with NewJD in the search order if OldJD is present. + /// Replace OldJD with NewJD in the link order if OldJD is present. /// Otherwise this operation is a no-op. - void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - JITDylibLookupFlags JDLookupFlags = - JITDylibLookupFlags::MatchExportedSymbolsOnly); + void replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); - /// Remove the given JITDylib from the search order for this JITDylib if it is + /// Remove the given JITDylib from the link order for this JITDylib if it is /// present. Otherwise this operation is a no-op. - void removeFromSearchOrder(JITDylib &JD); + void removeFromLinkOrder(JITDylib &JD); - /// Do something with the search order (run under the session lock). + /// Do something with the link order (run under the session lock). 
template - auto withSearchOrderDo(Func &&F) + auto withLinkOrderDo(Func &&F) -> decltype(F(std::declval())); /// Define all symbols provided by the materialization unit to be part of this @@ -1049,7 +1048,7 @@ class JITDylib { UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; std::vector> DefGenerators; - JITDylibSearchOrder SearchOrder; + JITDylibSearchOrder LinkOrder; }; /// Platforms set up standard symbols and mediate interactions between dynamic @@ -1297,9 +1296,9 @@ GeneratorT &JITDylib::addGenerator(std::unique_ptr DefGenerator) { } template -auto JITDylib::withSearchOrderDo(Func &&F) +auto JITDylib::withLinkOrderDo(Func &&F) -> decltype(F(std::declval())) { - return ES.runSessionLocked([&]() { return F(SearchOrder); }); + return ES.runSessionLocked([&]() { return F(LinkOrder); }); } template diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 7599a7c981757..8677b38a9afb4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -196,6 +196,7 @@ __OMP_CLAUSE(destroy, OMPDestroyClause) __OMP_CLAUSE(detach, OMPDetachClause) __OMP_CLAUSE(inclusive, OMPInclusiveClause) __OMP_CLAUSE(exclusive, OMPExclusiveClause) +__OMP_CLAUSE(uses_allocators, OMPUsesAllocatorsClause) __OMP_CLAUSE_NO_CLASS(uniform) __OMP_CLAUSE_NO_CLASS(device_type) @@ -887,6 +888,7 @@ __OMP_DIRECTIVE_CLAUSE(target, 1, ~0, firstprivate) __OMP_DIRECTIVE_CLAUSE(target, 1, ~0, is_device_ptr) __OMP_DIRECTIVE_CLAUSE(target, 1, ~0, reduction) __OMP_DIRECTIVE_CLAUSE(target, 1, ~0, allocate) +__OMP_DIRECTIVE_CLAUSE(target, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(requires, 1, ~0, unified_address) __OMP_DIRECTIVE_CLAUSE(requires, 1, ~0, unified_shared_memory) @@ -928,6 +930,7 @@ __OMP_DIRECTIVE_CLAUSE(target_parallel, 1, ~0, shared) __OMP_DIRECTIVE_CLAUSE(target_parallel, 1, ~0, reduction) __OMP_DIRECTIVE_CLAUSE(target_parallel, 1, ~0, is_device_ptr) __OMP_DIRECTIVE_CLAUSE(target_parallel, 1, ~0, allocate) +__OMP_DIRECTIVE_CLAUSE(target_parallel, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 1, ~0, device) @@ -950,6 +953,7 @@ __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 1, ~0, linear) __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 1, ~0, is_device_ptr) __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 1, ~0, allocate) __OMP_DIRECTIVE_CLAUSE(target_parallel_for, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_parallel_for, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_update, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_update, 1, ~0, device) @@ -1187,6 +1191,7 @@ __OMP_DIRECTIVE_CLAUSE(target_parallel_for_simd, 1, ~0, is_device_ptr) __OMP_DIRECTIVE_CLAUSE(target_parallel_for_simd, 1, ~0, allocate) __OMP_DIRECTIVE_CLAUSE(target_parallel_for_simd, 50, ~0, nontemporal) __OMP_DIRECTIVE_CLAUSE(target_parallel_for_simd, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_parallel_for_simd, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_simd, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_simd, 1, ~0, device) @@ -1207,6 +1212,7 @@ __OMP_DIRECTIVE_CLAUSE(target_simd, 1, ~0, reduction) __OMP_DIRECTIVE_CLAUSE(target_simd, 1, ~0, allocate) __OMP_DIRECTIVE_CLAUSE(target_simd, 50, ~0, nontemporal) __OMP_DIRECTIVE_CLAUSE(target_simd, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_simd, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(teams_distribute, 1, ~0, default) __OMP_DIRECTIVE_CLAUSE(teams_distribute, 1, ~0, 
private) @@ -1294,6 +1300,7 @@ __OMP_DIRECTIVE_CLAUSE(target_teams, 1, ~0, reduction) __OMP_DIRECTIVE_CLAUSE(target_teams, 1, ~0, num_teams) __OMP_DIRECTIVE_CLAUSE(target_teams, 1, ~0, thread_limit) __OMP_DIRECTIVE_CLAUSE(target_teams, 1, ~0, allocate) +__OMP_DIRECTIVE_CLAUSE(target_teams, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, device) @@ -1313,6 +1320,7 @@ __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, lastprivate) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, collapse) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, dist_schedule) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 1, ~0, allocate) +__OMP_DIRECTIVE_CLAUSE(target_teams_distribute, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 1, ~0, device) @@ -1340,6 +1348,8 @@ __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 1, ~0, proc_bind) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 1, ~0, schedule) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 1, ~0, allocate) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for, 50, ~0, + uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 1, ~0, device) @@ -1387,6 +1397,8 @@ __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 1, ~0, __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 50, ~0, nontemporal) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_teams_distribute_parallel_for_simd, 50, ~0, + uses_allocators) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 1, ~0, if) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 1, ~0, device) @@ -1411,6 +1423,7 @@ __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 1, ~0, simdlen) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 1, ~0, allocate) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 50, ~0, nontemporal) __OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 50, ~0, order) +__OMP_DIRECTIVE_CLAUSE(target_teams_distribute_simd, 50, ~0, uses_allocators) __OMP_DIRECTIVE_CLAUSE(taskgroup, 1, ~0, task_reduction) __OMP_DIRECTIVE_CLAUSE(taskgroup, 1, ~0, allocate) diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h index c13521e157d08..c78afc729d43a 100644 --- a/llvm/include/llvm/IR/AbstractCallSite.h +++ b/llvm/include/llvm/IR/AbstractCallSite.h @@ -141,6 +141,12 @@ class AbstractCallSite { assert(!CI.ParameterEncoding.empty() && "Callback without parameter encoding!"); + // If the use is actually in a constant cast expression which itself + // has only one use, we look through the constant cast expression. + if (auto *CE = dyn_cast(U->getUser())) + if (CE->hasOneUse() && CE->isCast()) + U = &*CE->use_begin(); + return (int)CB->getArgOperandNo(U) == CI.ParameterEncoding[0]; } @@ -201,16 +207,16 @@ class AbstractCallSite { } /// Return the pointer to function that is being called. 
- Value *getCalledValue() const { + Value *getCalledOperand() const { if (isDirectCall()) - return CB->getCalledValue(); + return CB->getCalledOperand(); return CB->getArgOperand(getCallArgOperandNoForCallee()); } /// Return the function being called if this is a direct call, otherwise /// return null (if it's an indirect call). Function *getCalledFunction() const { - Value *V = getCalledValue(); + Value *V = getCalledOperand(); return V ? dyn_cast(V->stripPointerCasts()) : nullptr; } }; diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index 244878bd31558..e23e9d08ca597 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -71,9 +71,9 @@ class Argument final : public Value { /// Return true if this argument has the swifterror attribute. bool hasSwiftErrorAttr() const; - /// Return true if this argument has the byval attribute or inalloca + /// Return true if this argument has the byval, inalloca, or preallocated /// attribute. These attributes represent arguments being passed by value. - bool hasByValOrInAllocaAttr() const; + bool hasPassPointeeByValueAttr() const; /// If this is a byval or inalloca argument, return its alignment. /// FIXME: Remove this function once transition to Align is over. diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 2de11ba5171e5..05c076816320b 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -108,6 +108,7 @@ class Attribute { unsigned ElemSizeArg, const Optional &NumElemsArg); static Attribute getWithByValType(LLVMContext &Context, Type *Ty); + static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty); static Attribute::AttrKind getAttrKindFromName(StringRef AttrName); @@ -302,6 +303,7 @@ class AttributeSet { uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; Type *getByValType() const; + Type *getPreallocatedType() const; std::pair> getAllocSizeArgs() const; std::string getAsString(bool InAttrGrp = false) const; @@ -542,9 +544,6 @@ class AttributeList { // AttributeList Accessors //===--------------------------------------------------------------------===// - /// Retrieve the LLVM context. - LLVMContext &getContext() const; - /// The attributes for the specified index are returned. AttributeSet getAttributes(unsigned Index) const; @@ -724,6 +723,7 @@ class AttrBuilder { uint64_t DerefOrNullBytes = 0; uint64_t AllocSizeArgs = 0; Type *ByValType = nullptr; + Type *PreallocatedType = nullptr; public: AttrBuilder() = default; @@ -802,6 +802,9 @@ class AttrBuilder { /// Retrieve the byval type. Type *getByValType() const { return ByValType; } + /// Retrieve the preallocated type. + Type *getPreallocatedType() const { return PreallocatedType; } + /// Retrieve the allocsize args, if the allocsize attribute exists. If it /// doesn't exist, pair(0, 0) is returned. std::pair> getAllocSizeArgs() const; @@ -845,6 +848,9 @@ class AttrBuilder { /// This turns a byval type into the form used internally in Attribute. AttrBuilder &addByValAttr(Type *Ty); + /// This turns a preallocated type into the form used internally in Attribute. + AttrBuilder &addPreallocatedAttr(Type *Ty); + /// Add an allocsize attribute, using the representation returned by /// Attribute.getIntValue(). 
AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr); diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index ee07813b99501..5095728748f34 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -133,6 +133,9 @@ def OptimizeForSize : EnumAttr<"optsize">; /// Function must not be optimized. def OptimizeNone : EnumAttr<"optnone">; +/// Similar to byval but without a copy. +def Preallocated : TypeAttr<"preallocated">; + /// Function does not access memory. def ReadNone : EnumAttr<"readnone">; diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index c41d1582a834d..b31bcb751ab75 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -516,12 +516,13 @@ class ConstantVector final : public ConstantAggregate { public: /// Return a ConstantVector with the specified constant in each element. + /// Note that this might not return an instance of ConstantVector static Constant *getSplat(ElementCount EC, Constant *Elt); - /// Specialize the getType() method to always return a VectorType, + /// Specialize the getType() method to always return a FixedVectorType, /// which reduces the amount of casting needed in parts of the compiler. - inline VectorType *getType() const { - return cast(Value::getType()); + inline FixedVectorType *getType() const { + return cast(Value::getType()); } /// If all elements of the vector constant have the same value, return that @@ -801,10 +802,10 @@ class ConstantDataVector final : public ConstantDataSequential { /// same value, return that value. Otherwise return NULL. Constant *getSplatValue() const; - /// Specialize the getType() method to always return a VectorType, + /// Specialize the getType() method to always return a FixedVectorType, /// which reduces the amount of casting needed in parts of the compiler. - inline VectorType *getType() const { - return cast(Value::getType()); + inline FixedVectorType *getType() const { + return cast(Value::getType()); } /// Methods for support type inquiry through isa, cast, and dyn_cast: diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index e7af4cf7fbc5f..336a7b061ea3c 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -519,9 +519,8 @@ class VectorType : public Type { /// input type and the same element type. static VectorType *getDoubleElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert((VTy->getNumElements() * 2ull) <= UINT_MAX && - "Too many elements in vector"); - return VectorType::get(VTy->getElementType(), EltCnt*2); + assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector"); + return VectorType::get(VTy->getElementType(), EltCnt * 2); } /// Return true if the specified type is valid as a element type. 
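A note on the getDoubleElementsVectorType change above: doubling the ElementCount instead of calling getNumElements() makes the helper valid for scalable vectors as well as fixed-width ones. Below is a minimal standalone sketch of that behavior; the ElementCount struct here is a simplified stand-in, not the real llvm::ElementCount:

#include <cassert>
#include <climits>

// Simplified stand-in: llvm::ElementCount is a (Min, Scalable) pair, where
// Min is the exact lane count for fixed vectors and the minimum lane count
// (per vscale multiple) for scalable ones.
struct ElementCount {
  unsigned Min;
  bool Scalable;
};

// Mirrors the doubling and the overflow assert in the hunk above.
ElementCount doubleElements(ElementCount EC) {
  assert((EC.Min * 2ull) <= UINT_MAX && "Too many elements in vector");
  return {EC.Min * 2, EC.Scalable};
}

int main() {
  ElementCount Fixed{4, false}; // <4 x float>        -> <8 x float>
  ElementCount Scal{2, true};   // <vscale x 2 x i64> -> <vscale x 4 x i64>
  assert(doubleElements(Fixed).Min == 8 && !doubleElements(Fixed).Scalable);
  assert(doubleElements(Scal).Min == 4 && doubleElements(Scal).Scalable);
  return 0;
}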
@@ -549,6 +548,10 @@ class FixedVectorType : public VectorType { public: static FixedVectorType *get(Type *ElementType, unsigned NumElts); + static FixedVectorType *get(Type *ElementType, const FixedVectorType *FVTy) { + return get(ElementType, FVTy->getNumElements()); + } + static bool classof(const Type *T) { return T->getTypeID() == FixedVectorTyID; } @@ -563,6 +566,11 @@ class ScalableVectorType : public VectorType { public: static ScalableVectorType *get(Type *ElementType, unsigned MinNumElts); + static ScalableVectorType *get(Type *ElementType, + const ScalableVectorType *SVTy) { + return get(ElementType, SVTy->getMinNumElements()); + } + /// Get the minimum number of elements in this vector. The actual number of /// elements in the vector is an integer multiple of this value. uint64_t getMinNumElements() const { return ElementQuantity; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 4f4f58a961631..8a93126148750 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1709,7 +1709,7 @@ class IRBuilderBase { const DataLayout &DL = BB->getModule()->getDataLayout(); Align = DL.getABITypeAlign(Val->getType()); } - return Insert(new StoreInst(Val, Ptr, isVolatile, Align)); + return Insert(new StoreInst(Val, Ptr, isVolatile, *Align)); } FenceInst *CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID = SyncScope::System, diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index d28b3301fa6a1..37e5e42ee8f84 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1286,10 +1286,6 @@ class CallBase : public Instruction { Value *getCalledOperand() const { return Op(); } - // DEPRECATED: This routine will be removed in favor of `getCalledOperand` in - // the near future. - Value *getCalledValue() const { return getCalledOperand(); } - const Use &getCalledOperandUse() const { return Op(); } Use &getCalledOperandUse() { return Op(); } @@ -1545,10 +1541,12 @@ class CallBase : public Instruction { return paramHasAttr(ArgNo, Attribute::InAlloca); } - /// Determine whether this argument is passed by value or in an alloca. - bool isByValOrInAllocaArgument(unsigned ArgNo) const { + /// Determine whether this argument is passed by value, in an alloca, or is + /// preallocated. + bool isPassPointeeByValueArgument(unsigned ArgNo) const { return paramHasAttr(ArgNo, Attribute::ByVal) || - paramHasAttr(ArgNo, Attribute::InAlloca); + paramHasAttr(ArgNo, Attribute::InAlloca) || + paramHasAttr(ArgNo, Attribute::Preallocated); } /// Determine if there are is an inalloca argument. 
Only the last argument can diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index eda69b31fdb23..fc17ffe3118f2 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -508,6 +508,9 @@ def int_instrprof_value_profile : Intrinsic<[], llvm_i32_ty], []>; +def int_call_preallocated_setup : Intrinsic<[llvm_token_ty], [llvm_i32_ty]>; +def int_call_preallocated_arg : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>; + //===------------------- Standard C Library Intrinsics --------------------===// // diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 4a23d3ff77f71..56fb18a28df7d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -913,12 +913,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_anyvector_ty], [IntrNoMem]>; - class AdvSIMD_SVE_FP_Reduce_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], - [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, - llvm_anyvector_ty], - [IntrNoMem]>; - class AdvSIMD_SVE_ReduceWithInit_Intrinsic : Intrinsic<[LLVMVectorElementType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -926,13 +920,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". llvm_anyvector_ty], [IntrNoMem]>; - class AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], - [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, - LLVMMatchType<0>, - llvm_anyvector_ty], - [IntrNoMem]>; - class AdvSIMD_SVE_ShiftByImm_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1699,12 +1686,12 @@ def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic; // Floating-point reductions // -def int_aarch64_sve_fadda : AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic; -def int_aarch64_sve_faddv : AdvSIMD_SVE_FP_Reduce_Intrinsic; -def int_aarch64_sve_fmaxv : AdvSIMD_SVE_FP_Reduce_Intrinsic; -def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic; -def int_aarch64_sve_fminv : AdvSIMD_SVE_FP_Reduce_Intrinsic; -def int_aarch64_sve_fminnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic; +def int_aarch64_sve_fadda : AdvSIMD_SVE_ReduceWithInit_Intrinsic; +def int_aarch64_sve_faddv : AdvSIMD_SVE_Reduce_Intrinsic; +def int_aarch64_sve_fmaxv : AdvSIMD_SVE_Reduce_Intrinsic; +def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic; +def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic; +def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic; // // Floating-point conversions diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 41c25c563f706..3c11de14a24dc 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -83,12 +83,14 @@ class LLVMContext { /// Known operand bundle tag IDs, which always have the same value. All /// operand bundle tags that LLVM has special knowledge of are listed here. /// Additionally, this scheme allows LLVM to efficiently check for specific - /// operand bundle tags without comparing strings. + /// operand bundle tags without comparing strings. Keep this in sync with + /// LLVMContext::LLVMContext(). enum : unsigned { OB_deopt = 0, // "deopt" OB_funclet = 1, // "funclet" OB_gc_transition = 2, // "gc-transition" OB_cfguardtarget = 3, // "cfguardtarget" + OB_preallocated = 4, // "preallocated" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. 
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 390301b61c0ba..0e22e2388572b 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -262,20 +262,20 @@ template inline constantint_match m_ConstantInt() { return constantint_match(); } -/// This helper class is used to match scalar and vector integer constants that -/// satisfy a specified predicate. +/// This helper class is used to match scalar and fixed width vector integer +/// constants that satisfy a specified predicate. /// For vector constants, undefined elements are ignored. template struct cst_pred_ty : public Predicate { template bool match(ITy *V) { if (const auto *CI = dyn_cast(V)) return this->isValue(CI->getValue()); - if (V->getType()->isVectorTy()) { + if (const auto *FVTy = dyn_cast(V->getType())) { if (const auto *C = dyn_cast(V)) { if (const auto *CI = dyn_cast_or_null(C->getSplatValue())) return this->isValue(CI->getValue()); // Non-splat vector constant: check each element for a match. - unsigned NumElts = cast(V->getType())->getNumElements(); + unsigned NumElts = FVTy->getNumElements(); assert(NumElts != 0 && "Constant vector with no elements?"); bool HasNonUndefElements = false; for (unsigned i = 0; i != NumElts; ++i) { @@ -462,6 +462,7 @@ inline cst_pred_ty m_ZeroInt() { struct is_zero { template bool match(ITy *V) { auto *C = dyn_cast(V); + // FIXME: this should be able to do something for scalable vectors return C && (C->isNullValue() || cst_pred_ty().match(C)); } }; diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h index 7532b44edd987..b105f8dd55a94 100644 --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -64,15 +64,16 @@ class ProfileSummary { ProfileSummary(Kind K, SummaryEntryVector DetailedSummary, uint64_t TotalCount, uint64_t MaxCount, uint64_t MaxInternalCount, uint64_t MaxFunctionCount, - uint32_t NumCounts, uint32_t NumFunctions) + uint32_t NumCounts, uint32_t NumFunctions, + bool Partial = false) : PSK(K), DetailedSummary(std::move(DetailedSummary)), TotalCount(TotalCount), MaxCount(MaxCount), MaxInternalCount(MaxInternalCount), MaxFunctionCount(MaxFunctionCount), - NumCounts(NumCounts), NumFunctions(NumFunctions) {} + NumCounts(NumCounts), NumFunctions(NumFunctions), Partial(Partial) {} Kind getKind() const { return PSK; } /// Return summary information as metadata. - Metadata *getMD(LLVMContext &Context); + Metadata *getMD(LLVMContext &Context, bool AddPartialField = true); /// Construct profile summary from metdata. static ProfileSummary *getFromMD(Metadata *MD); SummaryEntryVector &getDetailedSummary() { return DetailedSummary; } diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index a6b116f6bc86c..20fed3488f290 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -66,6 +66,11 @@ struct Config { /// link. bool HasWholeProgramVisibility = false; + /// Always emit a Regular LTO object even when it is empty because no Regular + /// LTO modules were linked. This option is useful for some build systems that + /// want to know a priori all possible output files. + bool AlwaysEmitRegularLTOObj = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability.
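A usage note on the PatternMatch change above: because cst_pred_ty now dyn_casts to FixedVectorType, the predicate matchers fail cleanly on scalable vectors instead of asserting inside getNumElements() (hence the new FIXME). A small sketch, where only the wrapper name is hypothetical:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Matches integer zero: a scalar zero, or a fixed-width vector whose
// defined lanes are all zero (undef lanes are ignored). After the change
// above, a scalable-vector zero simply fails to match rather than crashing.
bool isIntegerZero(Value *V) { return match(V, m_ZeroInt()); }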
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index e934398194155..0a635b45e5a24 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -331,6 +331,7 @@ class LTO { std::vector Keep; }; std::vector ModsWithSummaries; + bool EmptyCombinedModule = true; } RegularLTO; struct ThinLTOState { diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 18f1637deb502..e7e48a318c8f8 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -307,6 +307,10 @@ class MCAsmInfo { /// false. bool HasAltEntry = false; + /// True if this target supports the XCOFF .extern directive. Defaults to + /// false. + bool HasDotExternDirective = false; + /// Used to declare a global as being a weak symbol. Defaults to ".weak". const char *WeakDirective; @@ -583,6 +587,7 @@ class MCAsmInfo { bool hasIdentDirective() const { return HasIdentDirective; } bool hasNoDeadStrip() const { return HasNoDeadStrip; } bool hasAltEntry() const { return HasAltEntry; } + bool hasDotExternDirective() const { return HasDotExternDirective; } const char *getWeakDirective() const { return WeakDirective; } const char *getWeakRefDirective() const { return WeakRefDirective; } bool hasWeakDefDirective() const { return HasWeakDefDirective; } diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h index cad08c8574d23..51e57ad370215 100644 --- a/llvm/include/llvm/MC/MCDirectives.h +++ b/llvm/include/llvm/MC/MCDirectives.h @@ -29,6 +29,7 @@ enum MCSymbolAttr { MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object MCSA_Global, ///< .globl MCSA_LGlobal, ///< .lglobl (XCOFF) + MCSA_Extern, ///< .extern (XCOFF) MCSA_Hidden, ///< .hidden (ELF) MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) MCSA_Internal, ///< .internal (ELF) diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h index 9143adf0e97bd..2f9b4ba8782d9 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -40,7 +40,10 @@ class MemoryGroup { unsigned NumInstructions; unsigned NumExecuting; unsigned NumExecuted; - SmallVector Succ; + // Successors that are in an order dependency with this group. + SmallVector OrderSucc; + // Successors that are in a data dependency with this group. + SmallVector DataSucc; CriticalDependency CriticalPredecessor; InstRef CriticalMemoryInstruction; @@ -55,8 +58,9 @@ class MemoryGroup { NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {} MemoryGroup(MemoryGroup &&) = default; - ArrayRef getSuccessors() const { return Succ; } - unsigned getNumSuccessors() const { return Succ.size(); } + size_t getNumSuccessors() const { + return OrderSucc.size() + DataSucc.size(); + } unsigned getNumPredecessors() const { return NumPredecessors; } unsigned getNumExecutingPredecessors() const { return NumExecutingPredecessors; } @@ -75,12 +79,22 @@ class MemoryGroup { return CriticalPredecessor; } - void addSuccessor(MemoryGroup *Group) { + void addSuccessor(MemoryGroup *Group, bool IsDataDependent) { + // No need to add a dependency if there is no data + // dependency and all instructions from this group have been + // issued already.
+ if (!IsDataDependent && isExecuting()) + return; + Group->NumPredecessors++; assert(!isExecuted() && "Should have been removed!"); if (isExecuting()) - Group->onGroupIssued(CriticalMemoryInstruction); - Succ.emplace_back(Group); + Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent); + + if (IsDataDependent) + DataSucc.emplace_back(Group); + else + OrderSucc.emplace_back(Group); } bool isWaiting() const { @@ -98,10 +112,13 @@ class MemoryGroup { } bool isExecuted() const { return NumInstructions == NumExecuted; } - void onGroupIssued(const InstRef &IR) { + void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) { assert(!isReady() && "Unexpected group-start event!"); NumExecutingPredecessors++; + if (!ShouldUpdateCriticalDep) + return; + unsigned Cycles = IR.getInstruction()->getCyclesLeft(); if (CriticalPredecessor.Cycles < Cycles) { CriticalPredecessor.IID = IR.getSourceIndex(); @@ -133,8 +150,14 @@ class MemoryGroup { return; // Notify successors that this group started execution. - for (MemoryGroup *MG : Succ) - MG->onGroupIssued(CriticalMemoryInstruction); + for (MemoryGroup *MG : OrderSucc) { + MG->onGroupIssued(CriticalMemoryInstruction, false); + // Release the order dependency with this group. + MG->onGroupExecuted(); + } + + for (MemoryGroup *MG : DataSucc) + MG->onGroupIssued(CriticalMemoryInstruction, true); } void onInstructionExecuted() { @@ -145,8 +168,8 @@ class MemoryGroup { if (!isExecuted()) return; - // Notify successors that this group has finished execution. - for (MemoryGroup *MG : Succ) + // Notify data dependent successors that this group has finished execution. + for (MemoryGroup *MG : DataSucc) MG->onGroupExecuted(); } @@ -412,6 +435,7 @@ class LSUnit : public LSUnitBase { unsigned CurrentLoadGroupID; unsigned CurrentLoadBarrierGroupID; unsigned CurrentStoreGroupID; + unsigned CurrentStoreBarrierGroupID; public: LSUnit(const MCSchedModel &SM) @@ -420,7 +444,8 @@ class LSUnit : public LSUnitBase { : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {} LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias) : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0), - CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {} + CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0), + CurrentStoreBarrierGroupID(0) {} /// Returns LSU_AVAILABLE if there are enough load/store queue entries to /// accomodate instruction IR. diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index b0aa86942a3f7..f6435d8b7ccc8 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -516,7 +516,12 @@ uint64_t ELFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { template Expected ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { - uint64_t Result = getSymbolValue(Symb); + Expected SymbolValueOrErr = getSymbolValue(Symb); + if (!SymbolValueOrErr) + // TODO: Test this error. + return SymbolValueOrErr.takeError(); + + uint64_t Result = *SymbolValueOrErr; const Elf_Sym *ESym = getSymbol(Symb); switch (ESym->st_shndx) { case ELF::SHN_COMMON: diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index e7d1dcaec9c17..4d51430ffaf73 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -188,7 +188,7 @@ class SymbolRef : public BasicSymbolRef { /// Return the value of the symbol depending on the object this can be an /// offset or a virtual address. 
- uint64_t getValue() const; + Expected getValue() const; /// Get the alignment of this symbol as the actual value (not log 2). uint32_t getAlignment() const; @@ -289,7 +289,7 @@ class ObjectFile : public SymbolicFile { virtual void getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const = 0; - uint64_t getSymbolValue(DataRefImpl Symb) const; + Expected getSymbolValue(DataRefImpl Symb) const; public: ObjectFile() = delete; @@ -390,7 +390,7 @@ inline Expected SymbolRef::getAddress() const { return getObject()->getSymbolAddress(getRawDataRefImpl()); } -inline uint64_t SymbolRef::getValue() const { +inline Expected SymbolRef::getValue() const { return getObject()->getSymbolValue(getRawDataRefImpl()); } diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 3d3f88358b6af..99effa5f1cf8f 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -179,6 +179,8 @@ AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("a64fx", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_SVE)) +AARCH64_CPU_NAME("carmel", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + AArch64::AEK_FP16) // Invalid CPU AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID) #undef AARCH64_CPU_NAME diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index be09bd635219f..40c967ccc4857 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -170,7 +170,7 @@ class BumpPtrAllocatorImpl // If Size is really big, allocate a separate slab for it. size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { - void *NewSlab = Allocator.Allocate(PaddedSize, 0); + void *NewSlab = Allocator.Allocate(PaddedSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anyting other than // pieces returned from this method. So poison the whole slab. __asan_poison_memory_region(NewSlab, PaddedSize); @@ -208,7 +208,7 @@ class BumpPtrAllocatorImpl // Bump pointer allocators are expected to never free their storage; and // clients expect pointers to remain valid for non-dereferencing uses even // after deallocation. - void Deallocate(const void *Ptr, size_t Size) { + void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { __asan_poison_memory_region(Ptr, Size); } @@ -332,7 +332,8 @@ class BumpPtrAllocatorImpl void StartNewSlab() { size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); - void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0); + void *NewSlab = + Allocator.Allocate(AllocatedSlabSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anything other than // pieces returned from this method. So poison the whole slab. 
__asan_poison_memory_region(NewSlab, AllocatedSlabSize); @@ -348,7 +349,7 @@ class BumpPtrAllocatorImpl for (; I != E; ++I) { size_t AllocatedSlabSize = computeSlabSize(std::distance(Slabs.begin(), I)); - Allocator.Deallocate(*I, AllocatedSlabSize); + Allocator.Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); } } @@ -357,7 +358,7 @@ class BumpPtrAllocatorImpl for (auto &PtrAndSize : CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - Allocator.Deallocate(Ptr, Size); + Allocator.Deallocate(Ptr, Size, alignof(std::max_align_t)); } } @@ -434,17 +435,8 @@ void * operator new(size_t Size, llvm::BumpPtrAllocatorImpl &Allocator) { - struct S { - char c; - union { - double D; - long double LD; - long long L; - void *P; - } x; - }; - return Allocator.Allocate( - Size, std::min((size_t)llvm::NextPowerOf2(Size), offsetof(S, x))); + return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), + alignof(std::max_align_t))); } template class AllocatorBase { /// Deallocate \a Ptr to \a Size bytes of memory allocated by this /// allocator. - void Deallocate(const void *Ptr, size_t Size) { + void Deallocate(const void *Ptr, size_t Size, size_t Alignment) { #ifdef __clang__ - static_assert(static_cast( - &AllocatorBase::Deallocate) != - static_cast( - &DerivedT::Deallocate), - "Class derives from AllocatorBase without implementing the " - "core Deallocate(void *) overload!"); + static_assert( + static_cast( + &AllocatorBase::Deallocate) != + static_cast( + &DerivedT::Deallocate), + "Class derives from AllocatorBase without implementing the " + "core Deallocate(void *) overload!"); #endif - return static_cast(this)->Deallocate(Ptr, Size); + return static_cast(this)->Deallocate(Ptr, Size, Alignment); } // The rest of these methods are helpers that redirect to one of the above @@ -72,7 +73,7 @@ template class AllocatorBase { template std::enable_if_t, void>::value, void> Deallocate(T *Ptr, size_t Num = 1) { - Deallocate(static_cast(Ptr), Num * sizeof(T)); + Deallocate(static_cast(Ptr), Num * sizeof(T), alignof(T)); } }; @@ -80,16 +81,15 @@ class MallocAllocator : public AllocatorBase { public: void Reset() {} - LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, - size_t /*Alignment*/) { - return safe_malloc(Size); + LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, size_t Alignment) { + return allocate_buffer(Size, Alignment); } // Pull in base class overloads. using AllocatorBase::Allocate; - void Deallocate(const void *Ptr, size_t /*Size*/) { - free(const_cast(Ptr)); + void Deallocate(const void *Ptr, size_t Size, size_t Alignment) { + deallocate_buffer(const_cast(Ptr), Size, Alignment); } // Pull in base class overloads. diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 2c87d8fbbaad3..9a75cbc4e90d0 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -2027,6 +2027,13 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver, SmallVectorImpl &NewArgv, bool MarkEOLs = false); +/// Tokenizes a Windows command line while attempting to avoid copies. If no +/// quoting or escaping was used, this produces substrings of the original +/// string. If a token requires unquoting, it will be allocated with the +/// StringSaver. +void TokenizeWindowsCommandLineNoCopy(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv); + /// String tokenization function type. 
Should be compatible with either
/// Windows or Unix command line tokenizers.
using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
                                   SmallVectorImpl<const char *> &NewArgv,
                                   bool MarkEOLs);
diff --git a/llvm/include/llvm/Support/FileCollector.h b/llvm/include/llvm/Support/FileCollector.h
index 1b6383ef8cc2d..30e3470ece3cd 100644
--- a/llvm/include/llvm/Support/FileCollector.h
+++ b/llvm/include/llvm/Support/FileCollector.h
@@ -23,6 +23,7 @@ class FileCollectorFileSystem;
/// the VFS.
class FileCollector {
public:
+ /// \p Root directory gets created in copyFiles unless it already exists.
FileCollector(std::string Root, std::string OverlayRoot);
void addFile(const Twine &file);
@@ -69,7 +70,7 @@ class FileCollector {
addDirectoryImpl(const llvm::Twine &Dir,
IntrusiveRefCntPtr<vfs::FileSystem> FS, std::error_code &EC);
- /// Synchronizes adding files.
+ /// Synchronizes access to Seen, VFSWriter and SymlinkMap.
std::mutex Mutex;
/// The root directory where files are copied.
diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index 728b63c54c050..3b712c00dc70e 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -47,7 +47,7 @@ enum class Style { windows, posix, native };
/// foo/ => foo,.
/// /foo/bar => /,foo,bar
/// ../ => ..,.
-/// C:\foo\bar => C:,/,foo,bar
+/// C:\foo\bar => C:,\,foo,bar
/// @endcode
class const_iterator : public iterator_facade_base &result);
/// @result True if a home directory is set, false otherwise.
bool home_directory(SmallVectorImpl<char> &result);
+/// Get the directory where installed packages should put their
+/// machine-local cache, e.g. $XDG_CACHE_HOME.
+///
+/// @param result Holds the resulting path name.
+/// @result True if the appropriate path was determined; it need not exist.
+bool cache_directory(SmallVectorImpl<char> &result);
+
/// Has root name?
///
/// root_name != ""
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index 57bddd8145b68..e9ab815d5717d 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -72,6 +72,8 @@ class ThreadPool {
unsigned getThreadCount() const { return ThreadCount; }
private:
+ bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
+
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
std::shared_future<void> asyncImpl(TaskTy F);
@@ -86,16 +88,15 @@ class ThreadPool {
std::mutex QueueLock;
std::condition_variable QueueCondition;
- /// Locking and signaling for job completion
- std::mutex CompletionLock;
+ /// Signaling for job completion
std::condition_variable CompletionCondition;
/// Keep track of the number of threads actually busy
- std::atomic<unsigned> ActiveThreads;
+ unsigned ActiveThreads = 0;
#if LLVM_ENABLE_THREADS // avoids warning for unused variable
/// Signal for the destruction of the pool, asking thread to exit.
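// Illustrative sketch (an assumption mirroring the intent of this hunk, not
// quoting the .cpp side of the patch): with the separate CompletionLock gone,
// the one QueueLock guards the queue, ActiveThreads, and completion
// signaling, so a completion wait can be written against a single mutex:
//
//   void ThreadPool::wait() {
//     std::unique_lock<std::mutex> LockGuard(QueueLock);
//     CompletionCondition.wait(LockGuard,
//                              [&] { return workCompletedUnlocked(); });
//   }
//
// Once every read and write of ActiveThreads happens under QueueLock, the
// member no longer needs to be std::atomic, which is why it becomes a plain
// unsigned above.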
- bool EnableFlag;
+ bool EnableFlag = true;
#endif
unsigned ThreadCount;
diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h
index 3570119a3bfdb..53009d7ff4aad 100644
--- a/llvm/include/llvm/Support/YAMLParser.h
+++ b/llvm/include/llvm/Support/YAMLParser.h
@@ -139,7 +139,7 @@ class Node {
void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
size_t Size) noexcept {
- Alloc.Deallocate(Ptr, Size);
+ Alloc.Deallocate(Ptr, Size, 0);
}
void operator delete(void *) noexcept = delete;
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 316a6ad5d0944..f93f36037679a 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -649,8 +649,8 @@ inline bool isBool(StringRef S) {
inline QuotingType needsQuotes(StringRef S) {
if (S.empty())
return QuotingType::Single;
- if (isspace(static_cast<unsigned char>(S.front())) ||
- isspace(static_cast<unsigned char>(S.back())))
+ if (isSpace(static_cast<unsigned char>(S.front())) ||
+ isSpace(static_cast<unsigned char>(S.back())))
return QuotingType::Single;
if (isNull(S))
return QuotingType::Single;
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 3a2edc1ff9ce7..28e454d3b0fc7 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TRANSFORMS_IPO_H
#define LLVM_TRANSFORMS_IPO_H
+#include "llvm/ADT/SmallVector.h"
#include <functional>
#include <vector>
@@ -27,7 +28,6 @@ class Pass;
class BasicBlock;
class GlobalValue;
class raw_ostream;
-template <typename T> class SmallVectorImpl;
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 8763b9fce06d4..42675f89070b1 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -153,29 +153,24 @@ enum class DepClassTy {
/// are floating values that do not have a corresponding attribute list
/// position.
struct IRPosition {
- virtual ~IRPosition() {}
/// The positions we distinguish in the IR.
- ///
- /// The values are chosen such that the KindOrArgNo member has a value >= 0
- /// if it is an argument or call site argument while a value < 0 indicates the
- /// respective kind of that value.
- enum Kind : int {
- IRP_INVALID = -6, ///< An invalid position.
- IRP_FLOAT = -5, ///< A position that is not associated with a spot suitable
- ///< for attributes. This could be any value or instruction.
- IRP_RETURNED = -4, ///< An attribute for the function return value.
- IRP_CALL_SITE_RETURNED = -3, ///< An attribute for a call site return value.
- IRP_FUNCTION = -2, ///< An attribute for a function (scope).
- IRP_CALL_SITE = -1, ///< An attribute for a call site (function scope).
- IRP_ARGUMENT = 0, ///< An attribute for a function argument.
- IRP_CALL_SITE_ARGUMENT = 1, ///< An attribute for a call site argument.
+ enum Kind : char {
+ IRP_INVALID, ///< An invalid position.
+ IRP_FLOAT, ///< A position that is not associated with a spot suitable
+ ///< for attributes. This could be any value or instruction.
+ IRP_RETURNED, ///< An attribute for the function return value.
+ IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value.
+ IRP_FUNCTION, ///< An attribute for a function (scope).
+ IRP_CALL_SITE, ///< An attribute for a call site (function scope).
+ IRP_ARGUMENT, ///< An attribute for a function argument.
+ IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
};

/// Default constructor available to create invalid positions implicitly. All
/// other positions need to be created explicitly through the appropriate
/// static member function.
- IRPosition() : AnchorVal(nullptr), KindOrArgNo(IRP_INVALID) { verify(); }
+ IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); }

/// Create a position describing the value of \p V.
static const IRPosition value(const Value &V) {
@@ -198,7 +193,7 @@ struct IRPosition {
/// Create a position describing the argument \p Arg.
static const IRPosition argument(const Argument &Arg) {
- return IRPosition(const_cast<Argument &>(Arg), Kind(Arg.getArgNo()));
+ return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT);
}

/// Create a position describing the function scope of \p CB.
@@ -214,7 +209,8 @@ struct IRPosition {
/// Create a position describing the argument of \p CB at position \p ArgNo.
static const IRPosition callsite_argument(const CallBase &CB,
unsigned ArgNo) {
- return IRPosition(const_cast<CallBase &>(CB), Kind(ArgNo));
+ return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)),
+ IRP_CALL_SITE_ARGUMENT);
}

/// Create a position describing the argument of \p ACS at position \p ArgNo.
@@ -242,9 +238,7 @@ struct IRPosition {
return IRPosition::function(*IRP.getAssociatedFunction());
}

- bool operator==(const IRPosition &RHS) const {
- return (AnchorVal == RHS.AnchorVal) && (KindOrArgNo == RHS.KindOrArgNo);
- }
+ bool operator==(const IRPosition &RHS) const { return Enc == RHS.Enc; }
bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }

/// Return the value this abstract attribute is anchored with.
@@ -254,16 +248,21 @@ struct IRPosition {
/// far, only the case for call site arguments as the value is not sufficient
/// to pinpoint them. Instead, we can use the call site as an anchor.
Value &getAnchorValue() const {
- assert(KindOrArgNo != IRP_INVALID &&
- "Invalid position does not have an anchor value!");
- return *AnchorVal;
+ switch (getEncodingBits()) {
+ case ENC_VALUE:
+ case ENC_RETURNED_VALUE:
+ case ENC_FLOATING_FUNCTION:
+ return *getAsValuePtr();
+ case ENC_CALL_SITE_ARGUMENT_USE:
+ return *(getAsUsePtr()->getUser());
+ default:
+ llvm_unreachable("Unknown encoding!");
+ };
}

/// Return the associated function, if any.
Function *getAssociatedFunction() const {
- assert(KindOrArgNo != IRP_INVALID &&
- "Invalid position does not have an anchor scope!");
- if (auto *CB = dyn_cast<CallBase>(AnchorVal))
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
return CB->getCalledFunction();
return getAnchorScope();
}
@@ -312,18 +311,14 @@ struct IRPosition {
/// Return the value this abstract attribute is associated with.
Value &getAssociatedValue() const {
- assert(KindOrArgNo != IRP_INVALID &&
- "Invalid position does not have an associated value!");
- if (getArgNo() < 0 || isa<Argument>(AnchorVal))
- return *AnchorVal;
- assert(isa<CallBase>(AnchorVal) && "Expected a call base!");
- return *cast<CallBase>(AnchorVal)->getArgOperand(getArgNo());
+ if (getArgNo() < 0 || isa<Argument>(&getAnchorValue()))
+ return getAnchorValue();
+ assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!");
+ return *cast<CallBase>(&getAnchorValue())->getArgOperand(getArgNo());
}

/// Return the type this abstract attribute is associated with.
Type *getAssociatedType() const {
- assert(KindOrArgNo != IRP_INVALID &&
- "Invalid position does not have an associated type!");
if (getPositionKind() == IRPosition::IRP_RETURNED)
return getAssociatedFunction()->getReturnType();
return getAssociatedValue().getType();
@@ -331,7 +326,18 @@ struct IRPosition {
/// Return the argument number of the associated value if it is an argument or
/// call site argument, otherwise a negative value.
- int getArgNo() const { return KindOrArgNo; }
+ int getArgNo() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_ARGUMENT:
+ return cast<Argument>(getAsValuePtr())->getArgNo();
+ case IRPosition::IRP_CALL_SITE_ARGUMENT: {
+ Use &U = *getAsUsePtr();
+ return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
+ }
+ default:
+ return -1;
+ }
+ }

/// Return the index in the attribute list for this position.
unsigned getAttrIdx() const {
@@ -347,7 +353,7 @@ struct IRPosition {
return AttributeList::ReturnIndex;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
- return KindOrArgNo + AttributeList::FirstArgIndex;
+ return getArgNo() + AttributeList::FirstArgIndex;
}
llvm_unreachable(
"There is no attribute index for a floating or invalid position!");
@@ -355,19 +361,23 @@ struct IRPosition {
/// Return the associated position kind.
Kind getPositionKind() const {
- if (getArgNo() >= 0) {
- assert(((isa<CallBase>(getAnchorValue()) &&
- isa<Argument>(getAssociatedValue())) ||
- isa<Argument>(getAnchorValue())) &&
- "Expected argument or call base due to argument number!");
- if (isa<CallBase>(getAnchorValue()))
- return IRP_CALL_SITE_ARGUMENT;
+ char EncodingBits = getEncodingBits();
+ if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE)
+ return IRP_CALL_SITE_ARGUMENT;
+ if (EncodingBits == ENC_FLOATING_FUNCTION)
+ return IRP_FLOAT;
+
+ Value *V = getAsValuePtr();
+ if (!V)
+ return IRP_INVALID;
+ if (isa<Argument>(V))
return IRP_ARGUMENT;
- }
-
- assert(KindOrArgNo < 0 &&
- "Expected (call site) arguments to never reach this point!");
- return Kind(KindOrArgNo);
+ if (isa<Function>(V))
+ return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION;
+ if (isa<CallBase>(V))
+ return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED
+ : IRP_CALL_SITE;
+ return IRP_FLOAT;
}

/// TODO: Figure out if the attribute related helper functions should live
@@ -435,14 +445,52 @@ struct IRPosition {
static const IRPosition TombstoneKey;
///}

+ /// Conversion into a void * to allow reuse of pointer hashing.
+ operator void *() const { return Enc.getOpaqueValue(); }
+
private:
/// Private constructor for special values only!
- explicit IRPosition(int KindOrArgNo)
- : AnchorVal(0), KindOrArgNo(KindOrArgNo) {}
+ explicit IRPosition(void *Ptr) { Enc.setFromOpaqueValue(Ptr); }

/// IRPosition anchored at \p AnchorVal with kind/argument number \p PK.
- explicit IRPosition(Value &AnchorVal, Kind PK)
- : AnchorVal(&AnchorVal), KindOrArgNo(PK) {
+ explicit IRPosition(Value &AnchorVal, Kind PK) {
+ switch (PK) {
+ case IRPosition::IRP_INVALID:
+ llvm_unreachable("Cannot create invalid IRP with an anchor value!");
+ break;
+ case IRPosition::IRP_FLOAT:
+ // Special case for floating functions.
+ if (isa<Function>(AnchorVal))
+ Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
+ else
+ Enc = {&AnchorVal, ENC_VALUE};
+ break;
+ case IRPosition::IRP_FUNCTION:
+ case IRPosition::IRP_CALL_SITE:
+ Enc = {&AnchorVal, ENC_VALUE};
+ break;
+ case IRPosition::IRP_RETURNED:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ Enc = {&AnchorVal, ENC_RETURNED_VALUE};
+ break;
+ case IRPosition::IRP_ARGUMENT:
+ Enc = {&AnchorVal, ENC_VALUE};
+ break;
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ llvm_unreachable(
+ "Cannot create call site argument IRP with an anchor value!");
+ break;
+ }
+ verify();
+ }
+
+ /// IRPosition for the use \p U. The position kind \p PK needs to be
+ /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
+ /// the used value.
+ explicit IRPosition(Use &U, Kind PK) {
+ assert(PK == IRP_CALL_SITE_ARGUMENT &&
+ "Use constructor is for call site arguments only!");
+ Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE};
verify();
}
@@ -459,30 +507,65 @@ struct IRPosition {
SmallVectorImpl<Attribute> &Attrs, Attributor &A) const;

-protected:
- /// The value this position is anchored at.
- Value *AnchorVal;
-
- /// If AnchorVal is Argument or CallBase then this number should be
- /// non-negative and it denotes the argument or call site argument index
- /// respectively. Otherwise, it denotes the kind of this IRPosition according
- /// to Kind above.
- int KindOrArgNo;
+ /// Return the underlying pointer as Value *, valid for all positions but
+ /// IRP_CALL_SITE_ARGUMENT.
+ Value *getAsValuePtr() const {
+ assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&
+ "Not a value pointer!");
+ return reinterpret_cast<Value *>(Enc.getPointer());
+ }
+
+ /// Return the underlying pointer as Use *, valid only for
+ /// IRP_CALL_SITE_ARGUMENT positions.
+ Use *getAsUsePtr() const {
+ assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&
+ "Not a use pointer!");
+ return reinterpret_cast<Use *>(Enc.getPointer());
+ }
+
+ /// Return true if \p EncodingBits describe a returned or call site returned
+ /// position.
+ static bool isReturnPosition(char EncodingBits) {
+ return EncodingBits == ENC_RETURNED_VALUE;
+ }
+
+ /// Return true if the encoding bits describe a returned or call site returned
+ /// position.
+ bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); }
+
+ /// The encoding of the IRPosition is a combination of a pointer and two
+ /// encoding bits. The values of the encoding bits are defined in the enum
+ /// below. The pointer is either a Value* (for the first three encoding bit
+ /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE).
+ ///
+ ///{
+ enum {
+ ENC_VALUE = 0b00,
+ ENC_RETURNED_VALUE = 0b01,
+ ENC_FLOATING_FUNCTION = 0b10,
+ ENC_CALL_SITE_ARGUMENT_USE = 0b11,
+ };
+
+ // Reserve the maximal amount of bits so there is no need to mask out the
+ // remaining ones. We will not encode anything else in the pointer anyway.
+ static constexpr int NumEncodingBits =
+ PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
+ static_assert(NumEncodingBits >= 2, "At least two bits are required!");
+
+ /// The pointer with the encoding bits.
+ PointerIntPair<void *, NumEncodingBits, char> Enc;
+ ///}
+
+ /// Return the encoding bits.
+ char getEncodingBits() const { return Enc.getInt(); }
};

/// Helper that allows IRPosition as a key in a DenseMap.
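// Illustrative sketch (not from this patch): what the PointerIntPair encoding
// above does, in isolation. Pointer alignment guarantees the low bits are
// zero, so the tag rides in them and one word round-trips both fields:
//
//   llvm::PointerIntPair<void *, 2, char> P(&V, /*Tag=*/0b01); // V: a Value
//   assert(P.getPointer() == &V && P.getInt() == 0b01);
//   void *Opaque = P.getOpaqueValue(); // pointer + tag in a single word
//
// That single opaque word is also why the operator void *() defined above is
// enough for hashing in the DenseMap key specialization that follows.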
-template <> struct DenseMapInfo { +template <> struct DenseMapInfo : DenseMapInfo { static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } static inline IRPosition getTombstoneKey() { return IRPosition::TombstoneKey; } - static unsigned getHashValue(const IRPosition &IRP) { - return (DenseMapInfo::getHashValue(&IRP.getAnchorValue()) << 4) ^ - (unsigned(IRP.getArgNo())); - } - static bool isEqual(const IRPosition &LHS, const IRPosition &RHS) { - return LHS == RHS; - } }; /// A visitor class for IR positions. @@ -802,12 +885,10 @@ struct Attributor { // Put the attribute in the lookup map structure and the container we use to // keep track of all attributes. const IRPosition &IRP = AA.getIRPosition(); - Kind2AAMapTy *&Kind2AA = AAMap[IRP]; - if (!Kind2AA) - Kind2AA = new (Allocator) Kind2AAMapTy(); + AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}]; - assert(!(*Kind2AA)[&AAType::ID] && "Attribute already in map!"); - (*Kind2AA)[&AAType::ID] = &AA; + assert(!AAPtr && "Attribute already in map!"); + AAPtr = &AA; AllAbstractAttributes.push_back(&AA); return AA; @@ -1112,6 +1193,14 @@ struct Attributor { BumpPtrAllocator &Allocator; private: + /// Run `::update` on \p AA and track the dependences queried while doing so. + /// Also adjust the state if we know further updates are not necessary. + ChangeStatus updateAA(AbstractAttribute &AA); + + /// Remember the dependences on the top of the dependence stack such that they + /// may trigger further updates. (\see DependenceStack) + void rememberDependences(); + /// Check \p Pred on all call sites of \p Fn. /// /// This method will evaluate \p Pred on call sites and return @@ -1165,7 +1254,7 @@ struct Attributor { return AA; } - AA.update(*this); + updateAA(AA); if (TrackDependence && AA.getState().isValidState()) recordDependence(AA, const_cast(*QueryingAA), @@ -1187,13 +1276,11 @@ struct Attributor { // Lookup the abstract attribute of type AAType. If found, return it after // registering a dependence of QueryingAA on the one returned attribute. - Kind2AAMapTy *Kind2AA = AAMap.lookup(IRP); - if (!Kind2AA) + AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP}); + if (!AAPtr) return nullptr; - AAType *AA = static_cast((*Kind2AA)[&AAType::ID]); - if (!AA) - return nullptr; + AAType *AA = static_cast(AAPtr); // Do not register a dependence on an attribute with an invalid state. if (TrackDependence && AA->getState().isValidState()) @@ -1218,9 +1305,8 @@ struct Attributor { /// on the outer level, and the addresses of the static member (AAType::ID) on /// the inner level. ///{ - using Kind2AAMapTy = - SmallDenseMap; - DenseMap AAMap; + using AAMapKeyTy = std::pair; + DenseMap AAMap; ///} /// A map from abstract attributes to the ones that queried them through calls @@ -1246,7 +1332,7 @@ struct Attributor { ///} /// Map to remember all requested signature changes (= argument replacements). - DenseMap> + DenseMap, 8>> ArgumentReplacementMap; /// The set of functions we are deriving attributes for. @@ -1262,8 +1348,23 @@ struct Attributor { /// impact the call graph. SmallPtrSet CGModifiedFunctions; - /// Set if the attribute currently updated did query a non-fix attribute. - bool QueriedNonFixAA; + /// Information about a dependence. If FromAA is changed ToAA needs to be + /// updated as well. + struct DepInfo { + const AbstractAttribute *FromAA; + const AbstractAttribute *ToAA; + DepClassTy DepClass; + }; + + /// The dependence stack is used to track dependences during an + /// `AbstractAttribute::update` call. 
As `AbstractAttribute::update` can be
+ /// recursive we might have multiple vectors of dependences in here. The stack
+ /// size should be adjusted according to the expected recursion depth and the
+ /// inner dependence vector size to the expected number of dependences per
+ /// abstract attribute. Since the inner vectors are actually allocated on the
+ /// stack we can be generous with their size.
+ using DependenceVector = SmallVector<DepInfo, 8>;
+ SmallVector<DependenceVector *, 16> DependenceStack;

/// If not null, a set limiting the attribute opportunities.
const DenseSet<const char *> *Whitelist;
@@ -1754,11 +1855,14 @@ struct IRAttributeManifest {
};

/// Helper to tie an abstract state implementation to an abstract attribute.
-template <typename StateTy, typename Base>
-struct StateWrapper : public StateTy, public Base {
+template <typename StateTy, typename BaseType, class... Ts>
+struct StateWrapper : public BaseType, public StateTy {
/// Provide static access to the type of the state.
using StateType = StateTy;

+ StateWrapper(const IRPosition &IRP, Ts... Args)
+ : BaseType(IRP), StateTy(Args...) {}
+
/// See AbstractAttribute::getState(...).
StateType &getState() override { return *this; }
@@ -1767,16 +1871,16 @@ struct StateWrapper : public StateTy, public Base {
};

/// Helper class that provides common functionality to manifest IR attributes.
-template <Attribute::AttrKind AK, typename Base>
-struct IRAttribute : public IRPosition, public Base {
- IRAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
- ~IRAttribute() {}
+template <Attribute::AttrKind AK, typename BaseType>
+struct IRAttribute : public BaseType {
+ IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}

/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
const IRPosition &IRP = this->getIRPosition();
if (isa<UndefValue>(IRP.getAssociatedValue()) ||
- hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false, &A)) {
+ this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
+ &A)) {
this->getState().indicateOptimisticFixpoint();
return;
}
@@ -1796,11 +1900,12 @@ struct IRAttribute : public IRPosition, public Base {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- if (isa<UndefValue>(getIRPosition().getAssociatedValue()))
+ if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
return ChangeStatus::UNCHANGED;
SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(getAnchorValue().getContext(), DeducedAttrs);
- return IRAttributeManifest::manifestAttrs(A, getIRPosition(), DeducedAttrs);
+ getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
+ return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
+ DeducedAttrs);
}

/// Return the kind that identifies the abstract attribute implementation.
@@ -1811,9 +1916,6 @@ struct IRAttribute : public IRPosition, public Base {
SmallVectorImpl<Attribute> &Attrs) const {
Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
}
-
- /// Return an IR position, see struct IRPosition.
- const IRPosition &getIRPosition() const override { return *this; }
};

/// Base struct for all "concrete attribute" deductions.
@@ -1859,9 +1961,11 @@ struct IRAttribute : public IRPosition, public Base {
/// both directions will be added in the future.
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
-struct AbstractAttribute {
+struct AbstractAttribute : public IRPosition {
using StateType = AbstractState;

+ AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
+
/// Virtual destructor.
virtual ~AbstractAttribute() {}
@@ -1880,7 +1984,8 @@ struct AbstractAttribute {
virtual const StateType &getState() const = 0;

/// Return an IR position, see struct IRPosition.
- virtual const IRPosition &getIRPosition() const = 0; + const IRPosition &getIRPosition() const { return *this; }; + IRPosition &getIRPosition() { return *this; }; /// Helper functions, for debug purposes only. ///{ @@ -2096,9 +2201,9 @@ struct AAWillReturn /// An abstract attribute for undefined behavior. struct AAUndefinedBehavior - : public StateWrapper, - public IRPosition { - AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} + : public StateWrapper { + using Base = StateWrapper; + AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Return true if "undefined behavior" is assumed. bool isAssumedToCauseUB() const { return getAssumed(); } @@ -2112,9 +2217,6 @@ struct AAUndefinedBehavior /// Return true if "undefined behavior" is known for a specific instruction. virtual bool isKnownToCauseUB(Instruction *I) const = 0; - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const override { return *this; } - /// Create an abstract attribute view for the position \p IRP. static AAUndefinedBehavior &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2124,9 +2226,9 @@ struct AAUndefinedBehavior }; /// An abstract interface to determine reachability of point A to B. -struct AAReachability : public StateWrapper, - public IRPosition { - AAReachability(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} +struct AAReachability : public StateWrapper { + using Base = StateWrapper; + AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Returns true if 'From' instruction is assumed to reach, 'To' instruction. /// Users should provide two positions they are interested in, and the class @@ -2143,9 +2245,6 @@ struct AAReachability : public StateWrapper, return isPotentiallyReachable(From, To); } - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const override { return *this; } - /// Create an abstract attribute view for the position \p IRP. static AAReachability &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2212,9 +2311,9 @@ struct AANoReturn }; /// An abstract interface for liveness abstract attribute. -struct AAIsDead : public StateWrapper, - public IRPosition { - AAIsDead(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} +struct AAIsDead : public StateWrapper { + using Base = StateWrapper; + AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {} protected: /// The query functions are protected such that other attributes need to go @@ -2253,9 +2352,6 @@ struct AAIsDead : public StateWrapper, } public: - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const override { return *this; } - /// Create an abstract attribute view for the position \p IRP. static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2531,12 +2627,9 @@ struct AANoCapture }; /// An abstract interface for value simplify abstract attribute. -struct AAValueSimplify : public StateWrapper, - public IRPosition { - AAValueSimplify(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} - - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const { return *this; } +struct AAValueSimplify : public StateWrapper { + using Base = StateWrapper; + AAValueSimplify(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Return an assumed simplified value if a single candidate is found. If /// there cannot be one, return original value. 
If it is not clear yet, return @@ -2551,9 +2644,9 @@ struct AAValueSimplify : public StateWrapper, static const char ID; }; -struct AAHeapToStack : public StateWrapper, - public IRPosition { - AAHeapToStack(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} +struct AAHeapToStack : public StateWrapper { + using Base = StateWrapper; + AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Returns true if HeapToStack conversion is assumed to be possible. bool isAssumedHeapToStack() const { return getAssumed(); } @@ -2561,9 +2654,6 @@ struct AAHeapToStack : public StateWrapper, /// Returns true if HeapToStack conversion is known to be possible. bool isKnownHeapToStack() const { return getKnown(); } - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const { return *this; } - /// Create an abstract attribute view for the position \p IRP. static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2581,9 +2671,10 @@ struct AAHeapToStack : public StateWrapper, /// (=nocapture), it is (for now) not written (=readonly & noalias), we know /// what values are necessary to make the private copy look like the original /// one, and the values we need can be loaded (=dereferenceable). -struct AAPrivatizablePtr : public StateWrapper, - public IRPosition { - AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : IRPosition(IRP) {} +struct AAPrivatizablePtr + : public StateWrapper { + using Base = StateWrapper; + AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Returns true if pointer privatization is assumed to be possible. bool isAssumedPrivatizablePtr() const { return getAssumed(); } @@ -2595,13 +2686,6 @@ struct AAPrivatizablePtr : public StateWrapper, /// value. None means it is not clear yet, nullptr means there is none. virtual Optional getPrivatizableType() const = 0; - /// Return an IR position, see struct IRPosition. - /// - ///{ - IRPosition &getIRPosition() { return *this; } - const IRPosition &getIRPosition() const { return *this; } - ///} - /// Create an abstract attribute view for the position \p IRP. static AAPrivatizablePtr &createForPosition(const IRPosition &IRP, Attributor &A); @@ -2820,15 +2904,11 @@ struct AAMemoryLocation }; /// An abstract interface for range value analysis. -struct AAValueConstantRange : public IntegerRangeState, - public AbstractAttribute, - public IRPosition { +struct AAValueConstantRange + : public StateWrapper { + using Base = StateWrapper; AAValueConstantRange(const IRPosition &IRP, Attributor &A) - : IntegerRangeState(IRP.getAssociatedType()->getIntegerBitWidth()), - IRPosition(IRP) {} - - /// Return an IR position, see struct IRPosition. - const IRPosition &getIRPosition() const override { return *this; } + : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {} /// See AbstractAttribute::getState(...). IntegerRangeState &getState() override { return *this; } diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index 99cafba4f0a8e..584c65403b1ba 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -28,6 +28,7 @@ class FunctionPass; class ModulePass; class OptimizationRemarkEmitter; class Comdat; +class CallBase; /// Instrumentation passes often insert conditional checks into entry blocks. 
/// Call this function before splitting the entry block to move instructions
@@ -106,7 +107,7 @@ FunctionPass *createPGOMemOPSizeOptLegacyPass();
// generic utilities.
namespace pgo {

-// Helper function that transforms Inst (either an indirect-call instruction, or
+// Helper function that transforms CB (either an indirect-call instruction, or
// an invoke instruction) to a conditional call to F. This is like:
// if (Inst.CalledValue == F)
// F(...);
@@ -119,10 +120,9 @@ namespace pgo {
// If \p AttachProfToDirectCall is true, a prof metadata is attached to the
// new direct call to contain \p Count.
// Returns the promoted direct call instruction.
-Instruction *promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count,
- uint64_t TotalCount,
- bool AttachProfToDirectCall,
- OptimizationRemarkEmitter *ORE);
+CallBase &promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count,
+ uint64_t TotalCount, bool AttachProfToDirectCall,
+ OptimizationRemarkEmitter *ORE);
} // namespace pgo

/// Options for the frontend instrumentation based profiling pass.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
new file mode 100644
index 0000000000000..ef33fa2147d13
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -0,0 +1,49 @@
+//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common infrastructure for AddressSanitizer and
+// HWAddressSanitizer.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERCOMMON_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERCOMMON_H
+
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+
+class InterestingMemoryOperand {
+public:
+ Use *PtrUse;
+ bool IsWrite;
+ Type *OpType;
+ uint64_t TypeSize;
+ unsigned Alignment;
+ // The mask Value, if we're looking at a masked load/store.
+ Value *MaybeMask;
+
+ InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
+ class Type *OpType, unsigned Alignment,
+ Value *MaybeMask = nullptr)
+ : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
+ MaybeMask(MaybeMask) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ TypeSize = DL.getTypeStoreSizeInBits(OpType);
+ PtrUse = &I->getOperandUse(OperandNo);
+ }
+
+ Instruction *getInsn() { return cast<Instruction>(PtrUse->getUser()); }
+
+ Value *getPtr() { return PtrUse->get(); }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
index 6935501923694..daa88981d3bf6 100644
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -27,7 +27,7 @@ class MDNode;
/// match exactly, they must at least be bitcast compatible. If \p FailureReason
/// is non-null and the indirect call cannot be promoted, the failure reason
/// will be stored in it.
-bool isLegalToPromote(CallBase &CB, Function *Callee, +bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason = nullptr); /// Promote the given indirect call site to unconditionally call \p Callee. diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index 4e2571b1d0b6f..e808a50b320fb 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -332,7 +332,7 @@ class FunctionComparator { int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; int cmpAttrs(const AttributeList L, const AttributeList R) const; int cmpRangeMetadata(const MDNode *L, const MDNode *R) const; - int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; + int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const; /// Compare two GEPs for equivalent pointer arithmetic. /// Parts to be compared for each comparison stage, diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h index 355c4d7dc6d87..1efdcc65b39a8 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h @@ -15,7 +15,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H -#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -26,6 +25,12 @@ class Loop; class LoopAccessInfo; class LoopInfo; class ScalarEvolution; +struct RuntimeCheckingPtrGroup; +typedef std::pair + RuntimePointerCheck; + +template class ArrayRef; /// This class emits a version of the loop where run-time checks ensure /// that may-alias pointers can't overlap. @@ -71,8 +76,7 @@ class LoopVersioning { Loop *getNonVersionedLoop() { return NonVersionedLoop; } /// Sets the runtime alias checks for versioning the loop. - void setAliasChecks( - SmallVector Checks); + void setAliasChecks(ArrayRef Checks); /// Sets the runtime SCEV checks for versioning the loop. void setSCEVChecks(SCEVUnionPredicate Check); @@ -122,22 +126,20 @@ class LoopVersioning { ValueToValueMapTy VMap; /// The set of alias checks that we are versioning for. - SmallVector AliasChecks; + SmallVector AliasChecks; /// The set of SCEV checks that we are versioning for. SCEVUnionPredicate Preds; /// Maps a pointer to the pointer checking group that the pointer /// belongs to. - DenseMap - PtrToGroup; + DenseMap PtrToGroup; /// The alias scope corresponding to a pointer checking group. - DenseMap - GroupToScope; + DenseMap GroupToScope; /// The list of alias scopes that a pointer checking group can't alias. - DenseMap + DenseMap GroupToNonAliasingScopeList; /// Analyses used. 
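The LoopVersioning hunk above swaps a by-value SmallVector parameter for ArrayRef, a common LLVM API-hardening pattern: the callee advertises that it only reads a contiguous range, and callers may pass any vector-like container without converting or copying at the boundary. A minimal sketch of the pattern follows; the names are illustrative and not from the patch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

// The setter views the caller's storage and copies into its own member only
// because the data must outlive the call, mirroring setAliasChecks above.
struct VersioningChecks {
  llvm::SmallVector<int, 4> Stored;
  void setChecks(llvm::ArrayRef<int> Checks) {
    Stored.assign(Checks.begin(), Checks.end());
  }
};

void demo() {
  VersioningChecks V;
  std::vector<int> FromStdVector = {1, 2, 3};
  llvm::SmallVector<int, 2> FromSmallVector = {4, 5};
  V.setChecks(FromStdVector);   // implicit conversion, no copy at the call
  V.setChecks(FromSmallVector); // any contiguous container converts
}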
diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index d4a2d8edb4597..08d963475f23f 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -23,6 +23,7 @@ extern llvm::cl::opt PGSOIRPassOrTestOnly; extern llvm::cl::opt PGSOColdCodeOnly; extern llvm::cl::opt PGSOColdCodeOnlyForInstrPGO; extern llvm::cl::opt PGSOColdCodeOnlyForSamplePGO; +extern llvm::cl::opt PGSOColdCodeOnlyForPartialSamplePGO; extern llvm::cl::opt ForcePGSO; extern llvm::cl::opt PgsoCutoffInstrProf; extern llvm::cl::opt PgsoCutoffSampleProf; @@ -39,6 +40,16 @@ enum class PGSOQueryType { Other, // Others. }; +static inline bool isPGSOColdCodeOnly(ProfileSummaryInfo *PSI) { + return PGSOColdCodeOnly || + (PSI->hasInstrumentationProfile() && PGSOColdCodeOnlyForInstrPGO) || + (PSI->hasSampleProfile() && + ((!PSI->hasPartialSampleProfile() && PGSOColdCodeOnlyForSamplePGO) || + (PSI->hasPartialSampleProfile() && + PGSOColdCodeOnlyForPartialSamplePGO))) || + (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize()); +} + template bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, BFIT *BFI, PGSOQueryType QueryType) { @@ -54,13 +65,8 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || QueryType == PGSOQueryType::Test)) return false; - if (PGSOColdCodeOnly || - (PSI->hasInstrumentationProfile() && PGSOColdCodeOnlyForInstrPGO) || - (PSI->hasSampleProfile() && PGSOColdCodeOnlyForSamplePGO) || - (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { - // Even if the working set size isn't large, size-optimize cold code. + if (isPGSOColdCodeOnly(PSI)) return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI); - } if (PSI->hasSampleProfile()) // The "isCold" check seems to work better for Sample PGO as it could have // many profile-unannotated functions. @@ -84,13 +90,8 @@ bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryIn if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || QueryType == PGSOQueryType::Test)) return false; - if (PGSOColdCodeOnly || - (PSI->hasInstrumentationProfile() && PGSOColdCodeOnlyForInstrPGO) || - (PSI->hasSampleProfile() && PGSOColdCodeOnlyForSamplePGO) || - (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { - // Even if the working set size isn't large, size-optimize cold code. + if (isPGSOColdCodeOnly(PSI)) return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI); - } if (PSI->hasSampleProfile()) // The "isCold" check seems to work better for Sample PGO as it could have // many profile-unannotated functions. diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 2e44bbd3a8ca5..b1433c579af81 100644 --- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -114,7 +114,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { Stores.insert(&*I); Instruction &Inst = *I; if (auto *Call = dyn_cast(&Inst)) { - Value *Callee = Call->getCalledValue(); + Value *Callee = Call->getCalledOperand(); // Skip actual functions for direct function calls. 
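// A note on the rename this patch applies here and in several files below
// (based on the renamed accessor's documented behavior): CallBase::
// getCalledValue() became getCalledOperand() with no change in semantics.
// Both return the raw called operand, bitcasts and all, which is why the
// isa<Function> filter for plain direct calls is still required.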
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index ae99d631f6785..d33af83d586a9 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -61,6 +61,7 @@ INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -935,8 +936,10 @@ BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
const BasicBlock *Dst) const {
auto Prob = BranchProbability::getZero();
bool FoundProb = false;
+ uint32_t EdgeCount = 0;
for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
+ ++EdgeCount;
auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
if (MapI != Probs.end()) {
FoundProb = true;
Prob += MapI->second;
}
}
uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
- return FoundProb ? Prob : BranchProbability(1, succ_num);
+ return FoundProb ? Prob : BranchProbability(EdgeCount, succ_num);
}

/// Set the edge probability for a given edge specified by PredBlock and an
@@ -980,7 +983,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
}

void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ PostDominatorTree *PDT) {
LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
@@ -1008,10 +1012,15 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
LLVM_DEBUG(dbgs() << "\n");
}

- std::unique_ptr<PostDominatorTree> PDT =
- std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
- computePostDominatedByUnreachable(F, PDT.get());
- computePostDominatedByColdCall(F, PDT.get());
+ std::unique_ptr<PostDominatorTree> PDTPtr;
+
+ if (!PDT) {
+ PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
+ PDT = PDTPtr.get();
+ }
+
+ computePostDominatedByUnreachable(F, PDT);
+ computePostDominatedByColdCall(F, PDT);

// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
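// A worked example for the getEdgeProbability change in the hunks above
// (illustrative, not from the patch): if a switch sends two of its three
// successor slots to the same block B and no probability was recorded, the
// query for the Src->B edge now counts both duplicate edges and returns 2/3,
// where the old code returned 1/3 and the per-successor probabilities failed
// to sum to 1.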
@@ -1057,6 +1066,7 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
AU.setPreservesAll();
}
@@ -1064,7 +1074,9 @@ bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- BPI.calculate(F, LI, &TLI);
+ PostDominatorTree &PDT =
+ getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ BPI.calculate(F, LI, &TLI, &PDT);
return false;
}
@@ -1079,7 +1091,9 @@ AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
- BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
- &AM.getResult<TargetLibraryAnalysis>(F));
+ BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
+ &AM.getResult<TargetLibraryAnalysis>(F),
+ &AM.getResult<PostDominatorTreeAnalysis>(F));
return BPI;
}
diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp
index 8c8e4887ef77f..cf4afc8cfd9cb 100644
--- a/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/llvm/lib/Analysis/CFGPrinter.cpp
@@ -42,7 +42,7 @@ static cl::opt<bool> HideUnreachablePaths("cfg-hide-unreachable-paths",
cl::init(false));
static cl::opt<bool> HideDeoptimizePaths("cfg-hide-deoptimize-paths",
cl::init(false));
-static cl::opt<bool> ShowHeatColors("cfg-heat-colors", cl::init(false),
+static cl::opt<bool> ShowHeatColors("cfg-heat-colors", cl::init(true),
cl::Hidden,
cl::desc("Show heat colors in CFG"));
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 0e571cad54607..a0b93636a0392 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -172,7 +172,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (InvI->cannotDuplicate())
notDuplicatable = true;

- NumInsts += TTI.getUserCost(&I);
+ NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
}

if (isa<ReturnInst>(BB->getTerminator()))
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index e247d8f48df2d..ce9f030bf379e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -54,8 +54,8 @@ static cl::opt
cl::ZeroOrMore, cl::desc("Default amount of inlining to perform"));

-static cl::opt<bool> PrintDebugInstructionDeltas("print-instruction-deltas",
- cl::Hidden, cl::init(false),
+static cl::opt<bool> PrintDebugInstructionDeltas(
+ "print-instruction-deltas", cl::Hidden, cl::init(false),
cl::desc("Prints deltas of cost and threshold per instruction"));

static cl::opt<int> InlineThreshold(
@@ -132,10 +132,10 @@ class CostAnnotationWriter : public AssemblyAnnotationWriter {
public:
// This DenseMap stores the delta change in cost and threshold after
// accounting for the given instruction.
- DenseMap<const Instruction *, InstructionCostDetail> CostThresholdMap;
+ DenseMap<const Instruction *, InstructionCostDetail> CostThresholdMap;

virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS);
+ formatted_raw_ostream &OS);
};

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -427,6 +427,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// Attempt to evaluate indirect calls to boost its inline cost.
const bool BoostIndirectCalls;

+ /// Ignore the threshold when finalizing analysis.
+ const bool IgnoreThreshold;
+
/// Inlining cost measured in abstract units, accounts for all the
/// instructions expected to be executed for a given function invocation.
/// Instructions that are statically proven to be dead based on call-site @@ -587,7 +590,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // This function is called to store the initial cost of inlining before // the given instruction was assessed. if (!PrintDebugInstructionDeltas) - return ; + return; Writer.CostThresholdMap[I].CostBefore = Cost; Writer.CostThresholdMap[I].ThresholdBefore = Threshold; } @@ -596,7 +599,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // This function is called to find new values of cost and threshold after // the instruction has been assessed. if (!PrintDebugInstructionDeltas) - return ; + return; Writer.CostThresholdMap[I].CostAfter = Cost; Writer.CostThresholdMap[I].ThresholdAfter = Threshold; } @@ -629,14 +632,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= VectorBonus / 2; - if (Cost < std::max(1, Threshold)) + if (IgnoreThreshold || Cost < std::max(1, Threshold)) return InlineResult::success(); return InlineResult::failure("Cost over threshold."); } bool shouldStop() override { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - return Cost >= Threshold && !ComputeFullInlineCost; + return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost; } void onLoadEliminationOpportunity() override { @@ -694,12 +697,13 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { std::function &GetAssumptionCache, Optional> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, - CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) + CallBase &Call, const InlineParams &Params, bool BoostIndirect = true, + bool IgnoreThreshold = false) : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), Params(Params), Threshold(Params.DefaultThreshold), - BoostIndirectCalls(BoostIndirect) {} + BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold) {} /// Annotation Writer for cost annotation CostAnnotationWriter Writer; @@ -723,22 +727,24 @@ void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { disableLoadElimination(); } -void CostAnnotationWriter::emitInstructionAnnot( - const Instruction *I, formatted_raw_ostream &OS) { - // The cost of inlining of the given instruction is printed always. - // The threshold delta is printed only when it is non-zero. It happens - // when we decided to give a bonus at a particular instruction. - assert(CostThresholdMap.count(I) > 0 && - "Expected each instruction to have an instruction annotation"); - const auto &Record = CostThresholdMap[I]; - OS << "; cost before = " << Record.CostBefore - << ", cost after = " << Record.CostAfter - << ", threshold before = " << Record.ThresholdBefore - << ", threshold after = " << Record.ThresholdAfter << ", "; - OS << "cost delta = " << Record.getCostDelta(); - if (Record.hasThresholdChanged()) - OS << ", threshold delta = " << Record.getThresholdDelta(); - OS << "\n"; +void CostAnnotationWriter::emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + // The cost of inlining of the given instruction is printed always. + // The threshold delta is printed only when it is non-zero. It happens + // when we decided to give a bonus at a particular instruction. 
+ if (CostThresholdMap.count(I) == 0) { + OS << "; No analysis for the instruction\n"; + return; + } + const auto &Record = CostThresholdMap[I]; + OS << "; cost before = " << Record.CostBefore + << ", cost after = " << Record.CostAfter + << ", threshold before = " << Record.ThresholdBefore + << ", threshold after = " << Record.ThresholdAfter << ", "; + OS << "cost delta = " << Record.getCostDelta(); + if (Record.hasThresholdChanged()) + OS << ", threshold delta = " << Record.getThresholdDelta(); + OS << "\n"; } /// If 'V' maps to a SROA candidate, disable SROA for it. @@ -799,7 +805,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(SimpleOp); else Operands.push_back(*I); - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&GEP, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitAlloca(AllocaInst &I) { @@ -1047,7 +1055,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { @@ -1071,7 +1080,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitCastInst(CastInst &I) { @@ -1101,7 +1111,8 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { break; } - return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); + return TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { @@ -1803,7 +1814,8 @@ bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. - if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + if (TargetTransformInfo::TCC_Free == + TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency)) return true; // We found something we don't understand or can't handle. 
Mark any SROA-able
@@ -2215,6 +2227,30 @@ InlineCost llvm::getInlineCost(
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
}

+Optional<int> llvm::getInliningCostEstimate(
+ CallBase &Call, TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
+ ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
+ const InlineParams Params = {/* DefaultThreshold*/ 0,
+ /*HintThreshold*/ {},
+ /*ColdThreshold*/ {},
+ /*OptSizeThreshold*/ {},
+ /*OptMinSizeThreshold*/ {},
+ /*HotCallSiteThreshold*/ {},
+ /*LocallyHotCallSiteThreshold*/ {},
+ /*ColdCallSiteThreshold*/ {},
+ /* ComputeFullInlineCost*/ true};
+
+ InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE,
+ *Call.getCalledFunction(), Call, Params, true,
+ /*IgnoreThreshold*/ true);
+ auto R = CA.analyze();
+ if (!R.isSuccess())
+ return None;
+ return CA.getCost();
+}
+
Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d1970d45a6867..7de4a0744c299 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4302,9 +4302,9 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
if (isa<UndefValue>(Idx))
return UndefValue::get(Vec->getType());

- // Inserting an undef scalar? Assume it is the same value as the existing
- // vector element.
- if (isa<UndefValue>(Val))
+ // If the scalar is undef, and there is no risk of propagating poison from the
+ // vector value, simplify to the vector value.
+ if (isa<UndefValue>(Val) && isGuaranteedNotToBeUndefOrPoison(Vec))
return Vec;

// If we are extracting a value from a vector, then inserting it into the same
@@ -5407,7 +5407,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}

Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
- Value *Callee = Call->getCalledValue();
+ Value *Callee = Call->getCalledOperand();

// musttail calls can only be simplified if they are also DCEd.
// As we can't guarantee this here, don't simplify them.
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 338c6a4137e74..30a0846d2af8d 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -220,7 +220,7 @@ void Lint::visitFunction(Function &F) {
}

void Lint::visitCallBase(CallBase &I) {
- Value *Callee = I.getCalledValue();
+ Value *Callee = I.getCalledOperand();

visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
MemRef::Callee);
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 05f6010c1bd1a..cba3558e64aa0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -174,6 +174,13 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
return OrigSCEV;
}

+RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
+ unsigned Index, RuntimePointerChecking &RtCheck)
+ : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End),
+ Low(RtCheck.Pointers[Index].Start) {
+ Members.push_back(Index);
+}
+
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration.
Then B is calculated as: @@ -231,14 +238,14 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); } -SmallVector +SmallVector RuntimePointerChecking::generateChecks() const { - SmallVector Checks; + SmallVector Checks; for (unsigned I = 0; I < CheckingGroups.size(); ++I) { for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { - const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; - const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; + const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; if (needsChecking(CGI, CGJ)) Checks.push_back(std::make_pair(&CGI, &CGJ)); @@ -254,8 +261,8 @@ void RuntimePointerChecking::generateChecks( Checks = generateChecks(); } -bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, - const CheckingPtrGroup &N) const { +bool RuntimePointerChecking::needsChecking( + const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const { for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) if (needsChecking(M.Members[I], N.Members[J])) @@ -277,7 +284,7 @@ static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, return I; } -bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { +bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) { const SCEV *Start = RtCheck.Pointers[Index].Start; const SCEV *End = RtCheck.Pointers[Index].End; @@ -352,7 +359,7 @@ void RuntimePointerChecking::groupChecks( // pointers to the same underlying object. if (!UseDependencies) { for (unsigned I = 0; I < Pointers.size(); ++I) - CheckingGroups.push_back(CheckingPtrGroup(I, *this)); + CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this)); return; } @@ -378,7 +385,7 @@ void RuntimePointerChecking::groupChecks( MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, Pointers[I].IsWritePtr); - SmallVector Groups; + SmallVector Groups; auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); // Because DepCands is constructed by visiting accesses in the order in @@ -395,7 +402,7 @@ void RuntimePointerChecking::groupChecks( // Go through all the existing sets and see if we can find one // which can include this pointer. - for (CheckingPtrGroup &Group : Groups) { + for (RuntimeCheckingPtrGroup &Group : Groups) { // Don't perform more than a certain amount of comparisons. // This should limit the cost of grouping the pointers to something // reasonable. If we do end up hitting this threshold, the algorithm @@ -415,7 +422,7 @@ void RuntimePointerChecking::groupChecks( // We couldn't add this pointer to any existing set or the threshold // for the number of comparisons has been reached. Create a new group // to hold the current pointer. - Groups.push_back(CheckingPtrGroup(Pointer, *this)); + Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); } // We've computed the grouped checks for this partition. @@ -451,7 +458,7 @@ bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { } void RuntimePointerChecking::printChecks( - raw_ostream &OS, const SmallVectorImpl &Checks, + raw_ostream &OS, const SmallVectorImpl &Checks, unsigned Depth) const { unsigned N = 0; for (const auto &Check : Checks) { @@ -2142,10 +2149,10 @@ struct PointerBounds { /// Expand code for the lower and upper bound of the pointer group \p CG /// in \p TheLoop. 
\return the values for the bounds. -static PointerBounds -expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, - Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, - const RuntimePointerChecking &PtrRtChecking) { +static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, + Loop *TheLoop, Instruction *Loc, + SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); @@ -2181,17 +2188,17 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, /// Turns a collection of checks into a collection of expanded upper and /// lower bounds for both pointers in the check. -static SmallVector, 4> expandBounds( - const SmallVectorImpl &PointerChecks, - Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, - const RuntimePointerChecking &PtrRtChecking) { +static SmallVector, 4> +expandBounds(const SmallVectorImpl &PointerChecks, Loop *L, + Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { SmallVector, 4> ChecksWithBounds; // Here we're relying on the SCEV Expander's cache to only emit code for the // same bounds once. transform( PointerChecks, std::back_inserter(ChecksWithBounds), - [&](const RuntimePointerChecking::PointerCheck &Check) { + [&](const RuntimePointerCheck &Check) { PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); @@ -2203,8 +2210,7 @@ static SmallVector, 4> expandBounds( std::pair LoopAccessInfo::addRuntimeChecks( Instruction *Loc, - const SmallVectorImpl &PointerChecks) - const { + const SmallVectorImpl &PointerChecks) const { const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); auto *SE = PSE->getSE(); SCEVExpander Exp(*SE, DL, "induction"); diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index fc504b7f9c63a..17c78d6aad3f0 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -673,7 +673,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { // No interprocedural analysis is done at the moment. 
- if (!A.hasByValOrInAllocaAttr()) { + if (!A.hasPassPointeeByValueAttr()) { ++ObjectVisitorArgument; return unknown(); } diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 77f4125b5d4b8..f2f5fd70f4718 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -167,7 +167,7 @@ class MemoryLocOrCall { if (!IsCall) return Loc == Other.Loc; - if (Call->getCalledValue() != Other.Call->getCalledValue()) + if (Call->getCalledOperand() != Other.Call->getCalledOperand()) return false; return Call->arg_size() == Other.Call->arg_size() && @@ -203,7 +203,7 @@ template <> struct DenseMapInfo { hash_code hash = hash_combine(MLOC.IsCall, DenseMapInfo::getHashValue( - MLOC.getCall()->getCalledValue())); + MLOC.getCall()->getCalledOperand())); for (const Value *Arg : MLOC.getCall()->args()) hash = hash_combine(hash, DenseMapInfo::getHashValue(Arg)); @@ -466,7 +466,8 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, assert(isa(MA)); Worklist.append( - upward_defs_begin({const_cast(MA), MAP.second}), + upward_defs_begin({const_cast(MA), MAP.second}, + MSSA.getDomTree()), upward_defs_end()); } } @@ -595,8 +596,8 @@ template class ClobberWalker { void addSearches(MemoryPhi *Phi, SmallVectorImpl &PausedSearches, ListIndex PriorNode) { - auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), - upward_defs_end()); + auto UpwardDefs = make_range( + upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT), upward_defs_end()); for (const MemoryAccessPair &P : UpwardDefs) { PausedSearches.push_back(Paths.size()); Paths.emplace_back(P.second, P.first, PriorNode); diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index b78115bd43f7d..b900a38fdc0af 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -316,7 +316,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm()) HasInlineAsmMaybeReferencingInternal = true; - auto *CalledValue = CB->getCalledValue(); + auto *CalledValue = CB->getCalledOperand(); auto *CalledFunction = CB->getCalledFunction(); if (CalledValue && !CalledFunction) { CalledValue = CalledValue->stripPointerCasts(); @@ -340,7 +340,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } // We should have named any anonymous globals assert(CalledFunction->hasName()); - auto ScaledCount = PSI->getProfileCount(&I, BFI); + auto ScaledCount = PSI->getProfileCount(*CB, BFI); auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None) diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index bea973e8a94e3..6e3ff67bdddb9 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -357,26 +357,23 @@ ModulePass *llvm::createMustBeExecutedContextPrinter() { bool MustBeExecutedContextPrinter::runOnModule(Module &M) { // We provide non-PM analysis here because the old PM doesn't like to query // function passes from a module pass. 
- SmallVector PDTs; - SmallVector DTs; - SmallVector LIs; + SmallVector, 8> PDTs; + SmallVector, 8> DTs; + SmallVector, 8> LIs; GetterTy LIGetter = [&](const Function &F) { - DominatorTree *DT = new DominatorTree(const_cast(F)); - LoopInfo *LI = new LoopInfo(*DT); - DTs.push_back(DT); - LIs.push_back(LI); - return LI; + DTs.push_back(std::make_unique(const_cast(F))); + LIs.push_back(std::make_unique(*DTs.back())); + return LIs.back().get(); }; GetterTy DTGetter = [&](const Function &F) { - DominatorTree *DT = new DominatorTree(const_cast(F)); - DTs.push_back(DT); - return DT; + DTs.push_back(std::make_unique(const_cast(F))); + return DTs.back().get(); }; GetterTy PDTGetter = [&](const Function &F) { - PostDominatorTree *PDT = new PostDominatorTree(const_cast(F)); - PDTs.push_back(PDT); - return PDT; + PDTs.push_back( + std::make_unique(const_cast(F))); + return PDTs.back().get(); }; MustBeExecutedContextExplorer Explorer( /* ExploreInterBlock */ true, @@ -392,9 +389,6 @@ bool MustBeExecutedContextPrinter::runOnModule(Module &M) { } } - DeleteContainerPointers(PDTs); - DeleteContainerPointers(LIs); - DeleteContainerPointers(DTs); return false; } diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index bcccbb5856231..2cdf7a1772169 100644 --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -36,8 +36,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) LI.analyze(DT); // Then compute BranchProbabilityInfo. - BranchProbabilityInfo BPI; - BPI.calculate(*F, LI); + BranchProbabilityInfo BPI(*F, LI); // Finally compute BFI. OwnedBFI = std::make_unique(*F, BPI, LI); diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 1ef1758d55a41..dd53aa78f40fc 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -101,14 +101,10 @@ bool ProfileSummaryInfo::computeSummary() { return true; } -// FIXME(CallSite): the parameter should be a CallBase. -Optional -ProfileSummaryInfo::getProfileCount(const Instruction *Inst, - BlockFrequencyInfo *BFI, - bool AllowSynthetic) { - if (!Inst) - return None; - assert((isa(Inst) || isa(Inst)) && +Optional ProfileSummaryInfo::getProfileCount(const CallBase &Call, + BlockFrequencyInfo *BFI, + bool AllowSynthetic) { + assert((isa(Call) || isa(Call)) && "We can only get profile count for call/invoke instruction."); if (hasSampleProfile()) { // In sample PGO mode, check if there is a profile metadata on the @@ -116,12 +112,12 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, // since the sampled entry count may not be accurate. If there is no // annotated on the instruction, return None. 
uint64_t TotalCount; - if (Inst->extractProfTotalWeight(TotalCount)) + if (Call.extractProfTotalWeight(TotalCount)) return TotalCount; return None; } if (BFI) - return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic); + return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic); return None; } @@ -156,7 +152,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, for (const auto &BB : *F) for (const auto &I : BB) if (isa(I) || isa(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast(I), nullptr)) TotalCallCount += CallCount.getValue(); if (isHotCount(TotalCallCount)) return true; @@ -185,7 +181,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, for (const auto &BB : *F) for (const auto &I : BB) if (isa(I) || isa(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast(I), nullptr)) TotalCallCount += CallCount.getValue(); if (!isColdCount(TotalCallCount)) return false; @@ -214,7 +210,7 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( for (const auto &BB : *F) for (const auto &I : BB) if (isa(I) || isa(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) + if (auto CallCount = getProfileCount(cast(I), nullptr)) TotalCallCount += CallCount.getValue(); if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) return true; @@ -388,13 +384,13 @@ bool ProfileSummaryInfo::isColdBlockNthPercentile(int PercentileCutoff, bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) { - auto C = getProfileCount(&CB, BFI); + auto C = getProfileCount(CB, BFI); return C && isHotCount(*C); } bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) { - auto C = getProfileCount(&CB, BFI); + auto C = getProfileCount(CB, BFI); if (C) return isColdCount(*C); diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 5ffdb0b10d447..22619d0f754fb 100644 --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2157,6 +2157,9 @@ bool SCEVExpander::isHighCostExpansionHelper( return false; // Assume to be zero-cost. } + TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_RecipThroughput; + if (auto *CastExpr = dyn_cast(S)) { unsigned Opcode; switch (S->getSCEVType()) { @@ -2174,7 +2177,7 @@ bool SCEVExpander::isHighCostExpansionHelper( } const SCEV *Op = CastExpr->getOperand(); BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(), - /*Src=*/Op->getType()); + /*Src=*/Op->getType(), CostKind); Worklist.emplace_back(Op); return false; // Will answer upon next entry into this function. } @@ -2184,7 +2187,8 @@ bool SCEVExpander::isHighCostExpansionHelper( if (auto *SC = dyn_cast(UDivExpr->getRHS())) { if (SC->getAPInt().isPowerOf2()) { BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::LShr, S->getType()); + TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(), + CostKind); // Note that we don't count the cost of RHS, because it is a constant, // and we consider those to be free. But if that changes, we would need // to log2() it first before calling isHighCostExpansionHelper(). @@ -2207,7 +2211,8 @@ bool SCEVExpander::isHighCostExpansionHelper( // Need to count the cost of this UDiv. 
BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType()); + TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(), + CostKind); Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); return false; // Will answer upon next entry into this function. } @@ -2218,8 +2223,10 @@ bool SCEVExpander::isHighCostExpansionHelper( assert(NAry->getNumOperands() >= 2 && "Polynomial should be at least linear"); - int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType); - int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType); + int AddCost = + TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); + int MulCost = + TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); // In this polynominal, we may have some zero operands, and we shouldn't // really charge for those. So how many non-zero coeffients are there? @@ -2273,22 +2280,26 @@ bool SCEVExpander::isHighCostExpansionHelper( int PairCost; switch (S->getSCEVType()) { case scAddExpr: - PairCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType); + PairCost = + TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); break; case scMulExpr: // TODO: this is a very pessimistic cost modelling for Mul, // because of Bin Pow algorithm actually used by the expander, // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - PairCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType); + PairCost = + TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); break; case scSMaxExpr: case scUMaxExpr: case scSMinExpr: case scUMinExpr: PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, - CmpInst::makeCmpResultType(OpType)) + + CmpInst::makeCmpResultType(OpType), + CostKind) + TTI.getCmpSelInstrCost(Instruction::Select, OpType, - CmpInst::makeCmpResultType(OpType)); + CmpInst::makeCmpResultType(OpType), + CostKind); break; default: llvm_unreachable("There are no other variants here."); diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 1e06fd8fe3a06..716027a3413c8 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -353,7 +353,7 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
const GlobalValue *Callee = - dyn_cast(CB.getCalledValue()->stripPointerCasts()); + dyn_cast(CB.getCalledOperand()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 92862fef59346..95b17aa702d08 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -154,8 +154,9 @@ int TargetTransformInfo::getInlinerVectorBonusPercent() const { } int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) const { - return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); + ArrayRef Operands, + TTI::TargetCostKind CostKind) const { + return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } int TargetTransformInfo::getExtCost(const Instruction *I, @@ -165,8 +166,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I, int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments, - const User *U) const { - int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U); + const User *U, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -178,8 +180,9 @@ unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( } int TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands) const { - int Cost = TTIImpl->getUserCost(U, Operands); + ArrayRef Operands, + enum TargetCostKind CostKind) const { + int Cost = TTIImpl->getUserCost(U, Operands, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -367,9 +370,11 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { return TTIImpl->useColdCCForColdCall(F); } -unsigned TargetTransformInfo::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) const { - return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); +unsigned +TargetTransformInfo::getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) const { + return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } unsigned TargetTransformInfo::getOperandsScalarizationOverhead( @@ -439,22 +444,27 @@ int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, return Cost; } -int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty); +int +TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty); +int +TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + 
const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -581,12 +591,14 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { } int TargetTransformInfo::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) const { int Cost = TTIImpl->getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, + Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -599,10 +611,11 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty, } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I); + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -615,18 +628,20 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst, return Cost; } -int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - int Cost = TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -641,40 +656,45 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const { + unsigned AddressSpace, + TTI::TargetCostKind CostKind) const { int Cost = - TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, 
Type *DataTy, - Value *Ptr, bool VariableMask, - unsigned Alignment, - const Instruction *I) const { +int TargetTransformInfo::getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I) const { int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, - bool UseMaskForGaps) const { + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) const { int Cost = TTIImpl->getInterleavedMemoryOpCost( - Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, UseMaskForCond, - UseMaskForGaps); + Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, + UseMaskForCond, UseMaskForGaps); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -683,9 +703,11 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) const { int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, + I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -693,15 +715,18 @@ int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, + CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) const { - int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + ArrayRef Tys, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -726,18 +751,20 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm) const { - int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) const { + int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty, - VectorType *CondTy, - bool IsPairwiseForm, - bool IsUnsigned) const { +int TargetTransformInfo::getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind) const { int Cost = - TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, + CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ 
-1150,14 +1177,16 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, } int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + switch (I->getOpcode()) { case Instruction::GetElementPtr: - return getUserCost(I); + return getUserCost(I, CostKind); case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { - return getCFInstrCost(I->getOpcode()); + return getCFInstrCost(I->getOpcode(), CostKind); } case Instruction::Add: case Instruction::FAdd: @@ -1182,7 +1211,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { Op1VK = getOperandInfo(I->getOperand(0), Op1VP); Op2VK = getOperandInfo(I->getOperand(1), Op2VP); SmallVector Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, + return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind, + Op1VK, Op2VK, Op1VP, Op2VP, Operands, I); } case Instruction::FNeg: { @@ -1192,31 +1222,34 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { Op2VK = OK_AnyValue; Op2VP = OP_None; SmallVector Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, + return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind, + Op1VK, Op2VK, Op1VP, Op2VP, Operands, I); } case Instruction::Select: { const SelectInst *SI = cast(I); Type *CondTy = SI->getCondition()->getType(); - return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); + return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, + CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); + return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), + CostKind, I); } case Instruction::Store: { const StoreInst *SI = cast(I); Type *ValTy = SI->getValueOperand()->getType(); return getMemoryOpCost(I->getOpcode(), ValTy, MaybeAlign(SI->getAlignment()), - SI->getPointerAddressSpace(), I); + SI->getPointerAddressSpace(), CostKind, I); } case Instruction::Load: { const LoadInst *LI = cast(I); return getMemoryOpCost(I->getOpcode(), I->getType(), MaybeAlign(LI->getAlignment()), - LI->getPointerAddressSpace(), I); + LI->getPointerAddressSpace(), CostKind, I); } case Instruction::ZExt: case Instruction::SExt: @@ -1232,7 +1265,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::BitCast: case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); - return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); + return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I); } case Instruction::ExtractElement: { const ExtractElementInst *EEI = cast(I); @@ -1249,7 +1282,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { case RK_Arithmetic: return getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/false); + /*IsPairwiseForm=*/false, + CostKind); case RK_MinMax: return getMinMaxReductionCost( ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), @@ -1265,7 +1299,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { case RK_Arithmetic: return getArithmeticReductionCost(ReduxOpCode, ReduxType, - 
/*IsPairwiseForm=*/true); + /*IsPairwiseForm=*/true, CostKind); case RK_MinMax: return getMinMaxReductionCost( ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)), @@ -1333,7 +1367,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { FMF = FPMO->getFastMathFlags(); return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, - FMF, 1, II); + FMF, 1, CostKind, II); } return -1; default: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 1c278d8fc56ec..3d1e627e3df9e 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1454,6 +1454,10 @@ static void computeKnownBitsFromOperator(const Operator *I, gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + // TrailZ can only become smaller, short-circuit if we hit zero. + if (TrailZ == 0) + break; + Value *Index = I->getOperand(i); if (StructType *STy = GTI.getStructTypeOrNull()) { // Handle struct member offset arithmetic. @@ -2339,7 +2343,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast(V)) - if (A->hasByValOrInAllocaAttr() || A->hasNonNullAttr()) + if (A->hasPassPointeeByValueAttr() || A->hasNonNullAttr()) return true; // A Load tagged with nonnull metadata is never null. diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index f7857f4c6d737..b414b5908c895 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -667,6 +667,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optforfuzzing); KEYWORD(optnone); KEYWORD(optsize); + KEYWORD(preallocated); KEYWORD(readnone); KEYWORD(readonly); KEYWORD(returned); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index dc7301d9f5316..1948177916e78 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1345,6 +1345,13 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; case lltok::kw_willreturn: B.addAttribute(Attribute::WillReturn); break; case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break; + case lltok::kw_preallocated: { + Type *Ty; + if (ParsePreallocated(Ty)) + return true; + B.addPreallocatedAttr(Ty); + break; + } // Error handling. 
case lltok::kw_inreg: @@ -1373,7 +1380,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, break; } - Lex.Lex(); + // ParsePreallocated() consumes token + if (Token != lltok::kw_preallocated) + Lex.Lex(); } } @@ -1637,6 +1646,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { B.addByValAttr(Ty); continue; } + case lltok::kw_preallocated: { + Type *Ty; + if (ParsePreallocated(Ty)) + return true; + B.addPreallocatedAttr(Ty); + continue; + } case lltok::kw_dereferenceable: { uint64_t Bytes; if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) @@ -1804,10 +1820,15 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; - case lltok::kw_readnone: case lltok::kw_readonly: HaveError |= Error(Lex.getLoc(), "invalid use of attribute on return type"); + break; + case lltok::kw_preallocated: + HaveError |= + Error(Lex.getLoc(), + "invalid use of parameter-only/call site-only attribute"); + break; } Lex.Lex(); @@ -2519,6 +2540,21 @@ bool LLParser::ParseByValWithOptionalType(Type *&Result) { return false; } +/// ParsePreallocated +/// ::= preallocated() +bool LLParser::ParsePreallocated(Type *&Result) { + Result = nullptr; + if (!EatIfPresent(lltok::kw_preallocated)) + return true; + if (!EatIfPresent(lltok::lparen)) + return Error(Lex.getLoc(), "expected '('"); + if (ParseType(Result)) + return true; + if (!EatIfPresent(lltok::rparen)) + return Error(Lex.getLoc(), "expected ')'"); + return false; +} + /// ParseOptionalOperandBundles /// ::= /*empty*/ /// ::= '[' OperandBundle [, OperandBundle ]* ']' diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h index 30b6fc210d4ad..3fb724760e948 100644 --- a/llvm/lib/AsmParser/LLParser.h +++ b/llvm/lib/AsmParser/LLParser.h @@ -338,6 +338,7 @@ namespace llvm { std::vector &FwdRefAttrGrps, bool inAttrGrp, LocTy &BuiltinLoc); bool ParseByValWithOptionalType(Type *&Result); + bool ParsePreallocated(Type *&Result); // Module Summary Index Parsing. 
bool SkipModuleSummaryEntry(); diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h index 1d88c2972f3d2..74e313289ebe9 100644 --- a/llvm/lib/AsmParser/LLToken.h +++ b/llvm/lib/AsmParser/LLToken.h @@ -213,6 +213,7 @@ enum Kind { kw_optforfuzzing, kw_optnone, kw_optsize, + kw_preallocated, kw_readnone, kw_readonly, kw_returned, diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4b8b0ec8a24da..f36dab3c9f746 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1284,25 +1284,10 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { return 1ULL << 62; case Attribute::NoFree: return 1ULL << 63; - case Attribute::NoSync: - llvm_unreachable("nosync attribute not supported in raw format"); - break; - case Attribute::Dereferenceable: - llvm_unreachable("dereferenceable attribute not supported in raw format"); - break; - case Attribute::DereferenceableOrNull: - llvm_unreachable("dereferenceable_or_null attribute not supported in raw " - "format"); - break; - case Attribute::ArgMemOnly: - llvm_unreachable("argmemonly attribute not supported in raw format"); - break; - case Attribute::AllocSize: - llvm_unreachable("allocsize not supported in raw format"); - break; - case Attribute::SanitizeMemTag: - llvm_unreachable("sanitize_memtag attribute not supported in raw format"); - break; + default: + // Other attributes are not supported in the raw format, + // as we ran out of space. + return 0; } llvm_unreachable("Unsupported attribute type"); } @@ -1312,13 +1297,6 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { - if (I == Attribute::SanitizeMemTag || - I == Attribute::Dereferenceable || - I == Attribute::DereferenceableOrNull || - I == Attribute::ArgMemOnly || - I == Attribute::AllocSize || - I == Attribute::NoSync) - continue; if (uint64_t A = (Val & getRawAttributeMask(I))) { if (I == Attribute::Alignment) B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); @@ -1544,6 +1522,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::ImmArg; case bitc::ATTR_KIND_SANITIZE_MEMTAG: return Attribute::SanitizeMemTag; + case bitc::ATTR_KIND_PREALLOCATED: + return Attribute::Preallocated; } } @@ -1659,8 +1639,11 @@ Error BitcodeReader::parseAttributeGroupBlock() { Attribute::AttrKind Kind; if (Error Err = parseAttrKind(Record[++i], &Kind)) return Err; - if (Kind == Attribute::ByVal) + if (Kind == Attribute::ByVal) { B.addByValAttr(HasType ? 
getTypeByID(Record[++i]) : nullptr); + } else if (Kind == Attribute::Preallocated) { + B.addPreallocatedAttr(getTypeByID(Record[++i])); + } } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index f1bd4a32c60a5..0a6741d97f74e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -725,6 +725,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_IMMARG; case Attribute::SanitizeMemTag: return bitc::ATTR_KIND_SANITIZE_MEMTAG; + case Attribute::Preallocated: + return bitc::ATTR_KIND_PREALLOCATED; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: @@ -2773,7 +2775,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, case Instruction::Invoke: { const InvokeInst *II = cast(&I); - const Value *Callee = II->getCalledValue(); + const Value *Callee = II->getCalledOperand(); FunctionType *FTy = II->getFunctionType(); if (II->hasOperandBundles()) @@ -2849,7 +2851,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, } case Instruction::CallBr: { const CallBrInst *CBI = cast(&I); - const Value *Callee = CBI->getCalledValue(); + const Value *Callee = CBI->getCalledOperand(); FunctionType *FTy = CBI->getFunctionType(); if (CBI->hasOperandBundles()) @@ -3027,7 +3029,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(Flags); Vals.push_back(VE.getTypeID(FTy)); - pushValueAndType(CI.getCalledValue(), InstID, Vals); // Callee + pushValueAndType(CI.getCalledOperand(), InstID, Vals); // Callee // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { diff --git a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp index 92d7c91a1d351..2739137c1e446 100644 --- a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp +++ b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp @@ -214,6 +214,7 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, if (!MaybeNumElts) return MaybeNumElts.takeError(); uint32_t NumElts = MaybeNumElts.get(); + Vals.reserve(Vals.size() + NumElts); for (unsigned i = 0; i != NumElts; ++i) if (Expected MaybeVal = ReadVBR64(6)) @@ -263,6 +264,7 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, if (!MaybeNumElts) return MaybeNumElts.takeError(); uint32_t NumElts = MaybeNumElts.get(); + Vals.reserve(Vals.size() + NumElts); // Get the element encoding. if (i + 2 != e) @@ -334,8 +336,8 @@ Expected BitstreamCursor::readRecord(unsigned AbbrevID, *Blob = StringRef(Ptr, NumElts); } else { // Otherwise, unpack into Vals with zero extension. - for (; NumElts; --NumElts) - Vals.push_back((unsigned char)*Ptr++); + auto *UPtr = reinterpret_cast(Ptr); + Vals.append(UPtr, UPtr + NumElts); } } @@ -458,21 +460,15 @@ BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { return None; if (!ReadBlockInfoNames) break; // Ignore name. - std::string Name; - for (unsigned i = 0, e = Record.size(); i != e; ++i) - Name += (char)Record[i]; - CurBlockInfo->Name = Name; + CurBlockInfo->Name = std::string(Record.begin(), Record.end()); break; } case bitc::BLOCKINFO_CODE_SETRECORDNAME: { if (!CurBlockInfo) return None; if (!ReadBlockInfoNames) break; // Ignore name. 
- std::string Name; - for (unsigned i = 1, e = Record.size(); i != e; ++i) - Name += (char)Record[i]; - CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], - Name)); + CurBlockInfo->RecordNames.emplace_back( + (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); break; } } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 0d31a9db41545..69ae0494cd9e7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -395,6 +395,9 @@ void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { GlobalValue::LinkageTypes Linkage = GV->getLinkage(); switch (Linkage) { case GlobalValue::CommonLinkage: + assert(!TM.getTargetTriple().isOSBinFormatXCOFF() && + "CommonLinkage of XCOFF should not come to this path."); + LLVM_FALLTHROUGH; case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: @@ -418,8 +421,10 @@ void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { } return; case GlobalValue::ExternalLinkage: - // If external, declare as a global symbol: .globl _foo - OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global); + if (MAI->hasDotExternDirective() && GV->isDeclaration()) + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Extern); + else + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: return; @@ -427,9 +432,14 @@ void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { if (MAI->hasDotLGloblDirective()) OutStreamer->emitSymbolAttribute(GVSym, MCSA_LGlobal); return; + case GlobalValue::ExternalWeakLinkage: + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak); + return; + } + LLVM_FALLTHROUGH; case GlobalValue::AppendingLinkage: case GlobalValue::AvailableExternallyLinkage: - case GlobalValue::ExternalWeakLinkage: llvm_unreachable("Should never emit this"); } llvm_unreachable("Unknown linkage type!"); @@ -1489,15 +1499,30 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit remaining GOT equivalent globals. emitGlobalGOTEquivs(); - // Emit visibility info for declarations + // Emit linkage(XCOFF) and visibility info for declarations for (const Function &F : M) { if (!F.isDeclarationForLinker()) continue; + + MCSymbol *Name = getSymbol(&F); + // Function getSymbol gives us the function descriptor symbol for XCOFF. + if (TM.getTargetTriple().isOSBinFormatXCOFF() && !F.isIntrinsic()) { + + // Get the function entry point symbol. + MCSymbol *FnEntryPointSym = OutContext.getOrCreateSymbol( + "." + cast(Name)->getUnqualifiedName()); + if (cast(FnEntryPointSym)->hasRepresentedCsectSet()) + // Emit linkage for the function entry point. + emitLinkage(&F, FnEntryPointSym); + + // Emit linkage for the function descriptor. + emitLinkage(&F, Name); + } + GlobalValue::VisibilityTypes V = F.getVisibility(); if (V == GlobalValue::DefaultVisibility) continue; - MCSymbol *Name = getSymbol(&F); emitVisibility(Name, V, false); } @@ -2617,9 +2642,10 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // [chunk1][chunk2] ... [chunkN]. // The most significant chunk is chunkN and it should be emitted first. // However, due to the alignment issue chunkN contains useless bits. 
- // Realign the chunks so that they contain only useless information: + // Realign the chunks so that they contain only useful information: // ExtraBits 0 1 (BitWidth / 64) - 1 // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] + ExtraBitsSize = alignTo(ExtraBitsSize, 8); ExtraBits = Realigned.getRawData()[0] & (((uint64_t)-1) >> (64 - ExtraBitsSize)); Realigned.lshrInPlace(ExtraBitsSize); @@ -2640,7 +2666,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); + uint64_t Size = AP.getDataLayout().getTypeStoreSize(CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) @@ -2755,20 +2781,22 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, return AP.OutStreamer->emitZeros(Size); if (const ConstantInt *CI = dyn_cast(CV)) { - switch (Size) { - case 1: - case 2: - case 4: - case 8: + const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType()); + + if (StoreSize < 8) { if (AP.isVerbose()) AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer->emitIntValue(CI->getZExtValue(), Size); - return; - default: + AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize); + } else { emitGlobalConstantLargeInt(CI, AP); - return; } + + // Emit tail padding if needed + if (Size != StoreSize) + AP.OutStreamer->emitZeros(Size - StoreSize); + + return; } if (const ConstantFP *CFP = dyn_cast(CV)) diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 3264e07d57c0d..f167cead4e2c5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -3111,7 +3111,9 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); const unsigned LengthOfDataRecord = 12; - emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); + emitNullTerminatedSymbolName( + OS, getFullyQualifiedName(DIGV->getScope(), DIGV->getName()), + LengthOfDataRecord); endSymbolRecord(DataEnd); } else { // FIXME: Currently this only emits the global variables in the IR metadata. diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 1def0d2bd85a1..9bec110604d9b 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -570,8 +570,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; unsigned ValueSize = getAtomicOpSize(AI); if (ValueSize < MinCASSize) { - llvm_unreachable( - "MinCmpXchgSizeInBits not yet supported for LL/SC architectures."); + expandPartwordAtomicRMW(AI, + TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { return performAtomicOp(AI->getOperation(), Builder, Loaded, @@ -608,16 +608,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { namespace { -/// Result values from createMaskInstrs helper. struct PartwordMaskValues { - Type *WordType; - Type *ValueType; - Value *AlignedAddr; - Value *ShiftAmt; - Value *Mask; - Value *Inv_Mask; + // These three fields are guaranteed to be set by createMaskInstrs. 
+ Type *WordType = nullptr; + Type *ValueType = nullptr; + Value *AlignedAddr = nullptr; + // The remaining fields can be null. + Value *ShiftAmt = nullptr; + Value *Mask = nullptr; + Value *Inv_Mask = nullptr; }; +LLVM_ATTRIBUTE_UNUSED +raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { + auto PrintObj = [&O](auto *V) { + if (V) + O << *V; + else + O << "nullptr"; + O << '\n'; + }; + O << "PartwordMaskValues {\n"; + O << " WordType: "; + PrintObj(PMV.WordType); + O << " ValueType: "; + PrintObj(PMV.ValueType); + O << " AlignedAddr: "; + PrintObj(PMV.AlignedAddr); + O << " ShiftAmt: "; + PrintObj(PMV.ShiftAmt); + O << " Mask: "; + PrintObj(PMV.Mask); + O << " Inv_Mask: "; + PrintObj(PMV.Inv_Mask); + O << "}\n"; + return O; +} + } // end anonymous namespace /// This is a helper function which builds instructions to provide @@ -638,48 +665,74 @@ struct PartwordMaskValues { /// Inv_Mask: The inverse of Mask. static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Type *ValueType, Value *Addr, - unsigned WordSize) { - PartwordMaskValues Ret; + unsigned MinWordSize) { + PartwordMaskValues PMV; - BasicBlock *BB = I->getParent(); - Function *F = BB->getParent(); Module *M = I->getModule(); - - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = M->getContext(); const DataLayout &DL = M->getDataLayout(); - unsigned ValueSize = DL.getTypeStoreSize(ValueType); - assert(ValueSize < WordSize); + PMV.ValueType = ValueType; + PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) + : ValueType; + if (PMV.ValueType == PMV.WordType) { + PMV.AlignedAddr = Addr; + return PMV; + } - Ret.ValueType = ValueType; - Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8); + assert(ValueSize < MinWordSize); Type *WordPtrType = - Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - Ret.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType, + PMV.AlignedAddr = Builder.CreateIntToPtr( + Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); - Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB"); + Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits - Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3); + PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); } else { // turn bytes into bits, and count from the other side. 
- Ret.ShiftAmt = - Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3); + PMV.ShiftAmt = Builder.CreateShl( + Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); } - Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); - Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, + PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); + PMV.Mask = Builder.CreateShl( + ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, "Mask"); - Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); + PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); + return PMV; +} + +static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, + const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + if (PMV.WordType == PMV.ValueType) + return WideWord; - return Ret; + Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); + Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); + return Trunc; +} + +static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, + Value *Updated, const PartwordMaskValues &PMV) { + assert(WideWord->getType() == PMV.WordType && "Widened type mismatch"); + assert(Updated->getType() == PMV.ValueType && "Value type mismatch"); + if (PMV.WordType == PMV.ValueType) + return Updated; + + Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); + Value *Shift = + Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); + Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); + Value *Or = Builder.CreateOr(And, Shift, "inserted"); + return Or; } /// Emit IR to implement a masked version of a given atomicrmw @@ -719,13 +772,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, // Finally, comparison ops will operate on the full value, so // truncate down to the original size, and expand out again after // doing the operation. - Value *Loaded_Shiftdown = Builder.CreateTrunc( - Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType); - Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc); - Value *NewVal_Shiftup = Builder.CreateShl( - Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); - Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); - Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup); + Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); + Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); + Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } default: @@ -738,12 +787,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, /// /// It will create an LL/SC or cmpxchg loop, as appropriate, the same /// way as a typical atomicrmw expansion. The only difference here is -/// that the operation inside of the loop must operate only upon a +/// that the operation inside of the loop may operate upon only a /// part of the value. 
void AtomicExpand::expandPartwordAtomicRMW( AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { - assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg); - AtomicOrdering MemOpOrder = AI->getOrdering(); IRBuilder<> Builder(AI); @@ -761,13 +808,18 @@ void AtomicExpand::expandPartwordAtomicRMW( ValOperand_Shifted, AI->getValOperand(), PMV); }; - // TODO: When we're ready to support LLSC conversions too, use - // insertRMWLLSCLoop here for ExpansionKind==LLSC. - Value *OldResult = - insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, - PerformPartwordOp, createCmpXchgInstFun); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *OldResult; + if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { + OldResult = + insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, + PerformPartwordOp, createCmpXchgInstFun); + } else { + assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); + OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, + MemOpOrder, PerformPartwordOp); + } + + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -800,8 +852,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); return NewAI; @@ -923,8 +974,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { // partword.cmpxchg.end: Builder.SetInsertPoint(CI); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Res = Builder.CreateInsertValue(Res, Success, 1); @@ -965,8 +1015,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, AI->getOrdering()); - Value *FinalOldResult = Builder.CreateTrunc( - Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); AI->replaceAllUsesWith(FinalOldResult); AI->eraseFromParent(); } @@ -987,9 +1036,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, CI->getSuccessOrdering()); - Value *FinalOldVal = Builder.CreateTrunc( - Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); - + Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); Value *Res = UndefValue::get(CI->getType()); Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); Value *Success = Builder.CreateICmpEQ( @@ -1126,24 +1173,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // // The full expansion we produce is: // [...] + // %aligned.addr = ... 
// cmpxchg.start: - // %unreleasedload = @load.linked(%addr) - // %should_store = icmp eq %unreleasedload, %desired - // br i1 %should_store, label %cmpxchg.fencedstore, + // %unreleasedload = @load.linked(%aligned.addr) + // %unreleasedload.extract = extract value from %unreleasedload + // %should_store = icmp eq %unreleasedload.extract, %desired + // br i1 %should_store, label %cmpxchg.releasingstore, // label %cmpxchg.nostore // cmpxchg.releasingstore: // fence? // br label cmpxchg.trystore // cmpxchg.trystore: - // %loaded.trystore = phi [%unreleasedload, %releasingstore], + // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], // [%releasedload, %cmpxchg.releasedload] - // %stored = @store_conditional(%new, %addr) + // %updated.new = insert %new into %loaded.trystore + // %stored = @store_conditional(%updated.new, %aligned.addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, // label %cmpxchg.releasedload/%cmpxchg.failure // cmpxchg.releasedload: - // %releasedload = @load.linked(%addr) - // %should_store = icmp eq %releasedload, %desired + // %releasedload = @load.linked(%aligned.addr) + // %releasedload.extract = extract value from %releasedload + // %should_store = icmp eq %releasedload.extract, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.success: @@ -1159,9 +1210,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // fence? // br label %cmpxchg.end // cmpxchg.end: - // %loaded = phi [%loaded.nostore, %cmpxchg.failure], - // [%loaded.trystore, %cmpxchg.trystore] + // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], + // [%loaded.trystore, %cmpxchg.trystore] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %loaded = extract value from %loaded.exit // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] @@ -1187,13 +1239,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, + TLI->getMinCmpXchgSizeInBits() / 8); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(StartBB); - Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *UnreleasedLoad = + TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *UnreleasedLoadExtract = + extractMaskedValue(Builder, UnreleasedLoad, PMV); Value *ShouldStore = Builder.CreateICmpEQ( - UnreleasedLoad, CI->getCompareOperand(), "should_store"); + UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). 
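Note on the expansion above: every load of the containing word is now routed through extractMaskedValue and every stored value through insertMaskedValue, so the same control flow serves both full-word and partword cmpxchg; when WordType equals ValueType both helpers simply return their input unchanged. As a rough standalone sketch of the shift-and-mask arithmetic the emitted IR performs at runtime — assuming a little-endian layout, a 32-bit containing word, and an i8 value; the struct and function names below are illustrative, not part of the patch:

#include <cassert>
#include <cstdint>

// Illustrative stand-in for the relevant PartwordMaskValues fields, as
// computed by createMaskInstrs for the containing word.
struct MaskInfo {
  std::uint32_t ShiftAmt; // (addr & 3) * 8 on a little-endian target
  std::uint32_t Mask;     // 0xFF << ShiftAmt for an i8 value
};

// Mirrors extractMaskedValue: CreateLShr followed by CreateTrunc.
static std::uint8_t extractValue(std::uint32_t WideWord, MaskInfo PMV) {
  return static_cast<std::uint8_t>(WideWord >> PMV.ShiftAmt);
}

// Mirrors insertMaskedValue: CreateZExt, CreateShl, then merge with the
// inverse mask (CreateAnd + CreateOr) so neighbouring bytes are preserved.
static std::uint32_t insertValue(std::uint32_t WideWord, std::uint8_t Updated,
                                 MaskInfo PMV) {
  std::uint32_t Shifted = static_cast<std::uint32_t>(Updated) << PMV.ShiftAmt;
  return (WideWord & ~PMV.Mask) | Shifted;
}

int main() {
  MaskInfo PMV{8, 0xFF00u};                   // value lives in byte 1
  std::uint32_t Word = 0xAABBCCDDu;
  assert(extractValue(Word, PMV) == 0xCC);    // pull byte 1 out of the word
  assert(insertValue(Word, 0x42, PMV) == 0xAABB42DDu); // only byte 1 changes
  return 0;
}

On a big-endian target only the computation of ShiftAmt changes (the XOR-based recalculation in createMaskInstrs); the extract/insert arithmetic itself is the same.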
@@ -1205,8 +1264,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); + PHINode *LoadedTryStore = + Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); + LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); + Value *NewValueInsert = + insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; @@ -1216,13 +1280,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasedLoadBB); Value *SecondLoad; if (HasReleasedLoadBB) { - SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(), - "should_store"); + SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder); + Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); + ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, + CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + // Update PHI node in TryStoreBB. + LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else Builder.CreateUnreachable(); @@ -1234,6 +1301,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); + PHINode *LoadedNoStore = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); + LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); + if (HasReleasedLoadBB) + LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); + // In the failing case, where we don't execute the store-conditional, the // target might want to balance out the load-linked with a dedicated // instruction (e.g., on ARM, clearing the exclusive monitor). @@ -1241,6 +1314,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); + PHINode *LoadedFailure = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); + LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); + if (CI->isWeak()) + LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); @@ -1250,32 +1328,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + PHINode *LoadedExit = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); + LoadedExit->addIncoming(LoadedTryStore, SuccessBB); + LoadedExit->addIncoming(LoadedFailure, FailureBB); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - // Setup the builder so we can create any PHIs we need. 
- Value *Loaded; - if (!HasReleasedLoadBB) - Loaded = UnreleasedLoad; - else { - Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin()); - PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB); - TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin()); - PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB); - NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); - - Builder.SetInsertPoint(ExitBB, ++ExitBB->begin()); - PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); - ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB); - ExitLoaded->addIncoming(NoStoreLoaded, FailureBB); - - Loaded = ExitLoaded; - } + // This is the "exit value" from the cmpxchg expansion. It may be of + // a type wider than the one in the cmpxchg instruction. + Value *LoadedFull = LoadedExit; + + Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); + Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. @@ -1417,8 +1483,6 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { expandPartwordCmpXchg(CI); return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { - assert(ValueSize >= MinCASSize && - "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index ef548c84d3c00..0269936d2f88c 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -18,6 +18,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -76,15 +78,32 @@ class CFIInstrInserter : public MachineFunctionPass { unsigned IncomingCFARegister = 0; /// Value of cfa register valid at basic block exit. unsigned OutgoingCFARegister = 0; + /// Set of callee saved registers saved at basic block entry. + BitVector IncomingCSRSaved; + /// Set of callee saved registers saved at basic block exit. + BitVector OutgoingCSRSaved; /// If in/out cfa offset and register values for this block have already /// been set or not. bool Processed = false; }; +#define INVALID_REG UINT_MAX +#define INVALID_OFFSET INT_MAX + /// Contains the location where the CSR register is saved. + struct CSRSavedLocation { + CSRSavedLocation(Optional<unsigned> R, Optional<int64_t> O) + : Reg(R), Offset(O) {} + Optional<unsigned> Reg; + Optional<int64_t> Offset; + }; + /// Contains cfa offset and register values valid at entry and exit of basic /// blocks. std::vector<MBBCFAInfo> MBBVector; + /// Map the callee save registers to the locations where they are saved. + SmallDenseMap<unsigned, CSRSavedLocation> CSRLocMap; + /// Calculate cfa offset and register values valid at entry and exit for all /// basic blocks in a function.
void calculateCFAInfo(MachineFunction &MF); @@ -108,7 +127,8 @@ class CFIInstrInserter : public MachineFunctionPass { return -MBBVector[MBB->getNumber()].IncomingCFAOffset; } - void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); + void reportCFAError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); + void reportCSRError(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ); /// Go through each MBB in a function and check that outgoing offset and /// register of its predecessors match incoming offset and register of that /// MBB, as well as that incoming offset and register of its successors match @@ -132,6 +152,8 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { // function. unsigned InitialRegister = MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned NumRegs = TRI.getNumRegs(); // Initialize MBBMap. for (MachineBasicBlock &MBB : MF) { @@ -141,8 +163,11 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { MBBInfo.OutgoingCFAOffset = InitialOffset; MBBInfo.IncomingCFARegister = InitialRegister; MBBInfo.OutgoingCFARegister = InitialRegister; + MBBInfo.IncomingCSRSaved.resize(NumRegs); + MBBInfo.OutgoingCSRSaved.resize(NumRegs); MBBVector[MBB.getNumber()] = MBBInfo; } + CSRLocMap.clear(); // Set in/out cfa info for all blocks in the function. This traversal is based // on the assumption that the first block in the function is the entry block @@ -159,12 +184,17 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { int SetOffset = MBBInfo.IncomingCFAOffset; // Outgoing cfa register set by the block. unsigned SetRegister = MBBInfo.IncomingCFARegister; - const std::vector<MCCFIInstruction> &Instrs = - MBBInfo.MBB->getParent()->getFrameInstructions(); + MachineFunction *MF = MBBInfo.MBB->getParent(); + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + unsigned NumRegs = TRI.getNumRegs(); + BitVector CSRSaved(NumRegs), CSRRestored(NumRegs); // Determine cfa offset and register set by the block. for (MachineInstr &MI : *MBBInfo.MBB) { if (MI.isCFIInstruction()) { + Optional<unsigned> CSRReg; + Optional<int64_t> CSROffset; unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; switch (CFI.getOperation()) { @@ -181,6 +211,18 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { SetRegister = CFI.getRegister(); SetOffset = CFI.getOffset(); break; + case MCCFIInstruction::OpOffset: + CSROffset = CFI.getOffset(); + break; + case MCCFIInstruction::OpRegister: + CSRReg = CFI.getRegister2(); + break; + case MCCFIInstruction::OpRelOffset: + CSROffset = CFI.getOffset() - SetOffset; + break; + case MCCFIInstruction::OpRestore: + CSRRestored.set(CFI.getRegister()); + break; case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG @@ -198,18 +240,24 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { #endif break; // Other CFI directives do not affect CFA value.
+ case MCCFIInstruction::OpUndefined: case MCCFIInstruction::OpSameValue: - case MCCFIInstruction::OpOffset: - case MCCFIInstruction::OpRelOffset: case MCCFIInstruction::OpEscape: - case MCCFIInstruction::OpRestore: - case MCCFIInstruction::OpUndefined: - case MCCFIInstruction::OpRegister: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpGnuArgsSize: break; } + if (CSRReg || CSROffset) { + auto It = CSRLocMap.find(CFI.getRegister()); + if (It == CSRLocMap.end()) { + CSRLocMap.insert( + {CFI.getRegister(), CSRSavedLocation(CSRReg, CSROffset)}); + } else if (It->second.Reg != CSRReg || It->second.Offset != CSROffset) { + llvm_unreachable("Different saved locations for the same CSR"); + } + CSRSaved.set(CFI.getRegister()); + } } } @@ -218,6 +266,11 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { // Update outgoing CFA info. MBBInfo.OutgoingCFAOffset = SetOffset; MBBInfo.OutgoingCFARegister = SetRegister; + + // Update outgoing CSR info. + MBBInfo.OutgoingCSRSaved = MBBInfo.IncomingCSRSaved; + MBBInfo.OutgoingCSRSaved |= CSRSaved; + MBBInfo.OutgoingCSRSaved.reset(CSRRestored); } void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { @@ -236,6 +289,7 @@ void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { if (!SuccInfo.Processed) { SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset; SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister; + SuccInfo.IncomingCSRSaved = CurrentInfo.OutgoingCSRSaved; Stack.push_back(Succ); } } @@ -287,12 +341,45 @@ bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { .addCFIIndex(CFIIndex); InsertedCFIInstr = true; } + + BitVector SetDifference = PrevMBBInfo->OutgoingCSRSaved; + SetDifference.reset(MBBInfo.IncomingCSRSaved); + for (int Reg : SetDifference.set_bits()) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg)); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + + SetDifference = MBBInfo.IncomingCSRSaved; + SetDifference.reset(PrevMBBInfo->OutgoingCSRSaved); + for (int Reg : SetDifference.set_bits()) { + auto it = CSRLocMap.find(Reg); + assert(it != CSRLocMap.end() && "Reg should have an entry in CSRLocMap"); + unsigned CFIIndex; + CSRSavedLocation RO = it->second; + if (!RO.Reg && RO.Offset) { + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, *RO.Offset)); + } else if (RO.Reg && !RO.Offset) { + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createRegister(nullptr, Reg, *RO.Reg)); + } else { + llvm_unreachable("RO.Reg and RO.Offset cannot both be valid/invalid"); + } + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + PrevMBBInfo = &MBBInfo; } return InsertedCFIInstr; } -void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { +void CFIInstrInserter::reportCFAError(const MBBCFAInfo &Pred, + const MBBCFAInfo &Succ) { errs() << "*** Inconsistent CFA register and/or offset between pred and succ " "***\n"; errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() @@ -307,6 +394,22 @@ void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n"; } +void CFIInstrInserter::reportCSRError(const MBBCFAInfo &Pred, + const MBBCFAInfo &Succ) { + errs() << "*** Inconsistent CSR Saved between pred and succ 
in function " + << Pred.MBB->getParent()->getName() << " ***\n"; + errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() + << " outgoing CSR Saved: "; + for (int Reg : Pred.OutgoingCSRSaved.set_bits()) + errs() << Reg << " "; + errs() << "\n"; + errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() + << " incoming CSR Saved: "; + for (int Reg : Succ.IncomingCSRSaved.set_bits()) + errs() << Reg << " "; + errs() << "\n"; +} + unsigned CFIInstrInserter::verify(MachineFunction &MF) { unsigned ErrorNum = 0; for (auto *CurrMBB : depth_first(&MF)) { @@ -321,7 +424,13 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) { // we don't generate epilogues inside such blocks. if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock()) continue; - report(CurrMBBInfo, SuccMBBInfo); + reportCFAError(CurrMBBInfo, SuccMBBInfo); + ErrorNum++; + } + // Check that IncomingCSRSaved of every successor matches the + // OutgoingCSRSaved of CurrMBB + if (SuccMBBInfo.IncomingCSRSaved != CurrMBBInfo.OutgoingCSRSaved) { + reportCSRError(CurrMBBInfo, SuccMBBInfo); ErrorNum++; } } diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 3c95b2ecc07d3..70b8370a00d21 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1889,7 +1889,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Lower inline assembly if we can. // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. - if (isa<InlineAsm>(CI->getCalledValue())) { + if (CI->isInlineAsm()) { if (TLI->ExpandInlineAsm(CI)) { // Avoid invalidating the iterator. CurInstIterator = BB->begin(); @@ -4636,7 +4636,7 @@ static bool FindAllMemoryUses( continue; } - InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand()); if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. @@ -6103,7 +6103,8 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; + TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= + TargetTransformInfo::TCC_Expensive; } /// Returns true if a SelectInst should be turned into an explicit branch. @@ -6651,6 +6652,8 @@ class VectorPromoteHelper { uint64_t ScalarCost = TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); uint64_t VectorCost = StoreExtractCombineCost; + enum TargetTransformInfo::TargetCostKind CostKind = + TargetTransformInfo::TCK_RecipThroughput; for (const auto &Inst : InstsToBePromoted) { // Compute the cost. // By construction, all instructions being promoted are arithmetic ones. @@ -6666,8 +6669,9 @@ class VectorPromoteHelper { !IsArg0Constant ?
TargetTransformInfo::OK_UniformConstantValue : TargetTransformInfo::OK_AnyValue; ScalarCost += TTI.getArithmeticInstrCost( - Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); + Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK); VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, + CostKind, Arg0OVK, Arg1OVK); } LLVM_DEBUG( @@ -7126,7 +7130,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, return false; ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); // Check that GEPI is a cheap one. - if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType()) + if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency) > TargetTransformInfo::TCC_Basic) return false; Value *GEPIOp = GEPI->getOperand(0); @@ -7175,7 +7180,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, cast<ConstantInt>(UGEPI->getOperand(1))->getType()) return false; ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); - if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType()) + if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency) > TargetTransformInfo::TCC_Basic) return false; UGEPIs.push_back(UGEPI); @@ -7186,7 +7192,9 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, for (GetElementPtrInst *UGEPI : UGEPIs) { ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); - unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType()); + unsigned ImmCost = + TTI->getIntImmCost(NewIdx, GEPIIdx->getType(), + TargetTransformInfo::TCK_SizeAndLatency); if (ImmCost > TargetTransformInfo::TCC_Basic) return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 70bb272796739..86fbb4b9ae8a7 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -52,7 +52,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, // Try looking through a bitcast from one function type to another. // Commonly happens with calls to objc_msgSend(). - const Value *CalleeV = CB.getCalledValue()->stripPointerCasts(); + const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts(); if (const Function *F = dyn_cast<Function>(CalleeV)) Info.Callee = MachineOperand::CreateGA(F, 0); else diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 72d4dbeed1c29..1b1d7c5b0dd91 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1607,7 +1607,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB, // scan is done to check if any instructions are calls. bool Success = CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CB.getCalledValue()); }); + [&]() { return getOrCreateVReg(*CB.getCalledOperand()); }); // Check if we just inserted a tail call. if (Success) { @@ -1712,9 +1712,8 @@ bool IRTranslator::translateInvoke(const User &U, const BasicBlock *ReturnBB = I.getSuccessor(0); const BasicBlock *EHPadBB = I.getSuccessor(1); - const Value *Callee = I.getCalledValue(); - const Function *Fn = dyn_cast<Function>(Callee); - if (isa<InlineAsm>(Callee)) + const Function *Fn = I.getCalledFunction(); + if (I.isInlineAsm()) return false; // FIXME: support invoking patchpoint and statepoint intrinsics.
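A recurring mechanical change above, and in the files that follow, is the rename of CallBase::getCalledValue() to getCalledOperand(). A small hypothetical helper to illustrate the distinction the new name makes explicit: the called operand is the raw callee, which may be inline asm or a cast of a function, while getCalledFunction() only resolves plain direct calls. This helper is illustrative only and not part of the patch:

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Illustrative sketch, not patch code: classify a call site the way the
// updated code does.
static const char *classifyCallee(const CallBase &CB) {
  if (CB.isInlineAsm())
    return "inline asm"; // The called operand is an InlineAsm blob.
  if (CB.getCalledFunction())
    return "direct call"; // The callee is a plain Function.
  // Look through pointer casts, as CallLowering::lowerCall does above.
  if (isa<Function>(CB.getCalledOperand()->stripPointerCasts()))
    return "direct call through bitcast";
  return "indirect call";
}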
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index e42591067792f..f05394fcf2415 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -32,7 +32,7 @@ void InlineAsmLowering::anchor() {} bool InlineAsmLowering::lowerInlineAsm(MachineIRBuilder &MIRBuilder, const CallBase &Call) const { - const InlineAsm *IA = cast<InlineAsm>(Call.getCalledValue()); + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); StringRef ConstraintStr = IA->getConstraintString(); bool HasOnlyMemoryClobber = false; diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4fa90697552a7..2ae2cba745ef7 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -290,6 +290,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res, } auto Const = buildInstr(TargetOpcode::G_CONSTANT); + Const->setDebugLoc(DebugLoc()); Res.addDefToMIB(*getMRI(), Const); Const.addCImm(&Val); return Const; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 57dd8d315c852..09e31afef26c5 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -62,30 +62,39 @@ MCSymbol *MachineBasicBlock::getSymbol() const { MCContext &Ctx = MF->getContext(); auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - // We emit a non-temporary symbol for every basic block if we have BBLabels - // or -- with basic block sections -- when a basic block begins a section. - bool BasicBlockSymbols = isBeginSection() || MF->hasBBLabels(); - auto Delimiter = BasicBlockSymbols ? "." : "_"; assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); - // With Basic Block Sections, we emit a symbol for every basic block. To - // keep the size of strtab small, we choose a unary encoding which can - // compress the symbol names significantly. The basic blocks for function - // foo are named a.BB.foo, aa.BB.foo, and so on. - if (BasicBlockSymbols) { + // We emit a non-temporary symbol for every basic block if we have BBLabels + // or -- with basic block sections -- when a basic block begins a section. + // With basic block symbols, we use a unary encoding which can + // compress the symbol names significantly. For basic block sections where + // this block is the first in a cluster, we use a non-temp descriptive name. + // Otherwise we fall back to use temp label. + if (MF->hasBBLabels()) { auto Iter = MF->getBBSectionsSymbolPrefix().begin(); if (getNumber() < 0 || getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size()) report_fatal_error("Unreachable MBB: " + Twine(getNumber())); + // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and + // so on. std::string Prefix(Iter + 1, Iter + getNumber() + 1); std::reverse(Prefix.begin(), Prefix.end()); CachedMCSymbol = - Ctx.getOrCreateSymbol(Prefix + Twine(Delimiter) + "BB" + - Twine(Delimiter) + Twine(MF->getName())); + Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName())); + } else if (MF->hasBBSections() && isBeginSection()) { + SmallString<5> Suffix; + if (SectionID == MBBSectionID::ColdSectionID) { + Suffix += ".cold"; + } else if (SectionID == MBBSectionID::ExceptionSectionID) { + Suffix += ".eh"; + } else { + Suffix += "."
+ std::to_string(SectionID.Number); + } + CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix); } else { - CachedMCSymbol = Ctx.getOrCreateSymbol( - Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + - Twine(Delimiter) + Twine(getNumber())); + CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); } } return CachedMCSymbol; diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index a4f07806fbe36..71459e520664a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1570,7 +1570,7 @@ MachineBlockPlacement::selectBestSuccessor( // For blocks with CFG violations, we may be able to lay them out anyway with // tail-duplication. We keep this vector so we can perform the probability // calculations the minimum number of times. - SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4> + SmallVector<std::pair<BranchProbability, MachineBasicBlock *>, 4> DupCandidates; for (MachineBasicBlock *Succ : Successors) { auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); @@ -1584,7 +1584,7 @@ MachineBlockPlacement::selectBestSuccessor( Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) - DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); + DupCandidates.emplace_back(SuccProb, Succ); continue; } diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index e15dcc32c2ba7..dd1dd2cdd8c2e 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -97,12 +97,13 @@ static cl::opt<bool> EnableLinkOnceODROutlining( cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false)); -// Set the number of times to repeatedly apply outlining. -// Defaults to 1, but more repetitions can save additional size. -static cl::opt<unsigned> - NumRepeat("machine-outline-runs", cl::Hidden, - cl::desc("The number of times to apply machine outlining"), - cl::init(1)); +/// Number of times to re-run the outliner. This is not the total number of runs +/// as the outliner will run at least one time. The default value is set to 0, +/// meaning the outliner will run one time and rerun zero times after that. +static cl::opt<unsigned> OutlinerReruns( + "machine-outliner-reruns", cl::init(0), cl::Hidden, + cl::desc( + "Number of times to rerun the outliner after the initial outline")); namespace { @@ -910,12 +911,9 @@ struct MachineOutliner : public ModulePass { InstructionMapper &Mapper, unsigned Name); - /// Calls runOnceOnModule NumRepeat times + /// Calls 'doOutline()' 1 + OutlinerReruns times. bool runOnModule(Module &M) override; - /// Calls 'doOutline()'. - bool runOnceOnModule(Module &M, unsigned Iter); - /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. bool doOutline(Module &M, unsigned &OutlinedFunctionNum); @@ -1306,8 +1304,9 @@ bool MachineOutliner::outline(Module &M, // implicit Last inst in outlined range <-- def to the call // instruction. Also remove call site information for outlined block // of code. The exposed uses need to be copied in the outlined range.
- for (MachineBasicBlock::reverse_iterator Iter = EndIt.getReverse(), - Last = std::next(CallInst.getReverse()); + for (MachineBasicBlock::reverse_iterator + Iter = EndIt.getReverse(), + Last = std::next(CallInst.getReverse()); Iter != Last; Iter++) { MachineInstr *MI = &*Iter; for (MachineOperand &MOP : MI->operands()) { @@ -1333,10 +1332,10 @@ bool MachineOutliner::outline(Module &M, } for (const Register &I : DefRegs) - // If it's a def, add it to the call instruction. - CallInst->addOperand(MachineOperand::CreateReg( - I, true, /* isDef = true */ - true /* isImp = true */)); + // If it's a def, add it to the call instruction. + CallInst->addOperand( + MachineOperand::CreateReg(I, true, /* isDef = true */ + true /* isImp = true */)); for (const Register &I : UseRegs) // If it's a exposed use, add it to the call instruction. @@ -1487,19 +1486,31 @@ void MachineOutliner::emitInstrCountChangedRemark( } } -bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) { +bool MachineOutliner::runOnModule(Module &M) { // Check if there's anything in the module. If it's empty, then there's // nothing to outline. if (M.empty()) return false; - OutlineRepeatedNum = Iter; - // Number to append to the current outlined function. unsigned OutlinedFunctionNum = 0; + OutlineRepeatedNum = 0; if (!doOutline(M, OutlinedFunctionNum)) return false; + + for (unsigned I = 0; I < OutlinerReruns; ++I) { + OutlinedFunctionNum = 0; + OutlineRepeatedNum++; + if (!doOutline(M, OutlinedFunctionNum)) { + LLVM_DEBUG({ + dbgs() << "Did not outline on iteration " << I + 2 << " out of " + << OutlinerReruns + 1 << "\n"; + }); + break; + } + } + return true; } @@ -1556,25 +1567,11 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) { if (ShouldEmitSizeRemarks && OutlinedSomething) emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount); - return OutlinedSomething; -} + LLVM_DEBUG({ + if (!OutlinedSomething) + dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum + << " because no changes were found.\n"; + }); -// Apply machine outlining for NumRepeat times. -bool MachineOutliner::runOnModule(Module &M) { - if (NumRepeat < 1) - report_fatal_error("Expect NumRepeat for machine outlining " - "to be greater than or equal to 1!\n"); - - bool Changed = false; - for (unsigned I = 0; I < NumRepeat; I++) { - if (!runOnceOnModule(M, I)) { - LLVM_DEBUG(dbgs() << "Stopped outlining at iteration " << I - << " because no changes were found.\n";); - return Changed; - } - Changed = true; - } - LLVM_DEBUG(dbgs() << "Stopped outlining because iteration is " - "equal to " << NumRepeat << "\n";); - return Changed; + return OutlinedSomething; } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 3465aaada873b..ef4b02ca9e3ef 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); @@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { setPragmaPipelineOptions(L); if (!canPipelineLoop(L)) { LLVM_DEBUG(dbgs() << "\n!!!
Can not pipeline loop.\n"); + ORE->emit([&]() { + return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Failed to pipeline loop"; + }); + return Changed; } @@ -309,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) { /// restricted to loops with a single basic block. Make sure that the /// branch in the loop can be analyzed. bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { - if (L.getNumBlocks() != 1) + if (L.getNumBlocks() != 1) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Not a single basic block: " + << ore::NV("NumBlocks", L.getNumBlocks()); + }); return false; + } - if (disabledByPragma) + if (disabledByPragma) { + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "Disabled by Pragma."; + }); return false; + } // Check if the branch can't be understood because we can't do pipelining // if that's the case. @@ -321,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.FBB = nullptr; LI.BrCond.clear(); if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n"); NumFailBranch++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The branch can't be understood"; + }); return false; } LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { - LLVM_DEBUG( - dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "The loop structure is not supported"; + }); return false; } if (!L.getLoopPreheader()) { - LLVM_DEBUG( - dbgs() << "Preheader not found, can NOT pipeline current Loop\n"); + LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n"); NumFailPreheader++; + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop", + L.getStartLoc(), L.getHeader()) + << "No loop preheader found"; + }); return false; } @@ -457,10 +489,13 @@ void SwingSchedulerDAG::schedule() { // Can't schedule a loop without a valid MII. if (MII == 0) { - LLVM_DEBUG( - dbgs() - << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n"); + LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n"); NumFailZeroMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Invalid Minimal Initiation Interval: 0"; + }); return; } @@ -469,6 +504,14 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii << ", we don't pipleline large loops\n"); NumFailLargeMaxMII++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Minimal Initiation Interval too large: " + << ore::NV("MII", (int)MII) << " > " + << ore::NV("SwpMaxMii", SwpMaxMii) << "." 
+ << "Refer to -pipeliner-max-mii."; + }); return; } @@ -511,15 +554,24 @@ void SwingSchedulerDAG::schedule() { if (!Scheduled){ LLVM_DEBUG(dbgs() << "No schedule found, return\n"); NumFailNoSchedule++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Unable to find schedule"; + }); return; } unsigned numStages = Schedule.getMaxStageCount(); // No need to generate pipeline if there are no overlapped iterations. if (numStages == 0) { - LLVM_DEBUG( - dbgs() << "No overlapped iterations, no need to generate pipeline\n"); + LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n"); NumFailZeroStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "No need to pipeline - no overlapped iterations in schedule."; + }); return; } // Check that the maximum stage count is less than user-defined limit. @@ -527,9 +579,23 @@ void SwingSchedulerDAG::schedule() { LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages << " : too many stages, abort\n"); NumFailLargeMaxStage++; + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Too many stages in schedule: " + << ore::NV("numStages", (int)numStages) << " > " + << ore::NV("SwpMaxStages", SwpMaxStages) + << ". Refer to -pipeliner-max-stages."; + }); return; } + Pass.ORE->emit([&]() { + return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(), + Loop.getHeader()) + << "Pipelined succesfully!"; + }); + // Generate the schedule as a ModuloSchedule. DenseMap Cycles, Stages; std::vector OrderedInsts; @@ -1080,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() { } } int Resmii = Resources.size(); - LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n"); + LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n"); // Delete the memory for each of the DFAs that were created earlier. for (ResourceManager *RI : Resources) { ResourceManager *D = RI; @@ -2052,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II << ")\n"); - if (scheduleFound) + if (scheduleFound) { Schedule.finalizeSchedule(this); - else + Pass.ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis( + DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader()) + << "Schedule found with Initiation Interval: " << ore::NV("II", II) + << ", MaxStageCount: " + << ore::NV("MaxStageCount", Schedule.getMaxStageCount()); + }); + } else Schedule.reset(); return scheduleFound && Schedule.getMaxStageCount() > 0; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index cc9dab6db913d..5733798fdc9c2 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -168,20 +168,14 @@ namespace { // Same for a full set. bool addRequired(const RegSet &RS) { - bool changed = false; - for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) - if (addRequired(*I)) - changed = true; - return changed; + return llvm::any_of( + RS, [this](unsigned Reg) { return this->addRequired(Reg); }); } // Same for a full map. 
bool addRequired(const RegMap &RM) { - bool changed = false; - for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) - if (addRequired(I->first)) - changed = true; - return changed; + return llvm::any_of( + RM, [this](const auto &P) { return this->addRequired(P.first); }); } // Live-out registers are either in regsLiveOut or vregsPassed. @@ -216,7 +210,6 @@ namespace { void verifyPreISelGenericInstruction(const MachineInstr *MI); void visitMachineInstrBefore(const MachineInstr *MI); void visitMachineOperand(const MachineOperand *MO, unsigned MONum); - void visitMachineInstrAfter(const MachineInstr *MI); void visitMachineBundleAfter(const MachineInstr *MI); void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); @@ -379,43 +372,40 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { verifyProperties(MF); visitMachineFunctionBefore(); - for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); - MFI!=MFE; ++MFI) { - visitMachineBasicBlockBefore(&*MFI); + for (const MachineBasicBlock &MBB : MF) { + visitMachineBasicBlockBefore(&MBB); // Keep track of the current bundle header. const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? bool InBundle = false; - for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), - MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { - if (MBBI->getParent() != &*MFI) { - report("Bad instruction parent pointer", &*MFI); - errs() << "Instruction: " << *MBBI; + for (const MachineInstr &MI : MBB.instrs()) { + if (MI.getParent() != &MBB) { + report("Bad instruction parent pointer", &MBB); + errs() << "Instruction: " << MI; continue; } // Check for consistent bundle flags. - if (InBundle && !MBBI->isBundledWithPred()) + if (InBundle && !MI.isBundledWithPred()) report("Missing BundledPred flag, " "BundledSucc was set on predecessor", - &*MBBI); - if (!InBundle && MBBI->isBundledWithPred()) + &MI); + if (!InBundle && MI.isBundledWithPred()) report("BundledPred flag is set, " "but BundledSucc not set on predecessor", - &*MBBI); + &MI); // Is this a bundle header? - if (!MBBI->isInsideBundle()) { + if (!MI.isInsideBundle()) { if (CurBundle) visitMachineBundleAfter(CurBundle); - CurBundle = &*MBBI; + CurBundle = &MI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) - report("No bundle header", &*MBBI); - visitMachineInstrBefore(&*MBBI); - for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { - const MachineInstr &MI = *MBBI; + report("No bundle header", &MI); + visitMachineInstrBefore(&MI); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); if (Op.getParent() != &MI) { // Make sure to use correct addOperand / RemoveOperand / ChangeTo @@ -426,16 +416,14 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { visitMachineOperand(&Op, I); } - visitMachineInstrAfter(&*MBBI); - // Was this the last bundled instruction? 
- InBundle = MBBI->isBundledWithSucc(); + InBundle = MI.isBundledWithSucc(); } if (CurBundle) visitMachineBundleAfter(CurBundle); if (InBundle) - report("BundledSucc flag set on last instruction in block", &MFI->back()); - visitMachineBasicBlockAfter(&*MFI); + report("BundledSucc flag set on last instruction in block", &MBB.back()); + visitMachineBasicBlockAfter(&MBB); } visitMachineFunctionAfter(); @@ -546,9 +534,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { MInfo.reachable = true; - for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), - SuE = MBB->succ_end(); SuI != SuE; ++SuI) - markReachable(*SuI); + for (const MachineBasicBlock *Succ : MBB->successors()) + markReachable(Succ); } } @@ -640,14 +627,13 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } // Check the predecessor list. - for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), - E = MBB->pred_end(); I != E; ++I) { - if (!FunctionBlocks.count(*I)) + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (!FunctionBlocks.count(Pred)) report("MBB has predecessor that isn't part of the function.", MBB); - if (!MBBInfoMap[*I].Succs.count(MBB)) { + if (!MBBInfoMap[Pred].Succs.count(MBB)) { report("Inconsistent CFG", MBB); errs() << "MBB is not in the successor list of the predecessor " - << printMBBReference(*(*I)) << ".\n"; + << printMBBReference(*Pred) << ".\n"; } } @@ -670,8 +656,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. - MachineFunction::const_iterator MBBI = MBB->getIterator(); - ++MBBI; + MachineFunction::const_iterator MBBI = std::next(MBB->getIterator()); if (MBBI == MF->end()) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actually fall @@ -728,8 +713,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. - MachineFunction::const_iterator MBBI = MBB->getIterator(); - ++MBBI; + MachineFunction::const_iterator MBBI = std::next(MBB->getIterator()); if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); } else if (MBB->succ_size() == 1) { @@ -1485,12 +1469,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { verifyInlineAsm(MI); // Check the MachineMemOperands for basic consistency. - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); - I != E; ++I) { - if ((*I)->isLoad() && !MI->mayLoad()) + for (MachineMemOperand *Op : MI->memoperands()) { + if (Op->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MI->mayStore()) + if (Op->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } @@ -2088,8 +2070,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } -void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {} - // This function gets called after visiting all instructions in a bundle. The // argument points to the bundle header. 
// Normal stand-alone instructions are also considered 'bundles', and this @@ -2101,10 +2081,10 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { // Kill any masked registers. while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); - for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) - if (Register::isPhysicalRegister(*I) && - MachineOperand::clobbersPhysReg(Mask, *I)) - regsDead.push_back(*I); + for (unsigned Reg : regsLive) + if (Register::isPhysicalRegister(Reg) && + MachineOperand::clobbersPhysReg(Mask, Reg)) + regsDead.push_back(Reg); } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); @@ -2301,11 +2281,10 @@ void MachineVerifier::calcRegsRequired() { SmallPtrSet<const MachineBasicBlock *, 8> todo; for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), - PrE = MBB.pred_end(); PrI != PrE; ++PrI) { - BBInfo &PInfo = MBBInfoMap[*PrI]; + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + BBInfo &PInfo = MBBInfoMap[Pred]; if (PInfo.addRequired(MInfo.vregsLiveIn)) - todo.insert(*PrI); + todo.insert(Pred); } } @@ -2315,13 +2294,12 @@ const MachineBasicBlock *MBB = *todo.begin(); todo.erase(MBB); BBInfo &MInfo = MBBInfoMap[MBB]; - for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), - PrE = MBB->pred_end(); PrI != PrE; ++PrI) { - if (*PrI == MBB) + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (Pred == MBB) continue; - BBInfo &SInfo = MBBInfoMap[*PrI]; + BBInfo &SInfo = MBBInfoMap[Pred]; if (SInfo.addRequired(MInfo.vregsRequired)) - todo.insert(*PrI); + todo.insert(Pred); } } } @@ -2405,23 +2383,19 @@ void MachineVerifier::visitMachineFunctionAfter() { // Check for killed virtual registers that should be live out. for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; - for (RegSet::iterator - I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; - ++I) - if (MInfo.regsKilled.count(*I)) { + for (unsigned VReg : MInfo.vregsRequired) + if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); - errs() << "Virtual register " << printReg(*I) + errs() << "Virtual register " << printReg(VReg) << " is used after the block.\n"; } } if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; - for (RegSet::iterator - I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; - ++I) { + for (unsigned VReg : MInfo.vregsRequired) { report("Virtual register defs don't dominate all uses.", MF); - report_context_vreg(*I); + report_context_vreg(VReg); } } @@ -2783,9 +2757,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, VNI->def == LiveInts->getMBBStartIdx(&*MFI); // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + for (const MachineBasicBlock *Pred : MFI->predecessors()) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred); const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. However for a phi // value, only one of the subregisters (not necessarily the current one) // needs to be defined.
if (!PVNI && (LaneMask.none() || !IsPHI)) { - if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes)) + if (LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes)) continue; - report("Register not marked live out of predecessor", *PI); + report("Register not marked live out of predecessor", Pred); report_context(LR, Reg, LaneMask); report_context(*VNI); errs() << " live into " << printMBBReference(*MFI) << '@' @@ -2806,10 +2779,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI); + report("Different value live out of predecessor", Pred); report_context(LR, Reg, LaneMask); errs() << "Valno #" << PVNI->id << " live out of " - << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #" + << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" << VNI->id << " live into " << printMBBReference(*MFI) << '@' << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } @@ -2865,10 +2838,9 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { errs() << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - errs() << ' ' << (*I)->id; + for (const VNInfo *I : LI.valnos) + if (comp == ConEQ.getEqClass(I)) + errs() << ' ' << I->id; errs() << '\n'; } } @@ -2955,15 +2927,14 @@ void MachineVerifier::verifyStackFrame() { // Make sure the exit state of any predecessor is consistent with the entry // state. - for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), - E = MBB->pred_end(); I != E; ++I) { - if (Reachable.count(*I) && - (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || - SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + if (Reachable.count(Pred) && + (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue || + SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor " << printMBBReference(*(*I)) - << " has exit state (" << SPState[(*I)->getNumber()].ExitValue - << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while " + errs() << "Predecessor " << printMBBReference(*Pred) + << " has exit state (" << SPState[Pred->getNumber()].ExitValue + << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while " << printMBBReference(*MBB) << " has entry state (" << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } @@ -2971,15 +2942,14 @@ void MachineVerifier::verifyStackFrame() { // Make sure the entry state of any successor is consistent with the exit // state. 
- for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - if (Reachable.count(*I) && - (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || - SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + for (const MachineBasicBlock *Succ : MBB->successors()) { + if (Reachable.count(Succ) && + (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue || + SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor " << printMBBReference(*(*I)) - << " has entry state (" << SPState[(*I)->getNumber()].EntryValue - << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while " + errs() << "Successor " << printMBBReference(*Succ) + << " has entry state (" << SPState[Succ->getNumber()].EntryValue + << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while " << printMBBReference(*MBB) << " has exit state (" << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 21076047f77d4..1be9544848ecf 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -39,7 +39,7 @@ static bool lowerLoadRelative(Function &F) { for (auto I = F.use_begin(), E = F.use_end(); I != E;) { auto CI = dyn_cast<CallInst>(I->getUser()); ++I; - if (!CI || CI->getCalledValue() != &F) + if (!CI || CI->getCalledOperand() != &F) continue; IRBuilder<> B(CI); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 9785c04aec94b..ea667f1704f6b 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -538,7 +538,7 @@ bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From, // Now walk checking that the rest of the instructions will compute the same // value and that we're not overwriting anything. Don't move the instruction - // past any memory, control-flow or other ambigious instructions. + // past any memory, control-flow or other ambiguous instructions. for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) { if (mayHaveSideEffects(*I)) return false; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 00baa23e37167..9f225c49295db 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -675,6 +675,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, S.removeSegment(*SS, true); continue; } + // The subrange may have ended before FillerStart. If so, extend it.
+ if (!S.getVNInfoAt(FillerStart)) { + SlotIndex BBStart = + LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart)); + S.extendInBlock(BBStart, FillerStart); + } VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8edd90aa654bc..d8c6b51db9c61 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -527,7 +527,6 @@ namespace { bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; - bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -6099,6 +6098,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // (add v v) -> (shl v 1) + // TODO: Should this be a general DAG canonicalization? if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && ExtractFrom.getOpcode() == ISD::ADD && ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && @@ -6339,6 +6339,9 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { + EVT VT = N0.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + // fold (or (shl x0, (*ext y)), // (srl x1, (*ext (sub 32, y)))) -> // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y)) @@ -6346,13 +6349,52 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // fold (or (shl x0, (*ext (sub 32, y))), // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) - EVT VT = N0.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); } + // Matching the shift+xor cases, we can't easily use the xor'd shift amount + // so for now just use the PosOpcode case if it's legal. + // TODO: When can we use the NegOpcode case? + if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) { + auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) { + if (Op.getOpcode() != BinOpc) + return false; + ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1)); + return Cst && (Cst->getAPIntValue() == Imm); + }; + + // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) + // -> (fshl x0, x1, y) + if (IsBinOpImm(N1, ISD::SRL, 1) && + IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) && + InnerPos == InnerNeg.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) { + return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos); + } + + // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + if (IsBinOpImm(N0, ISD::SHL, 1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + + // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + } + return SDValue(); } @@ -7977,7 +8019,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. // sra (add (shl X, N1C), AddC), N1C --> // sext (add (trunc X to (width - N1C)), AddC') - if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { @@ -7994,7 +8036,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // implementation and/or target-specific overrides (because // non-simple types likely require masking when legalized), but that // restriction may conflict with other transforms. - if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + if (TruncVT.isSimple() && isTypeLegal(TruncVT) && + TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). @@ -12616,25 +12659,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } -/// Return true if both inputs are at least as cheap in negated form and at -/// least one input is strictly cheaper in negated form. -bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { - TargetLowering::NegatibleCost LHSNeg = - TLI.getNegatibleCost(X, DAG, LegalOperations, ForCodeSize); - if (TargetLowering::NegatibleCost::Expensive == LHSNeg) - return false; - - TargetLowering::NegatibleCost RHSNeg = - TLI.getNegatibleCost(Y, DAG, LegalOperations, ForCodeSize); - if (TargetLowering::NegatibleCost::Expensive == RHSNeg) - return false; - - // Both negated operands are at least as cheap as their counterparts. - // Check to see if at least one is cheaper negated. 
- return (TargetLowering::NegatibleCost::Cheaper == LHSNeg || - TargetLowering::NegatibleCost::Cheaper == RHSNeg); -} - SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12709,11 +12733,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, DL, VT, N0); // -N0 * -N1 --> N0 * N1 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = TLI.negateExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = TLI.negateExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); - } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -12792,11 +12823,18 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (-N0 * -N1) + N2 --> (N0 * N1) + N2 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = TLI.negateExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = TLI.negateExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); - } if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -13061,11 +13099,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue Neg0 = TLI.negateExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue Neg1 = TLI.negateExpression(N1, DAG, LegalOperations, ForCodeSize); - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, Neg0, Neg1, Flags); - } + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags); return SDValue(); } @@ -17133,11 +17178,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc DL(N); EVT VT = InVec.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); auto *IndexC = dyn_cast(EltNo); // Insert into out-of-bounds element is undefined. 
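The visitFMUL/visitFMA/visitFDIV changes above all replace the deleted isCheaperToUseNegatedFPOps helper with a single getNegatedExpression query that also reports the cost, avoiding a second traversal. The shared gate, restated as a minimal standalone sketch (the enum mirrors TargetLowering::NegatibleCost but is independent of it): both operands must be negatable at all, and at least one strictly cheaper in negated form, otherwise the fold would merely churn the DAG.

enum class NegatibleCost { Cheaper, Neutral, Expensive };

// Fold -X op -Y into X op Y only when it cannot make things worse and makes
// at least one side strictly better.
static bool shouldUseNegatedOperands(bool HasNegN0, NegatibleCost CostN0,
                                     bool HasNegN1, NegatibleCost CostN1) {
  return HasNegN0 && HasNegN1 &&
         (CostN0 == NegatibleCost::Cheaper ||
          CostN1 == NegatibleCost::Cheaper);
}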
- if (IndexC && IndexC->getZExtValue() >= VT.getVectorNumElements()) + if (IndexC && VT.isFixedLengthVector() && + IndexC->getZExtValue() >= VT.getVectorNumElements()) return DAG.getUNDEF(VT); // Remove redundant insertions: @@ -17150,12 +17195,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { - SmallVector Ops(NumElts, InVal); - return DAG.getBuildVector(VT, DL, Ops); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, InVal); + else { + SmallVector Ops(VT.getVectorNumElements(), InVal); + return DAG.getBuildVector(VT, DL, Ops); + } } return SDValue(); } + if (VT.isScalableVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 7ffc848651899..d117c6032d3c1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1290,7 +1290,7 @@ bool FastISel::lowerCall(const CallInst *CI) { IsTailCall = false; CallLoweringInfo CLI; - CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), *CI) + CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI) .setTailCall(IsTailCall); return lowerCallTo(CLI); @@ -1300,7 +1300,7 @@ bool FastISel::selectCall(const User *I) { const CallInst *Call = cast(I); // Handle simple inline asms. - if (const InlineAsm *IA = dyn_cast(Call->getCalledValue())) { + if (const InlineAsm *IA = dyn_cast(Call->getCalledOperand())) { // If the inline asm has side effects, then make sure that no local value // lives across by flushing the local value map. if (IA->hasSideEffects()) diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 9b97d5990cc84..3f302d5fa0ca6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -183,7 +183,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. 
if (auto *Call = dyn_cast(&I)) { - if (isa(Call->getCalledValue())) { + if (Call->isInlineAsm()) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector Ops = diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ef994b3f10ba0..8f746ec45f6c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -702,131 +702,7 @@ void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, std::pair VectorLegalizer::ExpandLoad(SDNode *N) { LoadSDNode *LD = cast(N); - - EVT SrcVT = LD->getMemoryVT(); - EVT SrcEltVT = SrcVT.getScalarType(); - unsigned NumElem = SrcVT.getVectorNumElements(); - - SDValue NewChain; - SDValue Value; - if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(N); - - SmallVector Vals; - SmallVector LoadChains; - - EVT DstEltVT = LD->getValueType(0).getScalarType(); - SDValue Chain = LD->getChain(); - SDValue BasePTR = LD->getBasePtr(); - ISD::LoadExtType ExtType = LD->getExtensionType(); - - // When elements in a vector is not byte-addressable, we cannot directly - // load each element by advancing pointer, which could only address bytes. - // Instead, we load all significant words, mask bits off, and concatenate - // them to form each element. Finally, they are extended to destination - // scalar type to build the destination vector. - EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); - - assert(WideVT.isRound() && - "Could not handle the sophisticated case when the widest integer is" - " not power of 2."); - assert(WideVT.bitsGE(SrcEltVT) && - "Type is not legalized?"); - - unsigned WideBytes = WideVT.getStoreSize(); - unsigned Offset = 0; - unsigned RemainingBytes = SrcVT.getStoreSize(); - SmallVector LoadVals; - while (RemainingBytes > 0) { - SDValue ScalarLoad; - unsigned LoadBytes = WideBytes; - - if (RemainingBytes >= LoadBytes) { - ScalarLoad = DAG.getLoad( - WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), LD->getOriginalAlign(), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } else { - EVT LoadVT = WideVT; - while (RemainingBytes < LoadBytes) { - LoadBytes >>= 1; // Reduce the load size by half. - LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); - } - ScalarLoad = - DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), LoadVT, - LD->getOriginalAlign(), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } - - RemainingBytes -= LoadBytes; - Offset += LoadBytes; - - BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - unsigned BitOffset = 0; - unsigned WideIdx = 0; - unsigned WideBits = WideVT.getSizeInBits(); - - // Extract bits, pack and extend/trunc them into destination type. 
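The expansion being deleted here (its surviving counterpart is the shift-and-mask loop added to TargetLowering::scalarizeVectorLoad later in this patch) handles vectors whose elements are not byte-sized by loading wide integers and peeling each lane off with a shift and a mask. A standalone sketch of that extraction for a little-endian <4 x i3> payload; the packing below is illustrative, not taken from the patch:

#include <cassert>
#include <cstdint>

// Extract lane Idx of a <4 x i3> vector whose 12 payload bits were loaded as
// one wide integer: shift the lane down, then mask to the element width.
static uint8_t extractLane(uint16_t Loaded, unsigned Idx) {
  const unsigned EltBits = 3;
  const uint16_t EltMask = (1u << EltBits) - 1; // 0b111
  return (Loaded >> (Idx * EltBits)) & EltMask;
}

int main() {
  // Lanes 5, 2, 7, 1 packed low-to-high: 0b001'111'010'101.
  const uint16_t Loaded = 0b001111010101;
  assert(extractLane(Loaded, 0) == 5 && extractLane(Loaded, 1) == 2 &&
         extractLane(Loaded, 2) == 7 && extractLane(Loaded, 3) == 1);
  return 0;
}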
- unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant( - APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); - - for (unsigned Idx = 0; Idx != NumElem; ++Idx) { - assert(BitOffset < WideBits && "Unexpected offset!"); - - SDValue ShAmt = DAG.getConstant( - BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); - - BitOffset += SrcEltBits; - if (BitOffset >= WideBits) { - WideIdx++; - BitOffset -= WideBits; - if (BitOffset > 0) { - ShAmt = DAG.getConstant( - SrcEltBits - BitOffset, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Hi = - DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); - Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); - } - } - - Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); - - switch (ExtType) { - default: llvm_unreachable("Unknown extended-load op!"); - case ISD::EXTLOAD: - Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::ZEXTLOAD: - Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::SEXTLOAD: - ShAmt = - DAG.getConstant(WideBits - SrcEltBits, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); - Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); - Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); - break; - } - Vals.push_back(Lo); - } - - NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); - } else { - std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); - } - - return std::make_pair(Value, NewChain); + return TLI.scalarizeVectorLoad(LD, DAG); } SDValue VectorLegalizer::ExpandStore(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 916e1d3345b03..e5f522aa9e481 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2276,15 +2276,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, } /// isSplatValue - Return true if the vector V has the same value -/// across all DemandedElts. +/// across all DemandedElts. For scalable vectors it does not make +/// sense to specify which elements are demanded or undefined, therefore +/// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts) { - if (!DemandedElts) - return false; // No demanded elts, better to assume we don't know anything. - EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); + if (!VT.isScalableVector() && !DemandedElts) + return false; // No demanded elts, better to assume we don't know anything. + + // Deal with some common cases here that work for both fixed and scalable + // vector types. + switch (V.getOpcode()) { + case ISD::SPLAT_VECTOR: + return true; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: { + APInt UndefLHS, UndefRHS; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + if (isSplatValue(LHS, DemandedElts, UndefLHS) && + isSplatValue(RHS, DemandedElts, UndefRHS)) { + UndefElts = UndefLHS | UndefRHS; + return true; + } + break; + } + } + + // We don't support other cases than those above for scalable vectors at + // the moment. 
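The ADD/SUB/AND cases hoisted above can be shared with scalable vectors because a lanewise binop on two splats is itself a splat of the scalar result, whatever the (possibly runtime-only) lane count. A small fixed-width illustration of that invariant:

#include <algorithm>
#include <array>
#include <cassert>

int main() {
  std::array<int, 8> A, B, C;
  A.fill(3); // splat(3)
  B.fill(5); // splat(5)
  for (unsigned I = 0; I != C.size(); ++I)
    C[I] = A[I] + B[I]; // lanewise add of two splats
  // Every lane holds the same value, so the result is splat(8).
  assert(std::all_of(C.begin(), C.end(), [](int V) { return V == 8; }));
  return 0;
}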
+ if (VT.isScalableVector()) + return false; + unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getNullValue(NumElts); @@ -2341,19 +2368,6 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, } break; } - case ISD::ADD: - case ISD::SUB: - case ISD::AND: { - APInt UndefLHS, UndefRHS; - SDValue LHS = V.getOperand(0); - SDValue RHS = V.getOperand(1); - if (isSplatValue(LHS, DemandedElts, UndefLHS) && - isSplatValue(RHS, DemandedElts, UndefRHS)) { - UndefElts = UndefLHS | UndefRHS; - return true; - } - break; - } } return false; @@ -2363,10 +2377,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); - unsigned NumElts = VT.getVectorNumElements(); APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts; + + // For now we don't support this with scalable vectors. + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2379,19 +2396,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { switch (Opcode) { default: { APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + APInt DemandedElts; + + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { - // Handle case where all demanded elements are UNDEF. - if (DemandedElts.isSubsetOf(UndefElts)) { + if (VT.isScalableVector()) { + // DemandedElts and UndefElts are ignored for scalable vectors, since + // the only supported cases are SPLAT_VECTOR nodes. SplatIdx = 0; - return getUNDEF(VT); + } else { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); } - SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); return V; } break; } + case ISD::SPLAT_VECTOR: + SplatIdx = 0; + return V; case ISD::VECTOR_SHUFFLE: { + if (VT.isScalableVector()) + return SDValue(); + // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. @@ -4240,6 +4273,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, SelectionDAG &DAG) { int NumOps = Ops.size(); assert(NumOps != 0 && "Can't build an empty vector!"); + assert(!VT.isScalableVector() && + "BUILD_VECTOR cannot be used with scalable types"); assert(VT.getVectorNumElements() == (unsigned)NumOps && "Incorrect element count in BUILD_VECTOR!"); @@ -5613,8 +5648,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, llvm_unreachable("should use getVectorShuffle constructor!"); case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast(N3); - // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF - if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except + // for scalable vectors where we will generate appropriate code to + // deal with out-of-bounds cases correctly. 
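The restriction above to fixed-length vectors exists because a scalable type's true element count is MinNumElts * vscale, with vscale known only at run time, so an index past the minimum count may still be in bounds. A minimal model of that decision; the names here are hypothetical:

#include <cstdint>

// Only a fixed-length vector lets us prove an index out of bounds at compile
// time; for scalable vectors the answer depends on the runtime vscale.
static bool provablyOutOfBounds(uint64_t Idx, uint64_t MinNumElts,
                                bool Scalable) {
  return !Scalable && Idx >= MinNumElts;
}

int main() {
  // <4 x i32>: index 7 is always out of bounds, so fold to UNDEF.
  bool Fixed = provablyOutOfBounds(7, 4, /*Scalable=*/false);
  // <vscale x 4 x i32>: with vscale >= 2 there are at least 8 lanes, so
  // index 7 may be valid and the fold must not fire.
  bool Scalable = provablyOutOfBounds(7, 4, /*Scalable=*/true);
  return (Fixed && !Scalable) ? 0 : 1;
}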
+ if (N3C && N1.getValueType().isFixedLengthVector() && + N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); // Undefined index can be assumed out-of-bounds, so that's UNDEF too. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 51539f3818870..944aeab5e1eaf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -346,7 +346,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast(I)) - if (isa(CI->getCalledValue())) + if (CI->isInlineAsm()) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); @@ -1552,16 +1552,17 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getBlockAddress(BA, VT); VectorType *VecTy = cast(V->getType()); - unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. - SmallVector Ops; if (const ConstantVector *CV = dyn_cast(C)) { + SmallVector Ops; + unsigned NumElements = cast(VecTy)->getNumElements(); for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); - } else { - assert(isa(C) && "Unknown vector constant!"); + + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } else if (isa(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); @@ -1570,11 +1571,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); - Ops.assign(NumElements, Op); - } - // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + if (isa(VecTy)) + return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); + else { + SmallVector Ops; + Ops.assign(cast(VecTy)->getNumElements(), Op); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } + } + llvm_unreachable("Unknown vector constant"); } // If this is a static alloca, generate it as the frameindex instead of @@ -1827,6 +1833,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); + Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType()); for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. @@ -1835,9 +1842,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. 
- Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + commonAlignment(BaseAlign, Offsets[i])); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -2770,7 +2779,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); - const Value *Callee(I.getCalledValue()); + const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast(Callee); if (isa(Callee)) visitInlineAsm(I); @@ -2850,8 +2859,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower callbrs with arbitrary operand bundles yet!"); - assert(isa(I.getCalledValue()) && - "Only know how to handle inlineasm callbr"); + assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); visitInlineAsm(I); CopyToExportRegsIfNeeded(&I); @@ -7470,7 +7478,7 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. - if (isa(I.getCalledValue())) { + if (I.isInlineAsm()) { visitInlineAsm(I); return; } @@ -7642,7 +7650,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LLVMContext::OB_cfguardtarget}) && "Cannot lower calls with arbitrary operand bundles!"); - SDValue Callee = getValue(I.getCalledValue()); + SDValue Callee = getValue(I.getCalledOperand()); if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); @@ -7943,7 +7951,7 @@ class ExtraFlags { public: explicit ExtraFlags(const CallBase &Call) { - const InlineAsm *IA = cast(Call.getCalledValue()); + const InlineAsm *IA = cast(Call.getCalledOperand()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) @@ -7976,7 +7984,7 @@ class ExtraFlags { /// visitInlineAsm - Handle a call to an InlineAsm object. void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { - const InlineAsm *IA = cast(Call.getCalledValue()); + const InlineAsm *IA = cast(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. 
SDISelAsmOperandInfoVector ConstraintOperands; @@ -8650,7 +8658,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SmallVector Ops; SDLoc DL = getCurSDLoc(); - Callee = getValue(CI.getCalledValue()); + Callee = getValue(CI.getCalledOperand()); NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguments @@ -9266,6 +9274,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, @@ -9274,7 +9284,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - /* Alignment = */ 1); + HiddenSRetAlign); ReturnValues[i] = L; Chains[i] = L.getValue(1); } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8a46d08852f13..1975f0dde30da 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2416,6 +2416,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; unsigned SubIdx = Idx.getZExtValue(); APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + if (!SubElts) + return TLO.CombineTo(Op, Base); APInt SubUndef, SubZero; if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, Depth + 1)) @@ -2436,6 +2438,22 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; KnownUndef.insertBits(SubUndef, SubIdx); KnownZero.insertBits(SubZero, SubIdx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!BaseElts.isAllOnesValue() || !SubElts.isAllOnesValue()) { + APInt DemandedBits = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue NewBase = SimplifyMultipleUseDemandedBits( + Base, DemandedBits, BaseElts, TLO.DAG, Depth + 1); + SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, SubElts, TLO.DAG, Depth + 1); + if (NewBase || NewSub) { + NewBase = NewBase ? NewBase : Base; + NewSub = NewSub ? NewSub : Sub; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewBase, + NewSub, Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::EXTRACT_SUBVECTOR: { @@ -2452,6 +2470,18 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; KnownUndef = SrcUndef.extractBits(NumElts, Idx); KnownZero = SrcZero.extractBits(NumElts, Idx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnesValue()) { + APInt DemandedBits = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, SrcElts, TLO.DAG, Depth + 1); + if (NewSrc) { + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, + Op.getOperand(1)); + return TLO.CombineTo(Op, NewOp); + } + } } break; } @@ -4321,7 +4351,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, const CallBase &Call) const { /// Information about all of the constraints. 
AsmOperandInfoVector ConstraintOperands; - const InlineAsm *IA = cast(Call.getCalledValue()); + const InlineAsm *IA = cast(Call.getCalledOperand()); unsigned maCount = 0; // Largest number of multiple alternative constraints. // Do a prepass over the constraints, canonicalizing them, and building up the @@ -6590,27 +6620,40 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, // elements that are byte-sized must therefore be stored as an integer // built out of the extracted vector elements. if (!SrcEltVT.isByteSized()) { - unsigned NumBits = SrcVT.getSizeInBits(); - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + unsigned NumLoadBits = SrcVT.getStoreSizeInBits(); + EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits); + + unsigned NumSrcBits = SrcVT.getSizeInBits(); + EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits); - SDValue Load = DAG.getLoad(IntVT, SL, Chain, BasePTR, LD->getPointerInfo(), - LD->getAlignment(), - LD->getMemOperand()->getFlags(), - LD->getAAInfo()); + unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant( + APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT); + + // Load the whole vector and avoid masking off the top bits as it makes + // the codegen worse. + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, + LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); SmallVector Vals; for (unsigned Idx = 0; Idx < NumElem; ++Idx) { unsigned ShiftIntoIdx = (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); SDValue ShiftAmount = - DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, IntVT); - SDValue ShiftedElt = - DAG.getNode(ISD::SRL, SL, IntVT, Load, ShiftAmount); - SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, ShiftedElt); + DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, + getShiftAmountTy(LoadVT, DAG.getDataLayout())); + SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount); + SDValue Elt = + DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask); + SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt); + if (ExtType != ISD::NON_EXTLOAD) { unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType); Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar); } + Vals.push_back(Scalar); } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 0c91cc166f57c..54913175f167e 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -104,14 +105,14 @@ unsigned TargetInstrInfo::getInlineAsmLength( AtInsnStart = false; } - if (AtInsnStart && !std::isspace(static_cast(*Str))) { + if (AtInsnStart && !isSpace(static_cast(*Str))) { unsigned AddLength = MaxInstLength; if (strncmp(Str, ".space", 6) == 0) { char *EStr; int SpaceSize; SpaceSize = strtol(Str + 6, &EStr, 10); SpaceSize = SpaceSize < 0 ? 
0 : SpaceSize;
-      while (*EStr != '\n' && std::isspace(static_cast(*EStr)))
+      while (*EStr != '\n' && isSpace(static_cast(*EStr)))
         ++EStr;
       if (*EStr == '\0' || *EStr == '\n' ||
           isAsmComment(EStr, MAI)) // Successfully parsed .space argument
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a2dfa0a5734f1..a9b1455d13ab3 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2131,15 +2131,19 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
   case GlobalValue::CommonLinkage:
     return XCOFF::C_EXT;
   case GlobalValue::ExternalWeakLinkage:
+  case GlobalValue::LinkOnceAnyLinkage:
   case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
     return XCOFF::C_WEAKEXT;
   case GlobalValue::AppendingLinkage:
     report_fatal_error(
         "There is no mapping that implements AppendingLinkage for XCOFF.");
-  default:
-    report_fatal_error(
-        "Unhandled linkage when mapping linkage to StorageClass.");
+  case GlobalValue::AvailableExternallyLinkage:
+    report_fatal_error("unhandled AvailableExternallyLinkage when mapping "
+                       "linkage to StorageClass");
   }
+  llvm_unreachable("Unknown linkage type!");
 }

 MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index b7e9005736124..44f4fe2ff9b1f 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -358,9 +358,9 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
   Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
   for (auto &U : FPI->uses()) {
     if (auto *CI = dyn_cast(U.getUser())) {
-      if (CI->getCalledValue() == GetExnF)
+      if (CI->getCalledOperand() == GetExnF)
         GetExnCI = CI;
-      if (CI->getCalledValue() == GetSelectorF)
+      if (CI->getCalledOperand() == GetSelectorF)
         GetSelectorCI = CI;
     }
   }
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index d6c049f3d0dae..c9c1bf84ab30b 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -234,6 +234,9 @@ static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
   return CleanupPad->getParent();
 }

+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.CxxUnwindMap[]
 static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
                                      const Instruction *FirstNonPHI,
                                      int ParentState) {
@@ -336,6 +339,9 @@ static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
   return FuncInfo.SEHUnwindMap.size() - 1;
 }

+// Starting from an EHPad, walk backward through the control-flow graph
+// to produce two primary outputs:
+// FuncInfo.EHPadStateMap[] and FuncInfo.SEHUnwindMap[]
 static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
                                      const Instruction *FirstNonPHI,
                                      int ParentState) {
@@ -955,7 +961,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
       // Skip call sites which are nounwind intrinsics or inline asm.
auto *CalledFn = - dyn_cast(CB->getCalledValue()->stripPointerCasts()); + dyn_cast(CB->getCalledOperand()->stripPointerCasts()); if (CalledFn && ((CalledFn->isIntrinsic() && CB->doesNotThrow()) || CB->isInlineAsm())) continue; diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index 6444af9046b8d..2c3e2023bced8 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -160,16 +160,17 @@ bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die, if (Die.getTag() == dwarf::DW_TAG_lexical_block) return false; - // FIXME: a bit wasteful as the first getName might return the - // short name. if (!Info.MangledName) - if (const char *MangledName = Die.getName(DINameKind::LinkageName)) + if (const char *MangledName = Die.getLinkageName()) Info.MangledName = StringPool.getEntry(MangledName); if (!Info.Name) - if (const char *Name = Die.getName(DINameKind::ShortName)) + if (const char *Name = Die.getShortName()) Info.Name = StringPool.getEntry(Name); + if (!Info.MangledName) + Info.MangledName = Info.Name; + if (StripTemplate && Info.Name && Info.MangledName != Info.Name) { StringRef Name = Info.Name.getString(); if (Optional StrippedName = StripTemplateParameters(Name)) diff --git a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp index 077fd4494241d..c9a5da6676b35 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp @@ -80,8 +80,12 @@ PointerIntPair DeclContextTree::getChildDeclContext( break; } - const char *Name = DIE.getName(DINameKind::LinkageName); - const char *ShortName = DIE.getName(DINameKind::ShortName); + const char *Name = DIE.getLinkageName(); + const char *ShortName = DIE.getShortName(); + + if (!Name) + Name = ShortName; + StringRef NameRef; StringRef ShortNameRef; StringRef FileRef; diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index 0f704f286ee9d..3c8a30101450d 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -23,13 +23,11 @@ using namespace llvm::codeview; DebugSubsectionRecord::DebugSubsectionRecord() = default; DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind, - BinaryStreamRef Data, - CodeViewContainer Container) - : Container(Container), Kind(Kind), Data(Data) {} + BinaryStreamRef Data) + : Kind(Kind), Data(Data) {} Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, - DebugSubsectionRecord &Info, - CodeViewContainer Container) { + DebugSubsectionRecord &Info) { const DebugSubsectionHeader *Header; BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Header)) @@ -39,7 +37,6 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, static_cast(uint32_t(Header->Kind)); if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) return EC; - Info.Container = Container; Info.Kind = Kind; return Error::success(); } @@ -53,14 +50,14 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - std::shared_ptr Subsection, CodeViewContainer Container) - : Subsection(std::move(Subsection)), Container(Container) {} + std::shared_ptr Subsection) + : Subsection(std::move(Subsection)) {} 
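With the CodeViewContainer argument moving off the DebugSubsectionRecordBuilder constructors and onto commit() (see the second constructor and commit() just below), a call site now names the container once, at write time. A hedged usage sketch based only on the signatures in this patch; the function and variable names are illustrative:

#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"

#include <memory>

using namespace llvm;
using namespace llvm::codeview;

// The builder no longer stores the container; it is supplied at commit time.
static Error writePdbSubsection(BinaryStreamWriter &Writer,
                                std::shared_ptr<DebugSubsection> Subsection) {
  DebugSubsectionRecordBuilder Builder(std::move(Subsection));
  // calculateSerializedLength() is const after this change, so stream sizing
  // can work on a const builder.
  (void)Builder.calculateSerializedLength();
  return Builder.commit(Writer, CodeViewContainer::Pdb);
}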
DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - const DebugSubsectionRecord &Contents, CodeViewContainer Container) - : Contents(Contents), Container(Container) {} + const DebugSubsectionRecord &Contents) + : Contents(Contents) {} -uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { +uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() const { uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize() : Contents.getRecordData().getLength(); // The length of the entire subsection is always padded to 4 bytes, @@ -68,7 +65,8 @@ uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { return sizeof(DebugSubsectionHeader) + alignTo(DataSize, 4); } -Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { +Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer, + CodeViewContainer Container) const { assert(Writer.getOffset() % alignOf(Container) == 0 && "Debug Subsection not properly aligned"); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 134436c1ddbb2..f93f32917c337 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -356,7 +356,7 @@ DWARFDie::find(ArrayRef Attrs) const { Optional DWARFDie::findRecursively(ArrayRef Attrs) const { - std::vector Worklist; + SmallVector Worklist; Worklist.push_back(*this); // Keep track if DIEs already seen to prevent infinite recursion. @@ -531,14 +531,26 @@ const char *DWARFDie::getName(DINameKind Kind) const { return nullptr; // Try to get mangled name only if it was asked for. if (Kind == DINameKind::LinkageName) { - if (auto Name = dwarf::toString( - findRecursively({DW_AT_MIPS_linkage_name, DW_AT_linkage_name}), - nullptr)) + if (auto Name = getLinkageName()) return Name; } - if (auto Name = dwarf::toString(findRecursively(DW_AT_name), nullptr)) - return Name; - return nullptr; + return getShortName(); +} + +const char *DWARFDie::getShortName() const { + if (!isValid()) + return nullptr; + + return dwarf::toString(findRecursively(dwarf::DW_AT_name), nullptr); +} + +const char *DWARFDie::getLinkageName() const { + if (!isValid()) + return nullptr; + + return dwarf::toString(findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr); } uint64_t DWARFDie::getDeclLine() const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index beca8cc0bc7df..c7d3974fd79aa 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -305,7 +305,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, Unit = TypeUnitVector.addUnit(std::make_unique( DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), + DObj.getStrOffsetsSection(), &DObj.getAddrSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, TypeUnitVector)); break; @@ -319,7 +319,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, Unit = CompileUnitVector.addUnit(std::make_unique( DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), + DObj.getStrOffsetsSection(), &DObj.getAddrSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, CompileUnitVector)); break; @@ -457,8 +457,15 @@ unsigned 
DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DObj.getRangesSection().Data.size()) - ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:"); + unsigned DwarfVersion = Die.getDwarfUnit()->getVersion(); + const DWARFSection &RangeSection = DwarfVersion < 5 + ? DObj.getRangesSection() + : DObj.getRnglistsSection(); + if (*SectionOffset >= RangeSection.Data.size()) + ReportError( + "DW_AT_ranges offset is beyond " + + StringRef(DwarfVersion < 5 ? ".debug_ranges" : ".debug_rnglists") + + " bounds: " + llvm::formatv("{0:x8}", *SectionOffset)); break; } ReportError("DIE has invalid DW_AT_ranges encoding:"); diff --git a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp index eb392dd83d6ac..ad35aefe77748 100644 --- a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp @@ -82,9 +82,18 @@ llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj, size_t NumBefore = Gsym.getNumFunctionInfos(); for (const object::SymbolRef &Sym : Obj.symbols()) { Expected SymType = Sym.getType(); - const uint64_t Addr = Sym.getValue(); - if (!SymType || SymType.get() != SymbolRef::Type::ST_Function || - !Gsym.IsValidTextAddress(Addr) || Gsym.hasFunctionInfoForAddress(Addr)) + if (!SymType) { + consumeError(SymType.takeError()); + continue; + } + Expected AddrOrErr = Sym.getValue(); + if (!AddrOrErr) + // TODO: Test this error. + return AddrOrErr.takeError(); + + if (SymType.get() != SymbolRef::Type::ST_Function || + !Gsym.IsValidTextAddress(*AddrOrErr) || + Gsym.hasFunctionInfoForAddress(*AddrOrErr)) continue; // Function size for MachO files will be 0 constexpr bool NoCopy = false; @@ -98,8 +107,8 @@ llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj, // for mach-o files. 
if (IsMachO) Name->consume_front("_"); - Gsym.addFunctionInfo(FunctionInfo(Addr, size, - Gsym.insertString(*Name, NoCopy))); + Gsym.addFunctionInfo( + FunctionInfo(*AddrOrErr, size, Gsym.insertString(*Name, NoCopy))); } size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n"; diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 06bbbdefe2143..73801ea1dd1b2 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -89,8 +89,7 @@ void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const { uint32_t Result = 0; for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - Result += Builder->calculateSerializedLength(); + Result += Builder.calculateSerializedLength(); } return Result; } @@ -163,8 +162,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, "Invalid debug section alignment!"); // TODO: Write C11 Line data for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - if (auto EC = Builder->commit(SymbolWriter)) + if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb)) return EC; } @@ -180,12 +178,10 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, void DbiModuleDescriptorBuilder::addDebugSubsection( std::shared_ptr Subsection) { assert(Subsection); - C13Builders.push_back(std::make_unique( - std::move(Subsection), CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(std::move(Subsection))); } void DbiModuleDescriptorBuilder::addDebugSubsection( const DebugSubsectionRecord &SubsectionContents) { - C13Builders.push_back(std::make_unique( - SubsectionContents, CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(SubsectionContents)); } diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 432f1e9b24d3a..d35c656e71724 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -49,7 +49,6 @@ struct llvm::pdb::GSIHashStreamBuilder { }; std::vector Records; - uint32_t StreamIndex; llvm::DenseSet SymbolHashes; std::vector HashRecords; std::array HashBitmap; @@ -213,11 +212,12 @@ Error GSIStreamBuilder::finalizeMsfLayout() { Expected Idx = Msf.addStream(calculateGlobalsHashStreamSize()); if (!Idx) return Idx.takeError(); - GSH->StreamIndex = *Idx; + GlobalsStreamIndex = *Idx; + Idx = Msf.addStream(calculatePublicsHashStreamSize()); if (!Idx) return Idx.takeError(); - PSH->StreamIndex = *Idx; + PublicsStreamIndex = *Idx; uint32_t RecordBytes = GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); @@ -225,19 +225,34 @@ Error GSIStreamBuilder::finalizeMsfLayout() { Idx = Msf.addStream(RecordBytes); if (!Idx) return Idx.takeError(); - RecordStreamIdx = *Idx; + RecordStreamIndex = *Idx; return Error::success(); } -static bool comparePubSymByAddrAndName( - const std::pair &LS, - const std::pair &RS) { - if (LS.second->Segment != RS.second->Segment) - return LS.second->Segment < RS.second->Segment; - if (LS.second->Offset != RS.second->Offset) - return LS.second->Offset < RS.second->Offset; +static StringRef extractPubSym(const CVSymbol 
*Sym, uint16_t &Seg, + uint32_t &Offset) { + ArrayRef Buf = Sym->content(); + assert(Buf.size() > sizeof(PublicSym32Header)); + const auto *Hdr = reinterpret_cast(Buf.data()); + Buf = Buf.drop_front(sizeof(PublicSym32Header)); + Seg = Hdr->Segment; + Offset = Hdr->Offset; + // Don't worry about finding the null terminator, since the strings will be + // compared later. + return StringRef(reinterpret_cast(Buf.data()), Buf.size()); +} - return LS.second->Name < RS.second->Name; +static bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) { + uint16_t LSeg, RSeg; + uint32_t LOff, ROff; + StringRef LName, RName; + LName = extractPubSym(LS, LSeg, LOff); + RName = extractPubSym(RS, RSeg, ROff); + if (LSeg != RSeg) + return LSeg < RSeg; + if (LOff != ROff) + return LOff < ROff; + return LName < RName; } /// Compute the address map. The address map is an array of symbol offsets @@ -246,19 +261,15 @@ static std::vector computeAddrMap(ArrayRef Records) { // Make a vector of pointers to the symbols so we can sort it by address. // Also gather the symbol offsets while we're at it. - std::vector DeserializedPublics; - std::vector> PublicsByAddr; + std::vector PublicsByAddr; std::vector SymOffsets; - DeserializedPublics.reserve(Records.size()); PublicsByAddr.reserve(Records.size()); SymOffsets.reserve(Records.size()); uint32_t SymOffset = 0; for (const CVSymbol &Sym : Records) { assert(Sym.kind() == SymbolKind::S_PUB32); - DeserializedPublics.push_back( - cantFail(SymbolDeserializer::deserializeAs(Sym))); - PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); + PublicsByAddr.push_back(&Sym); SymOffsets.push_back(SymOffset); SymOffset += Sym.length(); } @@ -267,22 +278,14 @@ static std::vector computeAddrMap(ArrayRef Records) { // Fill in the symbol offsets in the appropriate order. 
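comparePubSymByAddrAndName above now orders raw records by (segment, offset, name) without deserializing each PublicSym32. The same ordering can be stated more compactly with std::tie; a standalone equivalent, noted only as a design alternative:

#include <cstdint>
#include <string_view>
#include <tuple>

// Lexicographic (segment, offset, name) ordering, equivalent to the chain of
// if-comparisons in comparePubSymByAddrAndName.
static bool lessPubSym(uint16_t LSeg, uint32_t LOff, std::string_view LName,
                       uint16_t RSeg, uint32_t ROff, std::string_view RName) {
  return std::tie(LSeg, LOff, LName) < std::tie(RSeg, ROff, RName);
}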
std::vector AddrMap; AddrMap.reserve(Records.size()); - for (auto &Sym : PublicsByAddr) { - ptrdiff_t Idx = std::distance(Records.data(), Sym.first); + for (const CVSymbol *Sym : PublicsByAddr) { + ptrdiff_t Idx = std::distance(Records.data(), Sym); assert(Idx >= 0 && size_t(Idx) < Records.size()); AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); } return AddrMap; } -uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { - return PSH->StreamIndex; -} - -uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { - return GSH->StreamIndex; -} - void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { PSH->addSymbol(Pub, Msf); } @@ -366,7 +369,7 @@ Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, auto PS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); auto PRS = WritableMappedBlockStream::createIndexedStream( - Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); + Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator()); if (auto EC = commitSymbolRecordStream(*PRS)) return EC; diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index d6f7e28b839ef..d6a187ebb9f6a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -144,7 +144,7 @@ Error PDBFileBuilder::finalizeMsfLayout() { if (Dbi) { Dbi->setPublicsStreamIndex(Gsi->getPublicsStreamIndex()); Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex()); - Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx()); + Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIndex()); } } if (Tpi) { diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 46fc1bbe0a6b2..62e1ea6e0f0af 100644 --- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1167,7 +1167,7 @@ void Interpreter::visitCallBase(CallBase &I) { // To handle indirect calls, we must get the pointer value from the argument // and treat it as a function pointer. - GenericValue SRC = getOperandValue(SF.Caller->getCalledValue(), SF); + GenericValue SRC = getOperandValue(SF.Caller->getCalledOperand(), SF); callFunction((Function*)GVTOP(SRC), ArgVals); } diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index d5a886b2f5a52..d34d1817d0e87 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -8,6 +8,14 @@ add_llvm_component_library(LLVMJITLink MachO_x86_64.cpp MachOLinkGraphBuilder.cpp + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/JITLink + DEPENDS intrinsics_gen ) + +target_link_libraries(LLVMJITLink + PRIVATE + LLVMObject +) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h index 5345909464936..87e5e8bbc98d4 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h @@ -80,13 +80,13 @@ class JITLinkerBase { // For debug dumping of the link graph. virtual StringRef getEdgeKindName(Edge::Kind K) const = 0; - // Alight a JITTargetAddress to conform with block alignment requirements. + // Align a JITTargetAddress to conform with block alignment requirements. 
static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); return Addr + Delta; } - // Alight a pointer to conform with block alignment requirements. + // Align a pointer to conform with block alignment requirements. static char *alignToBlock(char *P, Block &B) { uint64_t PAddr = static_cast(reinterpret_cast(P)); uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment(); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp index 2ef2f5e9409d7..b3e45868ab227 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp @@ -65,7 +65,7 @@ void jitLink_MachO(std::unique_ptr Ctx) { uint32_t CPUType; memcpy(&CPUType, Data.data() + 4, sizeof(uint32_t)); if (Magic == MachO::MH_CIGAM_64) - ByteSwap_32(CPUType); + CPUType = ByteSwap_32(CPUType); LLVM_DEBUG({ dbgs() << "jitLink_MachO: cputype = " << format("0x%08" PRIx32, CPUType) diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index a8e88a9785c59..9e38dc36faae7 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -179,21 +179,20 @@ CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) { if (I == DylibResources.end()) { auto &ImplD = getExecutionSession().createBareJITDylib(TargetD.getName() + ".impl"); - JITDylibSearchOrder NewSearchOrder; - TargetD.withSearchOrderDo( - [&](const JITDylibSearchOrder &TargetSearchOrder) { - NewSearchOrder = TargetSearchOrder; - }); - - assert( - !NewSearchOrder.empty() && NewSearchOrder.front().first == &TargetD && - NewSearchOrder.front().second == JITDylibLookupFlags::MatchAllSymbols && - "TargetD must be at the front of its own search order and match " - "non-exported symbol"); - NewSearchOrder.insert(std::next(NewSearchOrder.begin()), - {&ImplD, JITDylibLookupFlags::MatchAllSymbols}); - ImplD.setSearchOrder(NewSearchOrder, false); - TargetD.setSearchOrder(std::move(NewSearchOrder), false); + JITDylibSearchOrder NewLinkOrder; + TargetD.withLinkOrderDo([&](const JITDylibSearchOrder &TargetLinkOrder) { + NewLinkOrder = TargetLinkOrder; + }); + + assert(!NewLinkOrder.empty() && NewLinkOrder.front().first == &TargetD && + NewLinkOrder.front().second == + JITDylibLookupFlags::MatchAllSymbols && + "TargetD must be at the front of its own search order and match " + "non-exported symbol"); + NewLinkOrder.insert(std::next(NewLinkOrder.begin()), + {&ImplD, JITDylibLookupFlags::MatchAllSymbols}); + ImplD.setLinkOrder(NewLinkOrder, false); + TargetD.setLinkOrder(std::move(NewLinkOrder), false); PerDylibResources PDR(ImplD, BuildIndirectStubsManager()); I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first; diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 97755802bb24b..7aaf62141c89b 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1157,30 +1157,29 @@ void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) { Q->handleFailed(make_error(FailedSymbolsMap)); } -void JITDylib::setSearchOrder(JITDylibSearchOrder NewSearchOrder, - bool SearchThisJITDylibFirst) { +void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder, + bool LinkAgainstThisJITDylibFirst) { ES.runSessionLocked([&]() { - if (SearchThisJITDylibFirst) { - SearchOrder.clear(); - if (NewSearchOrder.empty() || 
NewSearchOrder.front().first != this) - SearchOrder.push_back( + if (LinkAgainstThisJITDylibFirst) { + LinkOrder.clear(); + if (NewLinkOrder.empty() || NewLinkOrder.front().first != this) + LinkOrder.push_back( std::make_pair(this, JITDylibLookupFlags::MatchAllSymbols)); - SearchOrder.insert(SearchOrder.end(), NewSearchOrder.begin(), - NewSearchOrder.end()); + LinkOrder.insert(LinkOrder.end(), NewLinkOrder.begin(), + NewLinkOrder.end()); } else - SearchOrder = std::move(NewSearchOrder); + LinkOrder = std::move(NewLinkOrder); }); } -void JITDylib::addToSearchOrder(JITDylib &JD, - JITDylibLookupFlags JDLookupFlags) { - ES.runSessionLocked([&]() { SearchOrder.push_back({&JD, JDLookupFlags}); }); +void JITDylib::addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags) { + ES.runSessionLocked([&]() { LinkOrder.push_back({&JD, JDLookupFlags}); }); } -void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - JITDylibLookupFlags JDLookupFlags) { +void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, + JITDylibLookupFlags JDLookupFlags) { ES.runSessionLocked([&]() { - for (auto &KV : SearchOrder) + for (auto &KV : LinkOrder) if (KV.first == &OldJD) { KV = {&NewJD, JDLookupFlags}; break; @@ -1188,14 +1187,14 @@ void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, }); } -void JITDylib::removeFromSearchOrder(JITDylib &JD) { +void JITDylib::removeFromLinkOrder(JITDylib &JD) { ES.runSessionLocked([&]() { - auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(), + auto I = std::find_if(LinkOrder.begin(), LinkOrder.end(), [&](const JITDylibSearchOrder::value_type &KV) { return KV.first == &JD; }); - if (I != SearchOrder.end()) - SearchOrder.erase(I); + if (I != LinkOrder.end()) + LinkOrder.erase(I); }); } @@ -1529,7 +1528,7 @@ void JITDylib::dump(raw_ostream &OS) { ES.runSessionLocked([&, this]() { OS << "JITDylib \"" << JITDylibName << "\" (ES: " << format("0x%016" PRIx64, reinterpret_cast(&ES)) << "):\n" - << "Search order: " << SearchOrder << "\n" + << "Link order: " << LinkOrder << "\n" << "Symbol table:\n"; for (auto &KV : Symbols) { @@ -1610,7 +1609,7 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) { JITDylib::JITDylib(ExecutionSession &ES, std::string Name) : ES(ES), JITDylibName(std::move(Name)) { - SearchOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols}); + LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols}); } Error JITDylib::defineImpl(MaterializationUnit &MU) { diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 0ee1beef8bf14..c8d7b4d2db04d 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -374,8 +374,8 @@ class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport { continue; Visited.insert(&NextJD); DFSLinkOrder.push_back(&NextJD); - NextJD.withSearchOrderDo([&](const JITDylibSearchOrder &SearchOrder) { - for (auto &KV : SearchOrder) + NextJD.withLinkOrderDo([&](const JITDylibSearchOrder &LinkOrder) { + for (auto &KV : LinkOrder) WorkStack.push_back(KV.first); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index c547c8ef7f150..15c3aa79a2a84 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -271,8 +271,8 @@ std::vector MachOPlatform::getDFSLinkOrder(JITDylib &JD) { continue; Visited.insert(NextJD); Result.push_back(NextJD); - 
NextJD->withSearchOrderDo([&](const JITDylibSearchOrder &SO) { - for (auto &KV : SO) + NextJD->withLinkOrderDo([&](const JITDylibSearchOrder &LO) { + for (auto &KV : LO) WorkStack.push_back(KV.first); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 83d3f676dc495..02066b458dfce 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -50,9 +50,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { - JITDylibSearchOrder SearchOrder; - MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchOrder &O) { SearchOrder = O; }); + JITDylibSearchOrder LinkOrder; + MR.getTargetJITDylib().withLinkOrderDo( + [&](const JITDylibSearchOrder &LO) { LinkOrder = LO; }); auto &ES = Layer.getExecutionSession(); @@ -90,7 +90,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { MR.addDependencies(KV.first, InternalDeps); } - ES.lookup(LookupKind::Static, SearchOrder, std::move(LookupSet), + ES.lookup(LookupKind::Static, LinkOrder, std::move(LookupSet), SymbolState::Resolved, std::move(OnResolve), [this](const SymbolDependenceMap &Deps) { registerDependencies(Deps); diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 5ded369f3bd39..21925726072e3 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -47,10 +47,10 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { MR.addDependenciesForAll(Deps); }; - JITDylibSearchOrder SearchOrder; - MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchOrder &JDs) { SearchOrder = JDs; }); - ES.lookup(LookupKind::Static, SearchOrder, InternedSymbols, + JITDylibSearchOrder LinkOrder; + MR.getTargetJITDylib().withLinkOrderDo( + [&](const JITDylibSearchOrder &LO) { LinkOrder = LO; }); + ES.lookup(LookupKind::Static, LinkOrder, InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap), RegisterDependencies); } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index da268efbeda0a..1d8f1ac8ac8af 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -76,7 +76,7 @@ RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) { // The value in a relocatable COFF object is the offset. 
- return Sym.getValue(); + return cantFail(Sym.getValue()); } uint64_t RuntimeDyldCOFF::getDLLImportOffset(unsigned SectionID, StubMap &Stubs, diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 9ba67589e4b23..e5e512672daa1 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -9,6 +9,7 @@ #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "RuntimeDyldCheckerImpl.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" @@ -708,7 +709,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, const char *LineStart = MemBuf->getBufferStart(); // Eat whitespace. - while (LineStart != MemBuf->getBufferEnd() && std::isspace(*LineStart)) + while (LineStart != MemBuf->getBufferEnd() && isSpace(*LineStart)) ++LineStart; while (LineStart != MemBuf->getBufferEnd() && *LineStart != '\0') { @@ -734,7 +735,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, // Eat whitespace. LineStart = LineEnd; - while (LineStart != MemBuf->getBufferEnd() && std::isspace(*LineStart)) + while (LineStart != MemBuf->getBufferEnd() && isSpace(*LineStart)) ++LineStart; } return DidAllTestsPass && (NumRules != 0); diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp index 1f7371607b880..6504e566ba4bb 100644 --- a/llvm/lib/IR/AbstractCallSite.cpp +++ b/llvm/lib/IR/AbstractCallSite.cpp @@ -64,7 +64,7 @@ AbstractCallSite::AbstractCallSite(const Use *U) // This happens by updating the use @p U to the use of the constant // cast expression and afterwards re-initializing CB accordingly. 
if (ConstantExpr *CE = dyn_cast(U->getUser())) - if (CE->getNumUses() == 1 && CE->isCast()) { + if (CE->hasOneUse() && CE->isCast()) { U = &*CE->use_begin(); CB = dyn_cast(U->getUser()); } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index b7d8287a9feaa..1afc5c1fd928d 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3905,7 +3905,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { PrintCallingConv(CI->getCallingConv(), Out); } - Operand = CI->getCalledValue(); + Operand = CI->getCalledOperand(); FunctionType *FTy = CI->getFunctionType(); Type *RetTy = FTy->getReturnType(); const AttributeList &PAL = CI->getAttributes(); @@ -3944,7 +3944,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperandBundles(CI); } else if (const InvokeInst *II = dyn_cast(&I)) { - Operand = II->getCalledValue(); + Operand = II->getCalledOperand(); FunctionType *FTy = II->getFunctionType(); Type *RetTy = FTy->getReturnType(); const AttributeList &PAL = II->getAttributes(); @@ -3987,7 +3987,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << " unwind "; writeOperand(II->getUnwindDest(), true); } else if (const CallBrInst *CBI = dyn_cast(&I)) { - Operand = CBI->getCalledValue(); + Operand = CBI->getCalledOperand(); FunctionType *FTy = CBI->getFunctionType(); Type *RetTy = FTy->getReturnType(); const AttributeList &PAL = CBI->getAttributes(); @@ -4197,9 +4197,16 @@ void AssemblyWriter::writeAttribute(const Attribute &Attr, bool InAttrGroup) { return; } - assert(Attr.hasAttribute(Attribute::ByVal) && "unexpected type attr"); + assert((Attr.hasAttribute(Attribute::ByVal) || + Attr.hasAttribute(Attribute::Preallocated)) && + "unexpected type attr"); + + if (Attr.hasAttribute(Attribute::ByVal)) { + Out << "byval"; + } else { + Out << "preallocated"; + } - Out << "byval"; if (Type *Ty = Attr.getValueAsType()) { Out << '('; TypePrinter.print(Ty, Out); diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h index 44c35ca360456..46eb4a1660280 100644 --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -54,8 +54,6 @@ class AttributeImpl : public FoldingSetNode { AttributeImpl(const AttributeImpl &) = delete; AttributeImpl &operator=(const AttributeImpl &) = delete; - virtual ~AttributeImpl(); - bool isEnumAttribute() const { return KindID == EnumAttrEntry; } bool isIntAttribute() const { return KindID == IntAttrEntry; } bool isStringAttribute() const { return KindID == StringAttrEntry; } @@ -104,6 +102,9 @@ class AttributeImpl : public FoldingSetNode { } }; +static_assert(std::is_trivially_destructible::value, + "AttributeImpl should be trivially destructible"); + //===----------------------------------------------------------------------===// /// \class /// A set of classes that contain the value of the @@ -112,8 +113,6 @@ class AttributeImpl : public FoldingSetNode { /// attribute enties, which are for target-dependent attributes. 
class EnumAttributeImpl : public AttributeImpl { - virtual void anchor(); - Attribute::AttrKind Kind; protected: @@ -130,8 +129,6 @@ class EnumAttributeImpl : public AttributeImpl { class IntAttributeImpl : public EnumAttributeImpl { uint64_t Val; - void anchor() override; - public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { @@ -142,24 +139,43 @@ class IntAttributeImpl : public EnumAttributeImpl { uint64_t getValue() const { return Val; } }; -class StringAttributeImpl : public AttributeImpl { - virtual void anchor(); +class StringAttributeImpl final + : public AttributeImpl, + private TrailingObjects { + friend TrailingObjects; - std::string Kind; - std::string Val; + unsigned KindSize; + unsigned ValSize; + size_t numTrailingObjects(OverloadToken) const { + return KindSize + 1 + ValSize + 1; + } public: StringAttributeImpl(StringRef Kind, StringRef Val = StringRef()) - : AttributeImpl(StringAttrEntry), Kind(std::string(Kind)), - Val(std::string(Val)) {} + : AttributeImpl(StringAttrEntry), KindSize(Kind.size()), + ValSize(Val.size()) { + char *TrailingString = getTrailingObjects(); + // Some users rely on zero-termination. + llvm::copy(Kind, TrailingString); + TrailingString[KindSize] = '\0'; + llvm::copy(Val, &TrailingString[KindSize + 1]); + TrailingString[KindSize + 1 + ValSize] = '\0'; + } + + StringRef getStringKind() const { + return StringRef(getTrailingObjects(), KindSize); + } + StringRef getStringValue() const { + return StringRef(getTrailingObjects() + KindSize + 1, ValSize); + } - StringRef getStringKind() const { return Kind; } - StringRef getStringValue() const { return Val; } + static size_t totalSizeToAlloc(StringRef Kind, StringRef Val) { + return TrailingObjects::totalSizeToAlloc(Kind.size() + 1 + + Val.size() + 1); + } }; class TypeAttributeImpl : public EnumAttributeImpl { - void anchor() override; - Type *Ty; public: @@ -220,6 +236,7 @@ class AttributeSetNode final std::pair> getAllocSizeArgs() const; std::string getAsString(bool InAttrGrp) const; Type *getByValType() const; + Type *getPreallocatedType() const; using iterator = const Attribute *; @@ -249,7 +266,6 @@ class AttributeListImpl final friend TrailingObjects; private: - LLVMContext &Context; unsigned NumAttrSets; ///< Number of entries in this set. /// Bitset with a bit for each available attribute Attribute::AttrKind. uint8_t AvailableFunctionAttrs[12] = {}; @@ -258,17 +274,12 @@ class AttributeListImpl final size_t numTrailingObjects(OverloadToken) { return NumAttrSets; } public: - AttributeListImpl(LLVMContext &C, ArrayRef Sets); + AttributeListImpl(ArrayRef Sets); // AttributesSetImpt is uniqued, these should not be available. AttributeListImpl(const AttributeListImpl &) = delete; AttributeListImpl &operator=(const AttributeListImpl &) = delete; - void operator delete(void *p) { ::operator delete(p); } - - /// Get the context that created this AttributeListImpl. - LLVMContext &getContext() { return Context; } - /// Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. 
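// A hedged usage sketch of the trailing-storage pattern above: the allocator
// stands in for LLVMContextImpl's bump allocator (the Attributes.cpp hunk
// below does exactly this), and the kind/value strings are placeholders.
static void stringAttrStorageSketch() {
  llvm::BumpPtrAllocator Alloc;
  void *Mem =
      Alloc.Allocate(StringAttributeImpl::totalSizeToAlloc("kind", "value"),
                     alignof(StringAttributeImpl));
  auto *Impl = new (Mem) StringAttributeImpl("kind", "value");
  assert(Impl->getStringKind() == "kind" && Impl->getStringValue() == "value");
  // Because the strings live in trailing storage and the impl classes are now
  // trivially destructible, the bump allocator can reclaim everything
  // wholesale without running destructors -- which is what lets
  // LLVMContextImpl drop its manual delete loops further down.
}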
bool hasFnAttribute(Attribute::AttrKind Kind) const { @@ -286,6 +297,9 @@ class AttributeListImpl final void dump() const; }; +static_assert(std::is_trivially_destructible::value, + "AttributeListImpl should be trivially destructible"); + } // end namespace llvm #endif // LLVM_LIB_IR_ATTRIBUTEIMPL_H diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 7c6ce2c78a66e..7a8068e681847 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -92,9 +92,9 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, // If we didn't find any existing attributes of the same shape then create a // new one and insert it. if (!Val) - PA = new EnumAttributeImpl(Kind); + PA = new (pImpl->Alloc) EnumAttributeImpl(Kind); else - PA = new IntAttributeImpl(Kind, Val); + PA = new (pImpl->Alloc) IntAttributeImpl(Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -114,7 +114,10 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new StringAttributeImpl(Kind, Val); + void *Mem = + pImpl->Alloc.Allocate(StringAttributeImpl::totalSizeToAlloc(Kind, Val), + alignof(StringAttributeImpl)); + PA = new (Mem) StringAttributeImpl(Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -135,7 +138,7 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new TypeAttributeImpl(Kind, Ty); + PA = new (pImpl->Alloc) TypeAttributeImpl(Kind, Ty); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -169,6 +172,10 @@ Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) { return get(Context, ByVal, Ty); } +Attribute Attribute::getWithPreallocatedType(LLVMContext &Context, Type *Ty) { + return get(Context, Preallocated, Ty); +} + Attribute Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg, const Optional &NumElemsArg) { @@ -446,6 +453,17 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return Result; } + if (hasAttribute(Attribute::Preallocated)) { + std::string Result; + Result += "preallocated"; + raw_string_ostream OS(Result); + Result += '('; + getValueAsType()->print(OS, false, true); + OS.flush(); + Result += ')'; + return Result; + } + // FIXME: These should be output like this: // // align=4 @@ -539,17 +557,6 @@ void Attribute::Profile(FoldingSetNodeID &ID) const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -// Pin the vtables to this file. -AttributeImpl::~AttributeImpl() = default; - -void EnumAttributeImpl::anchor() {} - -void IntAttributeImpl::anchor() {} - -void StringAttributeImpl::anchor() {} - -void TypeAttributeImpl::anchor() {} - bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { if (isStringAttribute()) return false; return getKindAsEnum() == A; @@ -731,6 +738,10 @@ Type *AttributeSet::getByValType() const { return SetNode ? SetNode->getByValType() : nullptr; } +Type *AttributeSet::getPreallocatedType() const { + return SetNode ? SetNode->getPreallocatedType() : nullptr; +} + std::pair> AttributeSet::getAllocSizeArgs() const { return SetNode ? 
SetNode->getAllocSizeArgs() : std::pair>(0, 0); @@ -829,6 +840,9 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) { case Attribute::ByVal: Attr = Attribute::getWithByValType(C, B.getByValType()); break; + case Attribute::Preallocated: + Attr = Attribute::getWithPreallocatedType(C, B.getPreallocatedType()); + break; case Attribute::Alignment: assert(B.getAlignment() && "Alignment must be set"); Attr = Attribute::getWithAlignment(C, *B.getAlignment()); @@ -912,6 +926,13 @@ Type *AttributeSetNode::getByValType() const { return 0; } +Type *AttributeSetNode::getPreallocatedType() const { + for (const auto &I : *this) + if (I.hasAttribute(Attribute::Preallocated)) + return I.getValueAsType(); + return 0; +} + uint64_t AttributeSetNode::getDereferenceableBytes() const { if (auto A = findEnumAttribute(Attribute::Dereferenceable)) return A->getDereferenceableBytes(); @@ -953,9 +974,8 @@ static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { return Index == AttributeList::FunctionIndex ? 0 : Index + 1; } -AttributeListImpl::AttributeListImpl(LLVMContext &C, - ArrayRef Sets) - : Context(C), NumAttrSets(Sets.size()) { +AttributeListImpl::AttributeListImpl(ArrayRef Sets) + : NumAttrSets(Sets.size()) { assert(!Sets.empty() && "pointless AttributeListImpl"); // There's memory after the node where we can store the entries in. @@ -1011,9 +1031,10 @@ AttributeList AttributeList::getImpl(LLVMContext &C, // create a new one and insert it. if (!PA) { // Coallocate entries after the AttributeListImpl itself. - void *Mem = ::operator new( - AttributeListImpl::totalSizeToAlloc(AttrSets.size())); - PA = new (Mem) AttributeListImpl(C, AttrSets); + void *Mem = pImpl->Alloc.Allocate( + AttributeListImpl::totalSizeToAlloc(AttrSets.size()), + alignof(AttributeListImpl)); + PA = new (Mem) AttributeListImpl(AttrSets); pImpl->AttrsLists.InsertNode(PA, InsertPoint); } @@ -1338,8 +1359,6 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, // AttributeList Accessor Methods //===----------------------------------------------------------------------===// -LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); } - AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const { return getAttributes(ArgNo + FirstArgIndex); } @@ -1495,6 +1514,7 @@ void AttrBuilder::clear() { DerefBytes = DerefOrNullBytes = 0; AllocSizeArgs = 0; ByValType = nullptr; + PreallocatedType = nullptr; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { @@ -1520,6 +1540,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { StackAlignment = Attr.getStackAlignment(); else if (Kind == Attribute::ByVal) ByValType = Attr.getValueAsType(); + else if (Kind == Attribute::Preallocated) + PreallocatedType = Attr.getValueAsType(); else if (Kind == Attribute::Dereferenceable) DerefBytes = Attr.getDereferenceableBytes(); else if (Kind == Attribute::DereferenceableOrNull) @@ -1544,6 +1566,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { StackAlignment.reset(); else if (Val == Attribute::ByVal) ByValType = nullptr; + else if (Val == Attribute::Preallocated) + PreallocatedType = nullptr; else if (Val == Attribute::Dereferenceable) DerefBytes = 0; else if (Val == Attribute::DereferenceableOrNull) @@ -1632,6 +1656,12 @@ AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) { return *this; } +AttrBuilder &AttrBuilder::addPreallocatedAttr(Type *Ty) { + Attrs[Attribute::Preallocated] = true; + PreallocatedType = Ty; + return *this; +} + AttrBuilder 
&AttrBuilder::merge(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! if (!Alignment) @@ -1652,6 +1682,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { if (!ByValType) ByValType = B.ByValType; + if (!PreallocatedType) + PreallocatedType = B.PreallocatedType; + Attrs |= B.Attrs; for (auto I : B.td_attrs()) @@ -1680,6 +1713,9 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) { if (B.ByValType) ByValType = nullptr; + if (B.PreallocatedType) + PreallocatedType = nullptr; + Attrs &= ~B.Attrs; for (auto I : B.td_attrs()) @@ -1739,7 +1775,8 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { return false; return Alignment == B.Alignment && StackAlignment == B.StackAlignment && - DerefBytes == B.DerefBytes && ByValType == B.ByValType; + DerefBytes == B.DerefBytes && ByValType == B.ByValType && + PreallocatedType == B.PreallocatedType; } //===----------------------------------------------------------------------===// @@ -1757,17 +1794,18 @@ AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) { if (!Ty->isPointerTy()) // Attribute that only apply to pointers. - Incompatible.addAttribute(Attribute::ByVal) - .addAttribute(Attribute::Nest) - .addAttribute(Attribute::NoAlias) - .addAttribute(Attribute::NoCapture) - .addAttribute(Attribute::NonNull) - .addDereferenceableAttr(1) // the int here is ignored - .addDereferenceableOrNullAttr(1) // the int here is ignored - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::StructRet) - .addAttribute(Attribute::InAlloca); + Incompatible.addAttribute(Attribute::Nest) + .addAttribute(Attribute::NoAlias) + .addAttribute(Attribute::NoCapture) + .addAttribute(Attribute::NonNull) + .addDereferenceableAttr(1) // the int here is ignored + .addDereferenceableOrNullAttr(1) // the int here is ignored + .addAttribute(Attribute::ReadNone) + .addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::StructRet) + .addAttribute(Attribute::InAlloca) + .addPreallocatedAttr(Ty) + .addByValAttr(Ty); return Incompatible; } diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index cfa1ebfe63c64..4ebe6a0cee79b 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -2840,7 +2840,7 @@ void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, } LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) { - return wrap(unwrap(Instr)->getCalledValue()); + return wrap(unwrap(Instr)->getCalledOperand()); } LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef Instr) { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 1c63476785f44..254b6fe563e37 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -658,15 +658,10 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) { if (I.getDebugLoc() != DebugLoc()) I.setDebugLoc(remapDebugLoc(I.getDebugLoc())); - // Remap DILocations in untyped MDNodes (e.g., llvm.loop). - SmallVector, 2> MDs; - I.getAllMetadata(MDs); - for (auto Attachment : MDs) - if (auto *T = dyn_cast_or_null(Attachment.second)) - for (unsigned N = 0; N < T->getNumOperands(); ++N) - if (auto *Loc = dyn_cast_or_null(T->getOperand(N))) - if (Loc != DebugLoc()) - T->replaceOperandWith(N, remapDebugLoc(Loc)); + // Remap DILocations in llvm.loop attachments. 
+ updateLoopMetadataDebugLocations(I, [&](const DILocation &Loc) { + return remapDebugLoc(&Loc).get(); + }); } } } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 53ab11775af95..ac62ffa0ef13f 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -114,11 +114,12 @@ bool Argument::hasInAllocaAttr() const { return hasAttribute(Attribute::InAlloca); } -bool Argument::hasByValOrInAllocaAttr() const { +bool Argument::hasPassPointeeByValueAttr() const { if (!getType()->isPointerTy()) return false; AttributeList Attrs = getParent()->getAttributes(); return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) || - Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca); + Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) || + Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated); } unsigned Argument::getParamAlignment() const { @@ -1074,8 +1075,8 @@ static Type *DecodeFixedType(ArrayRef &Infos, // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; case IITDescriptor::ScalableVecArgument: { - auto *Ty = cast(DecodeFixedType(Infos, Tys, Context)); - return VectorType::get(Ty->getElementType(), {Ty->getNumElements(), true}); + auto *Ty = cast(DecodeFixedType(Infos, Tys, Context)); + return ScalableVectorType::get(Ty->getElementType(), Ty->getNumElements()); } } llvm_unreachable("unhandled"); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index e04a323e4fe05..d9cda0f7de1f1 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -255,7 +255,7 @@ unsigned CallBase::getNumSubclassExtraOperandsDynamic() const { } bool CallBase::isIndirectCall() const { - const Value *V = getCalledValue(); + const Value *V = getCalledOperand(); if (isa(V) || isa(V)) return false; return !isInlineAsm(); @@ -491,7 +491,7 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef OpB, Instruction *InsertPt) { std::vector Args(CI->arg_begin(), CI->arg_end()); - auto *NewCI = CallInst::Create(CI->getFunctionType(), CI->getCalledValue(), + auto *NewCI = CallInst::Create(CI->getFunctionType(), CI->getCalledOperand(), Args, OpB, CI->getName(), InsertPt); NewCI->setTailCallKind(CI->getTailCallKind()); NewCI->setCallingConv(CI->getCallingConv()); @@ -802,9 +802,9 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, Instruction *InsertPt) { std::vector Args(II->arg_begin(), II->arg_end()); - auto *NewII = InvokeInst::Create(II->getFunctionType(), II->getCalledValue(), - II->getNormalDest(), II->getUnwindDest(), - Args, OpB, II->getName(), InsertPt); + auto *NewII = InvokeInst::Create( + II->getFunctionType(), II->getCalledOperand(), II->getNormalDest(), + II->getUnwindDest(), Args, OpB, II->getName(), InsertPt); NewII->setCallingConv(II->getCallingConv()); NewII->SubclassOptionalData = II->SubclassOptionalData; NewII->setAttributes(II->getAttributes()); @@ -885,11 +885,9 @@ CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef OpB, Instruction *InsertPt) { std::vector Args(CBI->arg_begin(), CBI->arg_end()); - auto *NewCBI = CallBrInst::Create(CBI->getFunctionType(), - CBI->getCalledValue(), - CBI->getDefaultDest(), - CBI->getIndirectDests(), - Args, OpB, CBI->getName(), InsertPt); + auto *NewCBI = CallBrInst::Create( + CBI->getFunctionType(), CBI->getCalledOperand(), CBI->getDefaultDest(), + CBI->getIndirectDests(), Args, OpB, CBI->getName(), InsertPt); NewCBI->setCallingConv(CBI->getCallingConv()); NewCBI->SubclassOptionalData = CBI->SubclassOptionalData; 
NewCBI->setAttributes(CBI->getAttributes()); @@ -1918,11 +1916,11 @@ void ShuffleVectorInst::commute() { bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, ArrayRef Mask) { // V1 and V2 must be vectors of the same type. - if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) + if (!isa(V1->getType()) || V1->getType() != V2->getType()) return false; // Make sure the mask elements make sense. - int V1Size = cast(V1->getType())->getNumElements(); + int V1Size = cast(V1->getType())->getElementCount().Min; for (int Elem : Mask) if (Elem != UndefMaskElem && Elem >= V1Size * 2) return false; diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 604f549afe631..27475781909bf 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -68,6 +68,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { "cfguardtarget operand bundle id drifted!"); (void)CFGuardTargetEntry; + auto *PreallocatedEntry = pImpl->getOrInsertBundleTag("preallocated"); + assert(PreallocatedEntry->second == LLVMContext::OB_preallocated && + "preallocated operand bundle id drifted!"); + (void)PreallocatedEntry; + SyncScope::ID SingleThreadSSID = pImpl->getOrInsertSyncScopeID("singlethread"); assert(SingleThreadSSID == SyncScope::SingleThread && diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index ca2ee588d15da..68b8f8aef6802 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -104,21 +104,6 @@ LLVMContextImpl::~LLVMContextImpl() { delete CDSConstant.second; CDSConstants.clear(); - // Destroy attributes. - for (FoldingSetIterator I = AttrsSet.begin(), - E = AttrsSet.end(); I != E; ) { - FoldingSetIterator Elem = I++; - delete &*Elem; - } - - // Destroy attribute lists. - for (FoldingSetIterator I = AttrsLists.begin(), - E = AttrsLists.end(); - I != E;) { - FoldingSetIterator Elem = I++; - delete &*Elem; - } - // Destroy attribute node lists. for (FoldingSetIterator I = AttrsSetNodes.begin(), E = AttrsSetNodes.end(); I != E; ) { diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp index d73f748b0584d..ba6ca7abae585 100644 --- a/llvm/lib/IR/Mangler.cpp +++ b/llvm/lib/IR/Mangler.cpp @@ -98,7 +98,7 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F, AI != AE; ++AI) { Type *Ty = AI->getType(); // 'Dereference' type in case of byval or inalloca parameter attribute. - if (AI->hasByValOrInAllocaAttr()) + if (AI->hasPassPointeeByValueAttr()) Ty = cast(Ty)->getElementType(); // Size should be aligned to pointer size. unsigned PtrSize = DL.getPointerSize(); diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp index 11d95ac19be6a..d6c3c5035dc85 100644 --- a/llvm/lib/IR/ProfileSummary.cpp +++ b/llvm/lib/IR/ProfileSummary.cpp @@ -65,18 +65,24 @@ Metadata *ProfileSummary::getDetailedSummaryMD(LLVMContext &Context) { // "ProfileFormat" and a string representing the format ("InstrProf" or // "SampleProfile"). The rest of the elements of the outer MDTuple are specific // to the kind of profile summary as returned by getFormatSpecificMD. -Metadata *ProfileSummary::getMD(LLVMContext &Context) { +// IsPartialProfile is an optional field and \p AddPartialField will decide +// whether to add a field for it. 
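// As a hedged illustration (the operand values here are placeholders, not
// from the patch), the tuple produced with AddPartialField == true has nine
// operands:
//
//   !{!"ProfileFormat", !"InstrProf"}
//   !{!"TotalCount", i64 10000}
//   !{!"MaxCount", i64 10}
//   !{!"MaxInternalCount", i64 1}
//   !{!"MaxFunctionCount", i64 1000}
//   !{!"NumCounts", i64 3}
//   !{!"NumFunctions", i64 3}
//   !{!"IsPartialProfile", i64 1}
//   !{!"DetailedSummary", !<detailed-summary-tuple>}
//
// Without the optional field the same tuple has eight operands, which is why
// getFromMD() below accepts either 8 or 9.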
+Metadata *ProfileSummary::getMD(LLVMContext &Context, bool AddPartialField) {
   const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"};
-  Metadata *Components[] = {
-      getKeyValMD(Context, "ProfileFormat", KindStr[PSK]),
-      getKeyValMD(Context, "TotalCount", getTotalCount()),
-      getKeyValMD(Context, "MaxCount", getMaxCount()),
-      getKeyValMD(Context, "MaxInternalCount", getMaxInternalCount()),
-      getKeyValMD(Context, "MaxFunctionCount", getMaxFunctionCount()),
-      getKeyValMD(Context, "NumCounts", getNumCounts()),
-      getKeyValMD(Context, "NumFunctions", getNumFunctions()),
-      getDetailedSummaryMD(Context),
-  };
+  SmallVector<Metadata *, 16> Components;
+  Components.push_back(getKeyValMD(Context, "ProfileFormat", KindStr[PSK]));
+  Components.push_back(getKeyValMD(Context, "TotalCount", getTotalCount()));
+  Components.push_back(getKeyValMD(Context, "MaxCount", getMaxCount()));
+  Components.push_back(
+      getKeyValMD(Context, "MaxInternalCount", getMaxInternalCount()));
+  Components.push_back(
+      getKeyValMD(Context, "MaxFunctionCount", getMaxFunctionCount()));
+  Components.push_back(getKeyValMD(Context, "NumCounts", getNumCounts()));
+  Components.push_back(getKeyValMD(Context, "NumFunctions", getNumFunctions()));
+  if (AddPartialField)
+    Components.push_back(
+        getKeyValMD(Context, "IsPartialProfile", isPartialProfile()));
+  Components.push_back(getDetailedSummaryMD(Context));
   return MDTuple::get(Context, Components);
 }
@@ -141,10 +147,11 @@ static bool getSummaryFromMD(MDTuple *MD, SummaryEntryVector &Summary) {
 
 ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
   MDTuple *Tuple = dyn_cast_or_null<MDTuple>(MD);
-  if (!Tuple || Tuple->getNumOperands() != 8)
+  if (!Tuple || Tuple->getNumOperands() < 8 || Tuple->getNumOperands() > 9)
     return nullptr;
 
-  auto &FormatMD = Tuple->getOperand(0);
+  int i = 0;
+  auto &FormatMD = Tuple->getOperand(i++);
   ProfileSummary::Kind SummaryKind;
   if (isKeyValuePair(dyn_cast_or_null<MDTuple>(FormatMD), "ProfileFormat",
                      "SampleProfile"))
@@ -160,27 +167,41 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
 
   uint64_t NumCounts, TotalCount, NumFunctions, MaxFunctionCount, MaxCount,
       MaxInternalCount;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(1)), "TotalCount",
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "TotalCount",
               TotalCount))
     return nullptr;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(2)), "MaxCount", MaxCount))
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "MaxCount", MaxCount))
     return nullptr;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(3)), "MaxInternalCount",
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "MaxInternalCount",
               MaxInternalCount))
     return nullptr;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(4)), "MaxFunctionCount",
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "MaxFunctionCount",
               MaxFunctionCount))
     return nullptr;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(5)), "NumCounts", NumCounts))
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "NumCounts",
+              NumCounts))
     return nullptr;
-  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(6)), "NumFunctions",
+  if (!getVal(dyn_cast<MDTuple>(Tuple->getOperand(i++)), "NumFunctions",
               NumFunctions))
     return nullptr;
+
+  // Initialize IsPartialProfile because the field is optional.
+  uint64_t IsPartialProfile = 0;
+
+  // IsPartialProfile is optional, so it is not an error if the next operand
+  // is something other than IsPartialProfile.
+  if (getVal(dyn_cast<MDTuple>(Tuple->getOperand(i)), "IsPartialProfile",
+             IsPartialProfile)) {
+    // When IsPartialProfile is present, make sure we do not step past the
+    // bounds of the Tuple operand array.
+ if (Tuple->getNumOperands() < 9) + return nullptr; + i++; + } SummaryEntryVector Summary; - if (!getSummaryFromMD(dyn_cast(Tuple->getOperand(7)), Summary)) + if (!getSummaryFromMD(dyn_cast(Tuple->getOperand(i++)), Summary)) return nullptr; return new ProfileSummary(SummaryKind, std::move(Summary), TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount, - NumCounts, NumFunctions); + NumCounts, NumFunctions, IsPartialProfile); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index de82170458352..b1f060737c6f2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -397,6 +397,9 @@ class Verifier : public InstVisitor, VerifierSupport { } private: + /// Whether a metadata node is allowed to be, or contain, a DILocation. + enum class AreDebugLocsAllowed { No, Yes }; + // Verification methods... void visitGlobalValue(const GlobalValue &GV); void visitGlobalVariable(const GlobalVariable &GV); @@ -405,7 +408,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &A, const Constant &C); void visitNamedMDNode(const NamedMDNode &NMD); - void visitMDNode(const MDNode &MD); + void visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs); void visitMetadataAsValue(const MetadataAsValue &MD, Function *F); void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F); void visitComdat(const Comdat &C); @@ -780,11 +783,11 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { if (!MD) continue; - visitMDNode(*MD); + visitMDNode(*MD, AreDebugLocsAllowed::Yes); } } -void Verifier::visitMDNode(const MDNode &MD) { +void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { // Only visit each node once. Metadata can be mutually recursive, so this // avoids infinite recursion here, as well as being an optimization. if (!MDNodes.insert(&MD).second) @@ -807,8 +810,10 @@ void Verifier::visitMDNode(const MDNode &MD) { continue; Assert(!isa(Op), "Invalid operand for global metadata!", &MD, Op); + AssertDI(!isa(Op) || AllowLocs == AreDebugLocsAllowed::Yes, + "DILocation not allowed within this metadata node", &MD, Op); if (auto *N = dyn_cast(Op)) { - visitMDNode(*N); + visitMDNode(*N, AllowLocs); continue; } if (auto *V = dyn_cast(Op)) { @@ -851,7 +856,7 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) { void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { Metadata *MD = MDV.getMetadata(); if (auto *N = dyn_cast(MD)) { - visitMDNode(*N); + visitMDNode(*N, AreDebugLocsAllowed::No); return; } @@ -1563,7 +1568,8 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) { /// arguments. 
static bool isFuncOrArgAttr(Attribute::AttrKind Kind) { return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly || - Kind == Attribute::ReadNone || Kind == Attribute::NoFree; + Kind == Attribute::ReadNone || Kind == Attribute::NoFree || + Kind == Attribute::Preallocated; } void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, @@ -1614,11 +1620,13 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, unsigned AttrCount = 0; AttrCount += Attrs.hasAttribute(Attribute::ByVal); AttrCount += Attrs.hasAttribute(Attribute::InAlloca); + AttrCount += Attrs.hasAttribute(Attribute::Preallocated); AttrCount += Attrs.hasAttribute(Attribute::StructRet) || Attrs.hasAttribute(Attribute::InReg); AttrCount += Attrs.hasAttribute(Attribute::Nest); - Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', " - "and 'sret' are incompatible!", + Assert(AttrCount <= 1, + "Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', " + "and 'sret' are incompatible!", V); Assert(!(Attrs.hasAttribute(Attribute::InAlloca) && @@ -1668,6 +1676,12 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, "Attribute 'byval' type does not match parameter!", V); } + if (Attrs.hasAttribute(Attribute::Preallocated)) { + Assert(Attrs.getPreallocatedType() == + cast(Ty)->getElementType(), + "Attribute 'preallocated' type does not match parameter!", V); + } + AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty); Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs), "Wrong types for attribute: " + @@ -1678,8 +1692,10 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, SmallPtrSet Visited; if (!PTy->getElementType()->isSized(&Visited)) { Assert(!Attrs.hasAttribute(Attribute::ByVal) && - !Attrs.hasAttribute(Attribute::InAlloca), - "Attributes 'byval' and 'inalloca' do not support unsized types!", + !Attrs.hasAttribute(Attribute::InAlloca) && + !Attrs.hasAttribute(Attribute::Preallocated), + "Attributes 'byval', 'inalloca', and 'preallocated' do not " + "support unsized types!", V); } if (!isa(PTy->getElementType())) @@ -1720,9 +1736,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, !RetAttrs.hasAttribute(Attribute::NoFree) && !RetAttrs.hasAttribute(Attribute::Returned) && !RetAttrs.hasAttribute(Attribute::InAlloca) && + !RetAttrs.hasAttribute(Attribute::Preallocated) && !RetAttrs.hasAttribute(Attribute::SwiftSelf) && !RetAttrs.hasAttribute(Attribute::SwiftError)), - "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', 'nofree'" + "Attributes 'byval', 'inalloca', 'preallocated', 'nest', 'sret', " + "'nocapture', 'nofree', " "'returned', 'swiftself', and 'swifterror' do not apply to return " "values!", V); @@ -2300,7 +2318,7 @@ void Verifier::visitFunction(const Function &F) { "function declaration may not have a !prof attachment", &F); // Verify the metadata itself. - visitMDNode(*I.second); + visitMDNode(*I.second, AreDebugLocsAllowed::Yes); } Assert(!F.hasPersonalityFn(), "Function declaration shouldn't have a personality routine", &F); @@ -2324,6 +2342,7 @@ void Verifier::visitFunction(const Function &F) { // Visit metadata attachments. for (const auto &I : MDs) { // Verify that the attachment is legal. 
+ auto AllowLocs = AreDebugLocsAllowed::No; switch (I.first) { default: break; @@ -2338,6 +2357,7 @@ void Verifier::visitFunction(const Function &F) { AssertDI(!AttachedTo || AttachedTo == &F, "DISubprogram attached to more than one function", SP, &F); AttachedTo = &F; + AllowLocs = AreDebugLocsAllowed::Yes; break; } case LLVMContext::MD_prof: @@ -2348,7 +2368,7 @@ void Verifier::visitFunction(const Function &F) { } // Verify the metadata itself. - visitMDNode(*I.second); + visitMDNode(*I.second, AllowLocs); } } @@ -2859,9 +2879,9 @@ void Verifier::visitPHINode(PHINode &PN) { } void Verifier::visitCallBase(CallBase &Call) { - Assert(Call.getCalledValue()->getType()->isPointerTy(), + Assert(Call.getCalledOperand()->getType()->isPointerTy(), "Called function must be a pointer!", Call); - PointerType *FPTy = cast(Call.getCalledValue()->getType()); + PointerType *FPTy = cast(Call.getCalledOperand()->getType()); Assert(FPTy->getElementType()->isFunctionTy(), "Called function is not pointer to function type!", Call); @@ -2894,8 +2914,8 @@ void Verifier::visitCallBase(CallBase &Call) { bool IsIntrinsic = Call.getCalledFunction() && Call.getCalledFunction()->getName().startswith("llvm."); - Function *Callee - = dyn_cast(Call.getCalledValue()->stripPointerCasts()); + Function *Callee = + dyn_cast(Call.getCalledOperand()->stripPointerCasts()); if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) { // Don't allow speculatable on call sites, unless the underlying function @@ -2904,6 +2924,14 @@ void Verifier::visitCallBase(CallBase &Call) { "speculatable attribute may not apply to call sites", Call); } + if (Attrs.hasAttribute(AttributeList::FunctionIndex, + Attribute::Preallocated)) { + Assert(Call.getCalledFunction()->getIntrinsicID() == + Intrinsic::call_preallocated_arg, + "preallocated as a call site attribute can only be on " + "llvm.call.preallocated.arg"); + } + // Verify call attributes. verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic); @@ -2950,6 +2978,13 @@ void Verifier::visitCallBase(CallBase &Call) { Assert(isa(ArgVal) || isa(ArgVal), "immarg operand has non-immediate parameter", ArgVal, Call); } + + if (Call.paramHasAttr(i, Attribute::Preallocated)) { + Value *ArgVal = Call.getArgOperand(i); + Assert(Call.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0, + "preallocated operand requires a preallocated bundle", ArgVal, + Call); + } } if (FTy->isVarArg()) { @@ -3020,9 +3055,11 @@ void Verifier::visitCallBase(CallBase &Call) { visitIntrinsicCall(ID, Call); // Verify that a callsite has at most one "deopt", at most one "funclet", at - // most one "gc-transition", and at most one "cfguardtarget" operand bundle. + // most one "gc-transition", at most one "cfguardtarget", + // and at most one "preallocated" operand bundle. 
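// A hedged sketch of a call site these rules accept, built with IRBuilder.
// The helper is illustrative (not part of the patch) and assumes every
// argument is a pointer passed preallocated; a real frontend would also
// materialize the arguments via llvm.call.preallocated.arg, elided here.
static CallInst *buildPreallocatedCall(IRBuilder<> &B, FunctionCallee Callee,
                                       ArrayRef<Value *> Args) {
  // %t = call token @llvm.call.preallocated.setup(i32 <number of args>)
  CallInst *Token =
      B.CreateIntrinsic(Intrinsic::call_preallocated_setup, {},
                        {B.getInt32(static_cast<uint32_t>(Args.size()))});
  // Exactly one "preallocated" bundle, whose only input is the setup token.
  OperandBundleDef OB("preallocated", std::vector<Value *>{Token});
  CallInst *CI = B.CreateCall(Callee, Args, {OB});
  // Mark each argument preallocated so the count matches the setup's i32.
  for (unsigned I = 0, E = Args.size(); I != E; ++I)
    CI->addParamAttr(I, Attribute::getWithPreallocatedType(
                            B.getContext(),
                            cast<PointerType>(Args[I]->getType())
                                ->getElementType()));
  return CI;
}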
bool FoundDeoptBundle = false, FoundFuncletBundle = false, - FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false; + FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false, + FoundPreallocatedBundle = false; for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) { OperandBundleUse BU = Call.getOperandBundleAt(i); uint32_t Tag = BU.getTagID(); @@ -3047,6 +3084,18 @@ void Verifier::visitCallBase(CallBase &Call) { FoundCFGuardTargetBundle = true; Assert(BU.Inputs.size() == 1, "Expected exactly one cfguardtarget bundle operand", Call); + } else if (Tag == LLVMContext::OB_preallocated) { + Assert(!FoundPreallocatedBundle, "Multiple preallocated operand bundles", + Call); + FoundPreallocatedBundle = true; + Assert(BU.Inputs.size() == 1, + "Expected exactly one preallocated bundle operand", Call); + auto Input = dyn_cast(BU.Inputs.front()); + Assert(Input && + Input->getIntrinsicID() == Intrinsic::call_preallocated_setup, + "\"preallocated\" argument must be a token from " + "llvm.call.preallocated.setup", + Call); } } @@ -3077,8 +3126,9 @@ static bool isTypeCongruent(Type *L, Type *R) { static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) { static const Attribute::AttrKind ABIAttrs[] = { - Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, - Attribute::InReg, Attribute::SwiftSelf, Attribute::SwiftError}; + Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, + Attribute::InReg, Attribute::SwiftSelf, Attribute::SwiftError, + Attribute::Preallocated}; AttrBuilder Copy; for (auto AK : ABIAttrs) { if (Attrs.hasParamAttribute(I, AK)) @@ -3120,7 +3170,7 @@ void Verifier::verifyMustTailCall(CallInst &CI) { "cannot guarantee tail call due to mismatched calling conv", &CI); // - All ABI-impacting function attributes, such as sret, byval, inreg, - // returned, and inalloca, must match. + // returned, preallocated, and inalloca, must match. AttributeList CallerAttrs = F->getAttributes(); AttributeList CalleeAttrs = CI.getAttributes(); for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) { @@ -4264,7 +4314,7 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *N = I.getDebugLoc().getAsMDNode()) { AssertDI(isa(N), "invalid !dbg metadata attachment", &I, N); - visitMDNode(*N); + visitMDNode(*N, AreDebugLocsAllowed::Yes); } if (auto *DII = dyn_cast(&I)) { @@ -4272,6 +4322,17 @@ void Verifier::visitInstruction(Instruction &I) { verifyNotEntryValue(*DII); } + SmallVector, 4> MDs; + I.getAllMetadata(MDs); + for (auto Attachment : MDs) { + unsigned Kind = Attachment.first; + auto AllowLocs = + (Kind == LLVMContext::MD_dbg || Kind == LLVMContext::MD_loop) + ? 
AreDebugLocsAllowed::Yes
+            : AreDebugLocsAllowed::No;
+    visitMDNode(*Attachment.second, AllowLocs);
+  }
+
   InstsInThisBlock.insert(&I);
 }
@@ -4432,6 +4493,71 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     }
     break;
   }
+  case Intrinsic::call_preallocated_setup: {
+    auto *NumArgs = dyn_cast<ConstantInt>(Call.getArgOperand(0));
+    Assert(NumArgs != nullptr,
+           "llvm.call.preallocated.setup argument must be a constant");
+    bool FoundCall = false;
+    for (User *U : Call.users()) {
+      auto *UseCall = dyn_cast<CallBase>(U);
+      Assert(UseCall != nullptr,
+             "Uses of llvm.call.preallocated.setup must be calls");
+      const Function *Fn = UseCall->getCalledFunction();
+      if (Fn && Fn->getIntrinsicID() == Intrinsic::call_preallocated_arg) {
+        auto *AllocArgIndex = dyn_cast<ConstantInt>(UseCall->getArgOperand(1));
+        Assert(AllocArgIndex != nullptr,
+               "llvm.call.preallocated.alloc arg index must be a constant");
+        auto AllocArgIndexInt = AllocArgIndex->getValue();
+        Assert(AllocArgIndexInt.sge(0) &&
+                   AllocArgIndexInt.slt(NumArgs->getValue()),
+               "llvm.call.preallocated.alloc arg index must be between 0 and "
+               "corresponding "
+               "llvm.call.preallocated.setup's argument count");
+      } else {
+        Assert(!FoundCall, "Can have at most one call corresponding to a "
+                           "llvm.call.preallocated.setup");
+        FoundCall = true;
+        size_t NumPreallocatedArgs = 0;
+        for (unsigned i = 0; i < UseCall->getNumArgOperands(); i++) {
+          if (UseCall->paramHasAttr(i, Attribute::Preallocated)) {
+            ++NumPreallocatedArgs;
+          }
+        }
+        Assert(NumArgs->equalsInt(NumPreallocatedArgs),
+               "llvm.call.preallocated.setup arg size must be equal to number "
+               "of preallocated arguments "
+               "at call site",
+               Call, *UseCall);
+        // getOperandBundle() cannot be called if more than one operand bundle
+        // of the same kind exists. There is already a check elsewhere for
+        // this, so skip the check here if we see more than one.
+ if (UseCall->countOperandBundlesOfType(LLVMContext::OB_preallocated) > + 1) { + return; + } + auto PreallocatedBundle = + UseCall->getOperandBundle(LLVMContext::OB_preallocated); + Assert(PreallocatedBundle, + "Use of llvm.call.preallocated.setup outside intrinsics " + "must be in \"preallocated\" operand bundle"); + Assert(PreallocatedBundle->Inputs.front().get() == &Call, + "preallocated bundle must have token from corresponding " + "llvm.call.preallocated.setup"); + } + } + break; + } + case Intrinsic::call_preallocated_arg: { + auto *Token = dyn_cast(Call.getArgOperand(0)); + Assert(Token && Token->getCalledFunction()->getIntrinsicID() == + Intrinsic::call_preallocated_setup, + "llvm.call.preallocated.arg token argument must be a " + "llvm.call.preallocated.setup"); + Assert(Call.hasFnAttr(Attribute::Preallocated), + "llvm.call.preallocated.arg must be called with a \"preallocated\" " + "call site attribute"); + break; + } case Intrinsic::gcroot: case Intrinsic::gcwrite: case Intrinsic::gcread: diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index e9eb1d136e75d..ab39a0819250c 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -603,6 +603,7 @@ Error LTO::addModule(InputFile &Input, unsigned ModI, if (LTOInfo->IsThinLTO) return addThinLTO(BM, ModSyms, ResI, ResE); + RegularLTO.EmptyCombinedModule = false; Expected ModOrErr = addRegularLTO(BM, ModSyms, ResI, ResE); if (!ModOrErr) @@ -1026,10 +1027,13 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); } - if (Error Err = - backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, - std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex)) - return Err; + + if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) { + if (Error Err = backend( + Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, + std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex)) + return Err; + } return finalizeOptimizationRemarks(std::move(*DiagFileOrErr)); } diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp index c51cdff59fa0a..479578fca9a84 100644 --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -14,16 +14,21 @@ void MCAsmInfoXCOFF::anchor() {} MCAsmInfoXCOFF::MCAsmInfoXCOFF() { IsLittleEndian = false; - HasDotTypeDotSizeDirective = false; - COMMDirectiveAlignmentIsInBytes = false; - LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + SupportsQuotedNames = false; UseDotAlignForAlignment = true; + ZeroDirective = "\t.space\t"; + ZeroDirectiveSupportsNonZeroValue = false; AsciiDirective = nullptr; // not supported AscizDirective = nullptr; // not supported - NeedsFunctionDescriptors = true; - HasDotLGloblDirective = true; Data64bitsDirective = "\t.llong\t"; - SupportsQuotedNames = false; + COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + HasDotTypeDotSizeDirective = false; + HasDotExternDirective = true; + HasDotLGloblDirective = true; + SymbolsHaveSMC = true; + UseIntegratedAssembler = false; + NeedsFunctionDescriptors = true; } bool MCAsmInfoXCOFF::isAcceptableChar(char C) const { diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 3f18417f92460..df688f43fe0ec 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -675,6 +675,9 @@ bool MCAsmStreamer::emitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_Protected: OS << "\t.protected\t"; break; case 
MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Extern: + OS << "\t.extern\t"; + break; case MCSA_Weak: OS << MAI->getWeakDirective(); break; case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 8921c29944c06..49d863f258bf9 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -203,6 +203,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { // defined. switch (Attribute) { case MCSA_Cold: + case MCSA_Extern: case MCSA_LazyReference: case MCSA_Reference: case MCSA_SymbolResolver: diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp index 54741fdd686dd..a7dc0626d0ab7 100644 --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -23,15 +23,10 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, return false; } -bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const { - if (Inst.getNumOperands() == 0 || - Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) - return false; - - int64_t Imm = Inst.getOperand(0).getImm(); - Target = Addr+Size+Imm; - return true; +bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/, + uint64_t /*Size*/, + uint64_t & /*Target*/) const { + return false; } Optional diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index a59d082acc7d8..2b1d1b28ea184 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -326,6 +326,7 @@ bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym, case MCSA_ELF_TypeCommon: case MCSA_ELF_TypeNoType: case MCSA_ELF_TypeGnuUniqueObject: + case MCSA_Extern: case MCSA_Hidden: case MCSA_IndirectSymbol: case MCSA_Internal: diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index ca75f12c44a19..a4a1e7e8b3dc8 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -35,6 +35,7 @@ bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym, switch (Attribute) { case MCSA_Global: + case MCSA_Extern: Symbol->setStorageClass(XCOFF::C_EXT); Symbol->setExternal(true); break; @@ -42,6 +43,10 @@ bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym, Symbol->setStorageClass(XCOFF::C_HIDEXT); Symbol->setExternal(true); break; + case llvm::MCSA_Weak: + Symbol->setStorageClass(XCOFF::C_WEAKEXT); + Symbol->setExternal(true); + break; default: report_fatal_error("Not implemented yet."); } diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index d51d3e1b5f719..0c09f99a78973 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -204,7 +204,7 @@ static void writePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, } // Write X as a plain integer value at offset Offset in Stream. 
-static void writeI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { +static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { uint8_t Buffer[4]; support::endian::write32le(Buffer, X); Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset); @@ -308,6 +308,18 @@ class WasmObjectWriter : public MCObjectWriter { W.OS << Str; } + void writeI32(int32_t val) { + char Buffer[4]; + support::endian::write32le(Buffer, val); + W.OS.write(Buffer, sizeof(Buffer)); + } + + void writeI64(int64_t val) { + char Buffer[8]; + support::endian::write64le(Buffer, val); + W.OS.write(Buffer, sizeof(Buffer)); + } + void writeValueType(wasm::ValType Ty) { W.OS << static_cast(Ty); } void writeTypeSection(ArrayRef Signatures); @@ -321,6 +333,7 @@ class WasmObjectWriter : public MCObjectWriter { ArrayRef Functions); void writeDataSection(); void writeEventSection(ArrayRef Events); + void writeGlobalSection(ArrayRef Globals); void writeRelocSection(uint32_t SectionIndex, StringRef Name, std::vector &Relocations); void writeLinkingMetaDataSection( @@ -665,7 +678,7 @@ void WasmObjectWriter::applyRelocations( case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: case wasm::R_WASM_GLOBAL_INDEX_I32: - writeI32(Stream, Value, Offset); + patchI32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB: case wasm::R_WASM_TABLE_INDEX_REL_SLEB: @@ -777,6 +790,40 @@ void WasmObjectWriter::writeEventSection(ArrayRef Events) { endSection(Section); } +void WasmObjectWriter::writeGlobalSection(ArrayRef Globals) { + if (Globals.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_GLOBAL); + + encodeULEB128(Globals.size(), W.OS); + for (const wasm::WasmGlobal &Global : Globals) { + encodeULEB128(Global.Type.Type, W.OS); + W.OS << char(Global.Type.Mutable); + W.OS << char(Global.InitExpr.Opcode); + switch (Global.Type.Type) { + case wasm::WASM_TYPE_I32: + encodeSLEB128(0, W.OS); + break; + case wasm::WASM_TYPE_I64: + encodeSLEB128(0, W.OS); + break; + case wasm::WASM_TYPE_F32: + writeI32(0); + break; + case wasm::WASM_TYPE_F64: + writeI64(0); + break; + default: + llvm_unreachable("unexpected type"); + } + W.OS << char(wasm::WASM_OPCODE_END); + } + + endSection(Section); +} + void WasmObjectWriter::writeExportSection(ArrayRef Exports) { if (Exports.empty()) return; @@ -1118,6 +1165,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, SmallVector Imports; SmallVector Exports; SmallVector Events; + SmallVector Globals; SmallVector SymbolInfos; SmallVector, 2> InitFuncs; std::map> Comdats; @@ -1377,22 +1425,43 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, } else if (WS.isGlobal()) { // A "true" Wasm global (currently just __stack_pointer) - if (WS.isDefined()) - report_fatal_error("don't yet support defined globals"); - - // An import; the index was assigned above - LLVM_DEBUG(dbgs() << " -> global index: " - << WasmIndices.find(&WS)->second << "\n"); - + if (WS.isDefined()) { + assert(WasmIndices.count(&WS) == 0); + wasm::WasmGlobal Global; + Global.Type = WS.getGlobalType(); + Global.Index = NumGlobalImports + Globals.size(); + switch (Global.Type.Type) { + case wasm::WASM_TYPE_I32: + Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST; + break; + case wasm::WASM_TYPE_I64: + Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST; + break; + case wasm::WASM_TYPE_F32: + Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST; + break; + case wasm::WASM_TYPE_F64: + Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST; + 
break; + default: + llvm_unreachable("unexpected type"); + } + WasmIndices[&WS] = Global.Index; + Globals.push_back(Global); + } else { + // An import; the index was assigned above + LLVM_DEBUG(dbgs() << " -> global index: " + << WasmIndices.find(&WS)->second << "\n"); + } } else if (WS.isEvent()) { // C++ exception symbol (__cpp_exception) unsigned Index; if (WS.isDefined()) { + assert(WasmIndices.count(&WS) == 0); Index = NumEventImports + Events.size(); wasm::WasmEventType Event; Event.SigIndex = getEventType(WS); Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION; - assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = Index; Events.push_back(Event); } else { @@ -1584,6 +1653,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, // Skip the "table" section; we import the table instead. // Skip the "memory" section; we import the memory instead. writeEventSection(Events); + writeGlobalSection(Globals); writeExportSection(Exports); writeElemSection(TableElems); writeDataCountSection(); diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index aa3a6424a7a77..11ce5dde47eed 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -354,21 +354,24 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, // Handle undefined symbol. UndefinedCsects.emplace_back(ContainingCsect); SectionMap[ContainingCsect] = &UndefinedCsects.back(); - } else { - // If the symbol is the csect itself, we don't need to put the symbol - // into csect's Syms. - if (XSym == ContainingCsect->getQualNameSymbol()) - continue; + if (nameShouldBeInStringTable(ContainingCsect->getName())) + Strings.add(ContainingCsect->getName()); + continue; + } - // Only put a label into the symbol table when it is an external label. - if (!XSym->isExternal()) - continue; + // If the symbol is the csect itself, we don't need to put the symbol + // into csect's Syms. + if (XSym == ContainingCsect->getQualNameSymbol()) + continue; - assert(SectionMap.find(ContainingCsect) != SectionMap.end() && - "Expected containing csect to exist in map"); - // Lookup the containing csect and add the symbol to it. - SectionMap[ContainingCsect]->Syms.emplace_back(XSym); - } + // Only put a label into the symbol table when it is an external label. + if (!XSym->isExternal()) + continue; + + assert(SectionMap.find(ContainingCsect) != SectionMap.end() && + "Expected containing csect to exist in map"); + // Lookup the containing csect and add the symbol to it. + SectionMap[ContainingCsect]->Syms.emplace_back(XSym); // If the name does not fit in the storage provided in the symbol table // entry, add it to the string table. diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp index 0ee084c7ce1a9..e945e8cecce93 100644 --- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp @@ -77,9 +77,6 @@ unsigned LSUnit::dispatch(const InstRef &IR) { acquireSQSlot(); if (Desc.MayStore) { - // Always create a new group for store operations. - - // A store may not pass a previous store or store barrier. 
unsigned NewGID = createMemoryGroup(); MemoryGroup &NewGroup = getGroup(NewGID); NewGroup.addInstruction(); @@ -91,16 +88,32 @@ unsigned LSUnit::dispatch(const InstRef &IR) { MemoryGroup &IDom = getGroup(ImmediateLoadDominator); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator << ") --> (" << NewGID << ")\n"); - IDom.addSuccessor(&NewGroup); + IDom.addSuccessor(&NewGroup, !assumeNoAlias()); + } + + // A store may not pass a previous store barrier. + if (CurrentStoreBarrierGroupID) { + MemoryGroup &StoreGroup = getGroup(CurrentStoreBarrierGroupID); + LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" + << CurrentStoreBarrierGroupID + << ") --> (" << NewGID << ")\n"); + StoreGroup.addSuccessor(&NewGroup, true); } - if (CurrentStoreGroupID) { + + // A store may not pass a previous store. + if (CurrentStoreGroupID && + (CurrentStoreGroupID != CurrentStoreBarrierGroupID)) { MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID); LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID << ") --> (" << NewGID << ")\n"); - StoreGroup.addSuccessor(&NewGroup); + StoreGroup.addSuccessor(&NewGroup, !assumeNoAlias()); } + CurrentStoreGroupID = NewGID; + if (IsMemBarrier) + CurrentStoreBarrierGroupID = NewGID; + if (Desc.MayLoad) { CurrentLoadGroupID = NewGID; if (IsMemBarrier) @@ -112,31 +125,59 @@ unsigned LSUnit::dispatch(const InstRef &IR) { assert(Desc.MayLoad && "Expected a load!"); - // Always create a new memory group if this is the first load of the sequence. + unsigned ImmediateLoadDominator = + std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID); + + // A new load group is created if we are in one of the following situations: + // 1) This is a load barrier (by construction, a load barrier is always + // assigned to a different memory group). + // 2) There is no load in flight (by construction we always keep loads and + // stores into separate memory groups). + // 3) There is a load barrier in flight. This load depends on it. + // 4) There is an intervening store between the last load dispatched to the + // LSU and this load. We always create a new group even if this load + // does not alias the last dispatched store. + // 5) There is no intervening store and there is an active load group. + // However that group has already started execution, so we cannot add + // this load to it. + bool ShouldCreateANewGroup = + IsMemBarrier || !ImmediateLoadDominator || + CurrentLoadBarrierGroupID == ImmediateLoadDominator || + ImmediateLoadDominator <= CurrentStoreGroupID || + getGroup(ImmediateLoadDominator).isExecuting(); - // A load may not pass a previous store unless flag 'NoAlias' is set. - // A load may pass a previous load. - // A younger load cannot pass a older load barrier. - // A load barrier cannot pass a older load. - bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier || - CurrentLoadGroupID <= CurrentStoreGroupID || - CurrentLoadGroupID <= CurrentLoadBarrierGroupID; if (ShouldCreateANewGroup) { unsigned NewGID = createMemoryGroup(); MemoryGroup &NewGroup = getGroup(NewGID); NewGroup.addInstruction(); + // A load may not pass a previous store or store barrier + // unless flag 'NoAlias' is set. 
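// For intuition, the decision above restated as a standalone predicate (a
// sketch, not LSUnit API; a group ID of 0 means "no such group in flight";
// requires <algorithm> for std::max):
static bool shouldStartNewLoadGroup(bool IsMemBarrier, unsigned LoadGID,
                                    unsigned LoadBarrierGID, unsigned StoreGID,
                                    bool DominatorIsExecuting) {
  unsigned Dom = std::max(LoadGID, LoadBarrierGID); // youngest load-side group
  return IsMemBarrier              // (1) barriers get their own group
         || !Dom                   // (2) no load in flight
         || LoadBarrierGID == Dom  // (3) a load barrier is in flight
         || Dom <= StoreGID        // (4) a store intervened after that load
         || DominatorIsExecuting;  // (5) that group already began executing
}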
     if (!assumeNoAlias() && CurrentStoreGroupID) {
-      MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+      MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
       LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
                         << ") --> (" << NewGID << ")\n");
-      StGroup.addSuccessor(&NewGroup);
+      StoreGroup.addSuccessor(&NewGroup, true);
     }
-    if (CurrentLoadBarrierGroupID) {
-      MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
-      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
-                        << ") --> (" << NewGID << ")\n");
-      LdGroup.addSuccessor(&NewGroup);
+
+    // A load barrier may not pass a previous load or load barrier.
+    if (IsMemBarrier) {
+      if (ImmediateLoadDominator) {
+        MemoryGroup &LoadGroup = getGroup(ImmediateLoadDominator);
+        LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
+                          << ImmediateLoadDominator
+                          << ") --> (" << NewGID << ")\n");
+        LoadGroup.addSuccessor(&NewGroup, true);
+      }
+    } else {
+      // A younger load cannot pass an older load barrier.
+      if (CurrentLoadBarrierGroupID) {
+        MemoryGroup &LoadGroup = getGroup(CurrentLoadBarrierGroupID);
+        LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: ("
+                          << CurrentLoadBarrierGroupID
+                          << ") --> (" << NewGID << ")\n");
+        LoadGroup.addSuccessor(&NewGroup, true);
+      }
     }
 
     CurrentLoadGroupID = NewGID;
@@ -145,6 +186,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
     return NewGID;
   }
 
+  // A load may pass a previous load.
   MemoryGroup &Group = getGroup(CurrentLoadGroupID);
   Group.addInstruction();
   return CurrentLoadGroupID;
diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp
index ebb29ff8c9f1f..28233f8bdc77d 100644
--- a/llvm/lib/Object/COFFObjectFile.cpp
+++ b/llvm/lib/Object/COFFObjectFile.cpp
@@ -166,7 +166,7 @@ uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const {
 }
 
 Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const {
-  uint64_t Result = getSymbolValue(Ref);
+  uint64_t Result = cantFail(getSymbolValue(Ref));
   COFFSymbolRef Symb = getCOFFSymbol(Ref);
   int32_t SectionNumber = Symb.getSectionNumber();
 
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index fe90378114461..61b36ea0f448a 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -54,15 +54,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const {
   return *this == **SymSec;
 }
 
-uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const {
+Expected<uint64_t> ObjectFile::getSymbolValue(DataRefImpl Ref) const {
   if (Expected<uint32_t> FlagsOrErr = getSymbolFlags(Ref)) {
     if (*FlagsOrErr & SymbolRef::SF_Undefined)
       return 0;
     if (*FlagsOrErr & SymbolRef::SF_Common)
       return getCommonSymbolSize(Ref);
   } else
-    // TODO: Actually report errors helpfully.
-    report_fatal_error(FlagsOrErr.takeError());
+    // TODO: Test this error.
+    return FlagsOrErr.takeError();
   return getSymbolValueImpl(Ref);
 }
 
diff --git a/llvm/lib/Object/SymbolSize.cpp b/llvm/lib/Object/SymbolSize.cpp
index 04257f11d7d12..84eed4d169d3a 100644
--- a/llvm/lib/Object/SymbolSize.cpp
+++ b/llvm/lib/Object/SymbolSize.cpp
@@ -61,8 +61,11 @@ llvm::object::computeSymbolSizes(const ObjectFile &O) {
   unsigned SymNum = 0;
   for (symbol_iterator I = O.symbol_begin(), E = O.symbol_end(); I != E; ++I) {
     SymbolRef Sym = *I;
-    uint64_t Value = Sym.getValue();
-    Addresses.push_back({I, Value, SymNum, getSymbolSectionID(O, Sym)});
+    Expected<uint64_t> ValueOrErr = Sym.getValue();
+    if (!ValueOrErr)
+      // TODO: Actually report errors helpfully.
+      report_fatal_error(ValueOrErr.takeError());
+    Addresses.push_back({I, *ValueOrErr, SymNum, getSymbolSectionID(O, Sym)});
     ++SymNum;
   }
   for (SectionRef Sec : O.sections()) {
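// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: one way callers can consume the
// Expected<uint64_t> that SymbolRef::getValue() returns after this change,
// reporting failures instead of aborting. Assumes the post-patch signature.
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static void printSymbolValues(const llvm::object::ObjectFile &Obj) {
  for (const llvm::object::SymbolRef &Sym : Obj.symbols()) {
    llvm::Expected<uint64_t> ValOrErr = Sym.getValue();
    if (!ValOrErr) {
      // Log and continue rather than report_fatal_error'ing the whole run.
      llvm::logAllUnhandledErrors(ValOrErr.takeError(), llvm::errs(),
                                  "symbol value: ");
      continue;
    }
    llvm::errs() << "value: " << *ValOrErr << "\n";
  }
}
// ---------------------------------------------------------------------------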
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index ec3ec55011f95..734e1be4b2d5c 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -187,7 +187,7 @@ toDebugS(ArrayRef<CodeViewYAML::YAMLDebugSubsection> Subsections,
   std::vector<DebugSubsectionRecordBuilder> Builders;
   uint32_t Size = sizeof(uint32_t);
   for (auto &SS : CVSS) {
-    DebugSubsectionRecordBuilder B(SS, CodeViewContainer::ObjectFile);
+    DebugSubsectionRecordBuilder B(SS);
     Size += B.calculateSerializedLength();
     Builders.push_back(std::move(B));
   }
@@ -197,7 +197,7 @@ toDebugS(ArrayRef<CodeViewYAML::YAMLDebugSubsection> Subsections,
   Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC));
 
   for (const auto &B : Builders) {
-    Err(B.commit(Writer));
+    Err(B.commit(Writer, CodeViewContainer::ObjectFile));
   }
   return {Output};
 }
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index b904f983dcebe..089dfca07f088 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProf.h"
@@ -144,7 +145,7 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
   StringRef buffer = Buffer.getBufferStart();
   return count == 0 ||
          std::all_of(buffer.begin(), buffer.begin() + count,
-                     [](char c) { return isPrint(c) || ::isspace(c); });
+                     [](char c) { return isPrint(c) || isSpace(c); });
 }
 
 // Read the profile variant flag from the header: ":FE" means this is a FE
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 0025806ca2356..aa7e79652af95 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -919,91 +919,118 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
   return I - 1;
 }
 
-void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
-                                    SmallVectorImpl<const char *> &NewArgv,
-                                    bool MarkEOLs) {
+// Windows treats whitespace, double quotes, and backslashes specially.
+static bool isWindowsSpecialChar(char C) {
+  return isWhitespaceOrNull(C) || C == '\\' || C == '\"';
+}
+
+// Windows tokenization implementation. The implementation is designed to be
+// inlined and specialized for the two user entry points.
+static inline void
+tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
                               function_ref<void(StringRef)> AddToken,
                               bool AlwaysCopy, function_ref<void()> MarkEOL) {
   SmallString<128> Token;
 
-  // This is a small state machine to consume characters until it reaches the
-  // end of the source string.
+  // Try to do as much work inside the state machine as possible.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
-  for (size_t I = 0, E = Src.size(); I != E; ++I) {
-    char C = Src[I];
-
-    // INIT state indicates that the current input index is at the start of
-    // the string or between tokens.
-    if (State == INIT) {
-      if (isWhitespaceOrNull(C)) {
-        // Mark the end of lines in response files
-        if (MarkEOLs && C == '\n')
-          NewArgv.push_back(nullptr);
-        continue;
+  for (size_t I = 0, E = Src.size(); I < E; ++I) {
+    switch (State) {
+    case INIT: {
+      assert(Token.empty() && "token should be empty in initial state");
+      // Eat whitespace before a token.
+ while (I < E && isWhitespaceOrNull(Src[I])) { + if (Src[I] == '\n') + MarkEOL(); + ++I; } - if (C == '"') { + // Stop if this was trailing whitespace. + if (I >= E) + break; + size_t Start = I; + while (I < E && !isWindowsSpecialChar(Src[I])) + ++I; + StringRef NormalChars = Src.slice(Start, I); + if (I >= E || isWhitespaceOrNull(Src[I])) { + if (I < E && Src[I] == '\n') + MarkEOL(); + // No special characters: slice out the substring and start the next + // token. Copy the string if the caller asks us to. + AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars); + } else if (Src[I] == '\"') { + Token += NormalChars; State = QUOTED; - continue; - } - if (C == '\\') { + } else if (Src[I] == '\\') { + Token += NormalChars; I = parseBackslash(Src, I, Token); State = UNQUOTED; - continue; + } else { + llvm_unreachable("unexpected special character"); } - Token.push_back(C); - State = UNQUOTED; - continue; + break; } - // UNQUOTED state means that it's reading a token not quoted by double - // quotes. - if (State == UNQUOTED) { - // Whitespace means the end of the token. - if (isWhitespaceOrNull(C)) { - NewArgv.push_back(Saver.save(StringRef(Token)).data()); + case UNQUOTED: + if (isWhitespaceOrNull(Src[I])) { + // Whitespace means the end of the token. If we are in this state, the + // token must have contained a special character, so we must copy the + // token. + AddToken(Saver.save(Token.str())); Token.clear(); + if (Src[I] == '\n') + MarkEOL(); State = INIT; - // Mark the end of lines in response files - if (MarkEOLs && C == '\n') - NewArgv.push_back(nullptr); - continue; - } - if (C == '"') { + } else if (Src[I] == '\"') { State = QUOTED; - continue; - } - if (C == '\\') { + } else if (Src[I] == '\\') { I = parseBackslash(Src, I, Token); - continue; + } else { + Token.push_back(Src[I]); } - Token.push_back(C); - continue; - } + break; - // QUOTED state means that it's reading a token quoted by double quotes. - if (State == QUOTED) { - if (C == '"') { + case QUOTED: + if (Src[I] == '\"') { if (I < (E - 1) && Src[I + 1] == '"') { // Consecutive double-quotes inside a quoted string implies one // double-quote. Token.push_back('"'); - I = I + 1; - continue; + ++I; + } else { + // Otherwise, end the quoted portion and return to the unquoted state. + State = UNQUOTED; } - State = UNQUOTED; - continue; - } - if (C == '\\') { + } else if (Src[I] == '\\') { I = parseBackslash(Src, I, Token); - continue; + } else { + Token.push_back(Src[I]); } - Token.push_back(C); + break; } } - // Append the last token after hitting EOF with no whitespace. 
+ if (!Token.empty()) - NewArgv.push_back(Saver.save(StringRef(Token)).data()); - // Mark the end of response files - if (MarkEOLs) - NewArgv.push_back(nullptr); + AddToken(Saver.save(Token.str())); +} + +void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs) { + auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok.data()); }; + auto OnEOL = [&]() { + if (MarkEOLs) + NewArgv.push_back(nullptr); + }; + tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, + /*AlwaysCopy=*/true, OnEOL); +} + +void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver, + SmallVectorImpl &NewArgv) { + auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok); }; + auto OnEOL = []() {}; + tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, /*AlwaysCopy=*/false, + OnEOL); } void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 97d5ffaadf827..27d92f0e0aec2 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -74,10 +74,10 @@ Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer, Error zlib::uncompress(StringRef InputBuffer, SmallVectorImpl &UncompressedBuffer, size_t UncompressedSize) { - UncompressedBuffer.resize(UncompressedSize); + UncompressedBuffer.reserve(UncompressedSize); Error E = uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize); - UncompressedBuffer.resize(UncompressedSize); + UncompressedBuffer.set_size(UncompressedSize); return E; } diff --git a/llvm/lib/Support/FileCollector.cpp b/llvm/lib/Support/FileCollector.cpp index f6d72685a8c3b..cb83d845695e8 100644 --- a/llvm/lib/Support/FileCollector.cpp +++ b/llvm/lib/Support/FileCollector.cpp @@ -34,7 +34,6 @@ static bool isCaseSensitivePath(StringRef Path) { FileCollector::FileCollector(std::string Root, std::string OverlayRoot) : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) { - sys::fs::create_directories(this->Root, true); } bool FileCollector::getRealPath(StringRef SrcPath, @@ -150,6 +149,13 @@ copyAccessAndModificationTime(StringRef Filename, } std::error_code FileCollector::copyFiles(bool StopOnError) { + auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true); + if (Err) { + return Err; + } + + std::lock_guard lock(Mutex); + for (auto &entry : VFSWriter.getMappings()) { // Create directory tree. if (std::error_code EC = diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp index 0a5306f684d4e..ec12820e9692c 100644 --- a/llvm/lib/Support/FileOutputBuffer.cpp +++ b/llvm/lib/Support/FileOutputBuffer.cpp @@ -172,6 +172,10 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) { if (Flags & F_executable) Mode |= fs::all_exe; + // If Size is zero, don't use mmap which will fail with EINVAL. 
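// ---------------------------------------------------------------------------
// Illustrative usage, not part of the patch, of the two entry points defined
// above. TokenizeWindowsCommandLine copies every token into the saver; the
// NoCopy variant may return slices of Src, so Src must outlive the results.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 8> Argv;
  // Tokens: foo | a b | c\\d | e"f  -- quotes group, "" escapes a quote, and
  // backslashes are literal unless they precede a double quote.
  llvm::cl::TokenizeWindowsCommandLine("foo \"a b\" c\\\\d \"e\"\"f\"", Saver,
                                       Argv, /*MarkEOLs=*/false);
  for (const char *Arg : Argv)
    llvm::outs() << Arg << "\n";
}
// ---------------------------------------------------------------------------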
+ if (Size == 0) + return createInMemoryBuffer(Path, Size, Mode); + fs::file_status Stat; fs::status(Path, Stat); diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index b95f92c86e996..e4a86bb69de4d 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/FileUtilities.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" @@ -92,9 +93,9 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, // If one of the positions is at a space and the other isn't, chomp up 'til // the end of the space. - while (isspace(static_cast(*F1P)) && F1P != F1End) + while (isSpace(static_cast(*F1P)) && F1P != F1End) ++F1P; - while (isspace(static_cast(*F2P)) && F2P != F2End) + while (isSpace(static_cast(*F2P)) && F2P != F2End) ++F2P; // If we stop on numbers, compare their difference. diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index ae8fd66bac306..67db360a3954a 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -231,6 +231,16 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { } } + if (Implementer == "0x4e") { // NVIDIA Corporation + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("CPU part")) { + return StringSwitch(Lines[I].substr(8).ltrim("\t :")) + .Case("0x004", "carmel") + .Default("generic"); + } + } + } + if (Implementer == "0x48") // HiSilicon Technologies, Inc. // Look for the CPU part line. for (unsigned I = 0, E = Lines.size(); I != E; ++I) diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 6f065b9c5a3c8..775629074f6c4 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -540,15 +540,9 @@ void native(SmallVectorImpl &Path, Style style) { Path = PathHome; } } else { - for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) { - if (*PI == '\\') { - auto PN = PI + 1; - if (PN < PE && *PN == '\\') - ++PI; // increment once, the for loop will move over the escaped slash - else - *PI = '/'; - } - } + for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) + if (*PI == '\\') + *PI = '/'; } } @@ -684,43 +678,69 @@ StringRef remove_leading_dotslash(StringRef Path, Style style) { return Path; } -static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot, - Style style) { +// Remove path traversal components ("." and "..") when possible, and +// canonicalize slashes. +bool remove_dots(SmallVectorImpl &the_path, bool remove_dot_dot, + Style style) { + style = real_style(style); + StringRef remaining(the_path.data(), the_path.size()); + bool needs_change = false; SmallVector components; - // Skip the root path, then look for traversal in the components. - StringRef rel = path::relative_path(path, style); - for (StringRef C : - llvm::make_range(path::begin(rel, style), path::end(rel))) { - if (C == ".") - continue; - // Leading ".." will remain in the path unless it's at the root. - if (remove_dot_dot && C == "..") { + // Consume the root path, if present. + StringRef root = path::root_path(remaining, style); + bool absolute = !root.empty(); + if (absolute) + remaining = remaining.drop_front(root.size()); + + // Loop over path components manually. This makes it easier to detect + // non-preferred slashes and double separators that must be canonicalized. 
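// ---------------------------------------------------------------------------
// Illustrative usage, not part of the patch, of the remove_dots() rework that
// starts here: separator canonicalization and dot-dot removal now happen in a
// single pass, and the return value reports whether the path changed.
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallString<128> P("a/./b/..//c");
  bool Changed = llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true,
                                              llvm::sys::path::Style::posix);
  // Prints "a/c true": "." and empty components vanish, ".." pops "b".
  llvm::outs() << P.str() << " " << (Changed ? "true" : "false") << "\n";
}
// ---------------------------------------------------------------------------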
+  while (!remaining.empty()) {
+    size_t next_slash = remaining.find_first_of(separators(style));
+    if (next_slash == StringRef::npos)
+      next_slash = remaining.size();
+    StringRef component = remaining.take_front(next_slash);
+    remaining = remaining.drop_front(next_slash);
+
+    // Eat the slash, and check if it is the preferred separator.
+    if (!remaining.empty()) {
+      needs_change |= remaining.front() != preferred_separator(style);
+      remaining = remaining.drop_front();
+      // The path needs to be rewritten if it has a trailing slash.
+      // FIXME: This is emergent behavior that could be removed.
+      needs_change |= remaining.empty();
+    }
+
+    // Check for path traversal components or double separators.
+    if (component.empty() || component == ".") {
+      needs_change = true;
+    } else if (remove_dot_dot && component == "..") {
+      needs_change = true;
+      // Do not allow ".." to remove the root component. If this is the
+      // beginning of a relative path, keep the ".." component.
       if (!components.empty() && components.back() != "..") {
         components.pop_back();
-        continue;
+      } else if (!absolute) {
+        components.push_back(component);
       }
-      if (path::is_absolute(path, style))
-        continue;
+    } else {
+      components.push_back(component);
     }
-    components.push_back(C);
   }
 
-  SmallString<256> buffer = path::root_path(path, style);
-  for (StringRef C : components)
-    path::append(buffer, style, C);
-  return buffer;
-}
-
-bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot,
-                 Style style) {
-  StringRef p(path.data(), path.size());
-
-  SmallString<256> result = remove_dots(p, remove_dot_dot, style);
-  if (result == path)
+  // Avoid rewriting the path unless we have to.
+  if (!needs_change)
     return false;
 
-  path.swap(result);
+  SmallString<256> buffer = root;
+  if (!components.empty()) {
+    buffer += components[0];
+    for (StringRef C : makeArrayRef(components).drop_front()) {
+      buffer += preferred_separator(style);
+      buffer += C;
+    }
+  }
+  the_path.swap(buffer);
   return true;
 }
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 9ece0c5a3b62b..6d5fe7165f633 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
+#include <cstdint>
 using namespace llvm;
 
 // Check that no bytes are wasted and everything is well-aligned.
@@ -37,24 +38,30 @@ static_assert(sizeof(SmallVector<void *, 1>) ==
                   sizeof(unsigned) * 2 + sizeof(void *) * 2,
               "wasted space in SmallVector size 1");
 
-/// grow_pod - This is an implementation of the grow() method which only works
-/// on POD-like datatypes and is out of line to reduce code duplication.
-/// This function will report a fatal error if it cannot increase capacity.
-void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
-                               size_t TSize) {
-  // Ensure we can fit the new capacity in 32 bits.
-  if (MinCapacity > UINT32_MAX)
+static_assert(sizeof(SmallVector<char, 0>) ==
+                  sizeof(void *) * 2 + sizeof(void *),
+              "1 byte elements have word-sized type for size and capacity");
+
+// Note: Moving this function into the header may cause performance regression.
+template <class Size_T>
+void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
+                                       size_t TSize) {
+  // Ensure we can fit the new capacity.
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (MinCapacity > SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
   // Ensure we can meet the guarantee of space for at least one more element.
   // The above check alone will not catch the case where grow is called with a
   // default MinCapacity of 0, but the current capacity cannot be increased.
-  if (capacity() == size_t(UINT32_MAX))
+  // This is only going to be applicable when the capacity is 32 bit.
+  if (capacity() == SizeTypeMax())
     report_bad_alloc_error("SmallVector capacity unable to grow");
 
+  // In theory 2*capacity can overflow if the capacity is 64 bit, but the
+  // original capacity would never be large enough for this to be a problem.
   size_t NewCapacity = 2 * capacity() + 1; // Always grow.
-  NewCapacity =
-      std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
+  NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
 
   void *NewElts;
   if (BeginX == FirstEl) {
@@ -70,3 +77,20 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
   this->BeginX = NewElts;
   this->Capacity = NewCapacity;
 }
+
+template class llvm::SmallVectorBase<uint32_t>;
+
+// Disable the uint64_t instantiation for 32-bit builds.
+// Both uint32_t and uint64_t instantiations are needed for 64-bit builds.
+// This instantiation will never be used in 32-bit builds, and will cause
+// warnings when sizeof(Size_T) > sizeof(size_t).
+#if SIZE_MAX > UINT32_MAX
+template class llvm::SmallVectorBase<uint64_t>;
+
+// Assertions to ensure this #if stays in sync with SmallVectorSizeType.
+static_assert(sizeof(SmallVectorSizeType<char>) == sizeof(uint64_t),
+              "Expected SmallVectorBase<uint64_t> variant to be in use.");
+#else
+static_assert(sizeof(SmallVectorSizeType<char>) == sizeof(uint32_t),
+              "Expected SmallVectorBase<uint32_t> variant to be in use.");
+#endif
diff --git a/llvm/lib/Support/TarWriter.cpp b/llvm/lib/Support/TarWriter.cpp
index 6c62f8861afd4..c7a744f0fc98c 100644
--- a/llvm/lib/Support/TarWriter.cpp
+++ b/llvm/lib/Support/TarWriter.cpp
@@ -131,7 +131,17 @@ static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) {
     return true;
   }
 
-  size_t Sep = Path.rfind('/', sizeof(UstarHeader::Prefix) + 1);
+  // tar 1.13 and earlier unconditionally look at the tar header interpreted
+  // as an 'oldgnu_header', which has an 'isextended' byte at offset 482 in the
+  // header, corresponding to offset 137 in the prefix. That's the version of
+  // tar in gnuwin, so only use 137 of the 155 bytes in the prefix. This means
+  // we'll need a pax header after 237 bytes of path instead of after 255,
+  // but in return paths up to 237 bytes work with gnuwin, instead of just
+  // 137 bytes of directory + 100 bytes of basename previously.
+  // (tar-1.13 also doesn't support pax headers, but in practice all paths in
+  // llvm's test suite are short enough for that to not matter.)
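// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the prefix/name split described
// in the comment above, as a standalone toy. toySplitUstar is a hypothetical
// simplification of splitUstar; the real function also verifies the prefix
// fits and falls back to a pax header otherwise.
#include <cstdio>
#include <string>

static bool toySplitUstar(const std::string &Path, std::string &Prefix,
                          std::string &Name, size_t MaxPrefix = 137) {
  // Split at the last '/' that keeps the leading part within MaxPrefix bytes.
  size_t Sep = Path.rfind('/', MaxPrefix + 1);
  if (Sep == std::string::npos)
    return false;
  Prefix = Path.substr(0, Sep);
  Name = Path.substr(Sep + 1);
  return Name.size() < 100; // sizeof(UstarHeader::Name)
}

int main() {
  std::string Prefix, Name;
  if (toySplitUstar(std::string(120, 'd') + "/file.txt", Prefix, Name))
    std::printf("prefix=%zu bytes, name=%s\n", Prefix.size(), Name.c_str());
}
// ---------------------------------------------------------------------------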
+ const int MaxPrefix = 137; + size_t Sep = Path.rfind('/', MaxPrefix + 1); if (Sep == StringRef::npos) return false; if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 5aa5815d7272c..46a1990cd7196 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -21,8 +21,7 @@ using namespace llvm; #if LLVM_ENABLE_THREADS ThreadPool::ThreadPool(ThreadPoolStrategy S) - : ActiveThreads(0), EnableFlag(true), - ThreadCount(S.compute_thread_count()) { + : ThreadCount(S.compute_thread_count()) { // Create ThreadCount threads that will loop forever, wait on QueueCondition // for tasks to be queued or the Pool to be destroyed. Threads.reserve(ThreadCount); @@ -44,24 +43,24 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S) // We first need to signal that we are active before popping the queue // in order for wait() to properly detect that even if the queue is // empty, there is still a task in flight. - { - std::unique_lock LockGuard(CompletionLock); - ++ActiveThreads; - } + ++ActiveThreads; Task = std::move(Tasks.front()); Tasks.pop(); } // Run the task we just grabbed Task(); + bool Notify; { // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() - std::unique_lock LockGuard(CompletionLock); + std::lock_guard LockGuard(QueueLock); --ActiveThreads; + Notify = workCompletedUnlocked(); } - - // Notify task completion, in case someone waits on ThreadPool::wait() - CompletionCondition.notify_all(); + // Notify task completion if this is the last active thread, in case + // someone waits on ThreadPool::wait(). + if (Notify) + CompletionCondition.notify_all(); } }); } @@ -69,12 +68,8 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S) void ThreadPool::wait() { // Wait for all threads to complete and the queue to be empty - std::unique_lock LockGuard(CompletionLock); - // The order of the checks for ActiveThreads and Tasks.empty() matters because - // any active threads might be modifying the Tasks queue, and this would be a - // race. 
- CompletionCondition.wait(LockGuard, - [&] { return !ActiveThreads && Tasks.empty(); }); + std::unique_lock LockGuard(QueueLock); + CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); }); } std::shared_future ThreadPool::asyncImpl(TaskTy Task) { @@ -109,7 +104,7 @@ ThreadPool::~ThreadPool() { // No threads are launched, issue a warning if ThreadCount is not 0 ThreadPool::ThreadPool(ThreadPoolStrategy S) - : ActiveThreads(0), ThreadCount(S.compute_thread_count()) { + : ThreadCount(S.compute_thread_count()) { if (ThreadCount != 1) { errs() << "Warning: request a ThreadPool with " << ThreadCount << " threads, but LLVM_ENABLE_THREADS has been turned off\n"; diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index 001ab81b23af9..783a7ace10054 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -1138,6 +1138,19 @@ bool home_directory(SmallVectorImpl &result) { return true; } +bool cache_directory(SmallVectorImpl &result) { + if (const char *RequestedDir = getenv("XDG_CACHE_HOME")) { + result.clear(); + result.append(RequestedDir, RequestedDir + strlen(RequestedDir)); + return true; + } + if (!home_directory(result)) { + return false; + } + append(result, ".cache"); + return true; +} + static bool getDarwinConfDir(bool TempDir, SmallVectorImpl &Result) { #if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR) // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR. diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 0eadefb689fd2..ec62e656ddf0e 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -101,18 +101,13 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl &Path16, } // Remove '.' and '..' because long paths treat these as real path components. + llvm::sys::path::native(Path8Str, path::Style::windows); llvm::sys::path::remove_dots(Path8Str, true); const StringRef RootName = llvm::sys::path::root_name(Path8Str); assert(!RootName.empty() && "Root name cannot be empty for an absolute path!"); - // llvm::sys::path::remove_dots, used above, can leave a '/' after the root - // name and long paths must use '\' as the separator. - const size_t RootNameSize = RootName.size(); - if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/') - Path8Str[RootNameSize] = '\\'; - SmallString<2 * MAX_PATH> FullPath(LongPathPrefix); if (RootName[1] != ':') { // Check if UNC. 
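// ---------------------------------------------------------------------------
// Illustrative usage, not part of the patch: the ThreadPool rework above only
// changes the locking underneath wait(); the client-facing API is unchanged.
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <cassert>

int main() {
  std::atomic<unsigned> Done{0};
  {
    llvm::ThreadPool Pool(llvm::hardware_concurrency());
    for (int I = 0; I < 8; ++I)
      Pool.async([&Done] { ++Done; });
    Pool.wait(); // Blocks until the queue is empty and no task is in flight.
  }
  assert(Done == 8);
}
// ---------------------------------------------------------------------------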
FullPath.append("UNC\\"); @@ -1372,6 +1367,10 @@ bool home_directory(SmallVectorImpl &result) { return getKnownFolderPath(FOLDERID_Profile, result); } +bool cache_directory(SmallVectorImpl &result) { + return getKnownFolderPath(FOLDERID_LocalAppData, result); +} + static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl &Res) { SmallVector Buf; size_t Size = 1024; diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index d2ab0ff898aa8..6337607a4a04d 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -142,7 +142,7 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align", "Disallow all unaligned memory " "access">; -foreach i = {1-7,9-15,18,20-28} in +foreach i = {1-7,9-15,18,20-28,30} in def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", "Reserve X"#i#", making it unavailable " "as a GPR">; @@ -605,6 +605,14 @@ def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", FeatureComplxNum ]>; +def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", + "Nvidia Carmel processors", [ + HasV8_2aOps, + FeatureNEON, + FeatureCrypto, + FeatureFullFP16 + ]>; + // Note that cyclone does not fuse AES instructions, but newer apple chips do // perform the fusion and cyclone is used by default when targetting apple OSes. def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", @@ -947,6 +955,9 @@ def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>; // FIXME: Scheduling model is not implemented yet. def : ProcessorModel<"a64fx", NoSchedModel, [ProcA64FX]>; +// Nvidia Carmel +def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>; + //===----------------------------------------------------------------------===// // Assembly parser //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index bcf7436dc3a95..c4f260145afff 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -3092,7 +3092,7 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { int64_t ImmVal = CNode->getSExtValue(); SDLoc DL(N); - if (ImmVal >= -127 && ImmVal < 127) { + if (ImmVal >= -128 && ImmVal < 128) { Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9d8151dd0e99c..d5e549cdee56a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -188,10 +188,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); } for (auto VT : @@ -887,6 +883,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SDIV, VT, Custom); setOperationAction(ISD::UDIV, VT, Custom); + setOperationAction(ISD::SMIN, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); + setOperationAction(ISD::SMAX, VT, Custom); + 
setOperationAction(ISD::UMAX, VT, Custom); } } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); @@ -1285,6 +1285,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; case AArch64ISD::SDIV_PRED: return "AArch64ISD::SDIV_PRED"; case AArch64ISD::UDIV_PRED: return "AArch64ISD::UDIV_PRED"; + case AArch64ISD::SMIN_PRED: return "AArch64ISD::SMIN_PRED"; + case AArch64ISD::UMIN_PRED: return "AArch64ISD::UMIN_PRED"; + case AArch64ISD::SMAX_PRED: return "AArch64ISD::SMAX_PRED"; + case AArch64ISD::UMAX_PRED: return "AArch64ISD::UMAX_PRED"; case AArch64ISD::ADC: return "AArch64ISD::ADC"; case AArch64ISD::SBC: return "AArch64ISD::SBC"; case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; @@ -1366,6 +1370,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::REV: return "AArch64ISD::REV"; case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST"; case AArch64ISD::TBL: return "AArch64ISD::TBL"; + case AArch64ISD::FADDA_PRED: return "AArch64ISD::FADDA_PRED"; + case AArch64ISD::FADDV_PRED: return "AArch64ISD::FADDV_PRED"; + case AArch64ISD::FMAXV_PRED: return "AArch64ISD::FMAXV_PRED"; + case AArch64ISD::FMAXNMV_PRED: return "AArch64ISD::FMAXNMV_PRED"; + case AArch64ISD::FMINV_PRED: return "AArch64ISD::FMINV_PRED"; + case AArch64ISD::FMINNMV_PRED: return "AArch64ISD::FMINNMV_PRED"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; @@ -3348,9 +3358,17 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SDIV: - return LowerDIV(Op, DAG, AArch64ISD::SDIV_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED); case ISD::UDIV: - return LowerDIV(Op, DAG, AArch64ISD::UDIV_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); + case ISD::SMIN: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED); + case ISD::UMIN: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED); + case ISD::SMAX: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED); + case ISD::UMAX: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -7657,7 +7675,7 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op, return DAG.getNode(ISD::BITCAST, DL, VT, TBL); } -SDValue AArch64TargetLowering::LowerDIV(SDValue Op, +SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const { EVT VT = Op.getValueType(); @@ -11308,6 +11326,46 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, return DAG.getZExtOrTrunc(Res, DL, VT); } +static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, + SelectionDAG &DAG) { + SDLoc DL(N); + + SDValue Pred = N->getOperand(1); + SDValue VecToReduce = N->getOperand(2); + + EVT ReduceVT = VecToReduce.getValueType(); + SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce); + + // SVE reductions set the whole vector register with the first element + // containing the reduction result, which we'll now extract. 
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, + Zero); +} + +static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, + SelectionDAG &DAG) { + SDLoc DL(N); + + SDValue Pred = N->getOperand(1); + SDValue InitVal = N->getOperand(2); + SDValue VecToReduce = N->getOperand(3); + EVT ReduceVT = VecToReduce.getValueType(); + + // Ordered reductions use the first lane of the result vector as the + // reduction's initial value. + SDValue Zero = DAG.getConstant(0, DL, MVT::i64); + InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT, + DAG.getUNDEF(ReduceVT), InitVal, Zero); + + SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce); + + // SVE reductions set the whole vector register with the first element + // containing the reduction result, which we'll now extract. + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, + Zero); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -11389,8 +11447,32 @@ static SDValue performIntrinsicCombine(SDNode *N, return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_udiv: - return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_smin: + return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_umin: + return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_smax: + return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_umax: + return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_fadda: + return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG); + case Intrinsic::aarch64_sve_faddv: + return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG); + case Intrinsic::aarch64_sve_fmaxnmv: + return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG); + case Intrinsic::aarch64_sve_fmaxv: + return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG); + case Intrinsic::aarch64_sve_fminnmv: + return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG); + case Intrinsic::aarch64_sve_fminv: + return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG); case Intrinsic::aarch64_sve_sel: return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index fe67d75fab178..d356e8bf68a4b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -55,6 +55,10 @@ enum NodeType : unsigned { // Arithmetic instructions SDIV_PRED, UDIV_PRED, + SMIN_PRED, + UMIN_PRED, + SMAX_PRED, + UMAX_PRED, // Arithmetic instructions which write flags. ADDS, @@ -215,6 +219,14 @@ enum NodeType : unsigned { REV, TBL, + // Floating-point reductions. 
+ FADDA_PRED, + FADDV_PRED, + FMAXV_PRED, + FMAXNMV_PRED, + FMINV_PRED, + FMINNMV_PRED, + INSR, PTEST, PTRUE, @@ -785,8 +797,8 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDIV(SDValue Op, SelectionDAG &DAG, - unsigned NewOp) const; + SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, + unsigned NewOp) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 0bc805cc3e9d6..d7334f627f6c3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -152,9 +152,14 @@ class AArch64InstructionSelector : public InstructionSelector { MachineInstr *emitVectorConcat(Optional Dst, Register Op1, Register Op2, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; + + // Emit an integer compare between LHS and RHS, which checks for Predicate. + // + // This may update Predicate when emitting the compare. + std::pair + emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, @@ -300,6 +305,10 @@ class AArch64InstructionSelector : public InstructionSelector { MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS, + MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIB) const; /// Return true if \p MI is a load or store of \p NumBytes bytes. 
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; @@ -460,6 +469,27 @@ static unsigned getMinSizeForRegBank(const RegisterBank &RB) { } } +static Optional getImmedFromMO(const MachineOperand &Root) { + auto &MI = *Root.getParent(); + auto &MBB = *MI.getParent(); + auto &MF = *MBB.getParent(); + auto &MRI = MF.getRegInfo(); + uint64_t Immed; + if (Root.isImm()) + Immed = Root.getImm(); + else if (Root.isCImm()) + Immed = Root.getCImm()->getZExtValue(); + else if (Root.isReg()) { + auto ValAndVReg = + getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); + if (!ValAndVReg) + return None; + Immed = ValAndVReg->Value; + } else + return None; + return Immed; +} + /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg @@ -706,7 +736,6 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - Register DstReg = I.getOperand(0).getReg(); Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); @@ -739,13 +768,15 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"); - assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI)); - (void)KnownValid; - return true; + bool ValidCopy = true; +#ifndef NDEBUG + ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); + assert(ValidCopy && "Invalid copy."); +#endif + return ValidCopy; }; - // Is this a copy? If so, then we may need to insert a subregister copy, or - // a SUBREG_TO_REG. + // Is this a copy? If so, then we may need to insert a subregister copy. if (I.isCopy()) { // Yes. Check if there's anything to fix up. if (!SrcRC) { @@ -755,51 +786,43 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); unsigned DstSize = TRI.getRegSizeInBits(*DstRC); + unsigned SubReg; - // If the source register is bigger than the destination we need to perform - // a subregister copy. - if (SrcSize > DstSize) { - unsigned SubReg = 0; + // If the source bank doesn't support a subregister copy small enough, + // then we first need to copy to the destination bank. + if (getMinSizeForRegBank(SrcRegBank) > DstSize) { + const TargetRegisterClass *DstTempRC = + getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); + getSubRegForClass(DstRC, TRI, SubReg); - // If the source bank doesn't support a subregister copy small enough, - // then we first need to copy to the destination bank. - if (getMinSizeForRegBank(SrcRegBank) > DstSize) { - const TargetRegisterClass *SubregRC = getMinClassForRegBank( - DstRegBank, SrcSize, /* GetAllRegSet = */ true); - getSubRegForClass(DstRC, TRI, SubReg); - - MachineIRBuilder MIB(I); - auto Copy = MIB.buildCopy({SubregRC}, {SrcReg}); - copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); - } else { - const TargetRegisterClass *SubregRC = getMinClassForRegBank( - SrcRegBank, DstSize, /* GetAllRegSet = */ true); - getSubRegForClass(SubregRC, TRI, SubReg); - copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); - } - - return CheckCopy(); - } - - // Is this a cross-bank copy? 
- if (DstRegBank.getID() != SrcRegBank.getID()) { - if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && - SrcSize == 16) { - // Special case for FPR16 to GPR32. - // FIXME: This can probably be generalized like the above case. - Register PromoteReg = - MRI.createVirtualRegister(&AArch64::FPR32RegClass); - BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(AArch64::SUBREG_TO_REG), PromoteReg) - .addImm(0) - .addUse(SrcReg) - .addImm(AArch64::hsub); - MachineOperand &RegOp = I.getOperand(1); - RegOp.setReg(PromoteReg); + MachineIRBuilder MIB(I); + auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); + copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); + } else if (SrcSize > DstSize) { + // If the source register is bigger than the destination we need to + // perform a subregister copy. + const TargetRegisterClass *SubRegRC = + getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); + getSubRegForClass(SubRegRC, TRI, SubReg); + copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); + } else if (DstSize > SrcSize) { + // If the destination register is bigger than the source we need to do + // a promotion using SUBREG_TO_REG. + const TargetRegisterClass *PromotionRC = + getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); + getSubRegForClass(SrcRC, TRI, SubReg); + + Register PromoteReg = MRI.createVirtualRegister(PromotionRC); + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(AArch64::SUBREG_TO_REG), PromoteReg) + .addImm(0) + .addUse(SrcReg) + .addImm(SubReg); + MachineOperand &RegOp = I.getOperand(1); + RegOp.setReg(PromoteReg); - // Promise that the copy is implicitly validated by the SUBREG_TO_REG. - KnownValid = true; - } + // Promise that the copy is implicitly validated by the SUBREG_TO_REG. + KnownValid = true; } // If the destination is a physical register, then there's nothing to @@ -1241,7 +1264,8 @@ bool AArch64InstructionSelector::selectCompareBranch( Register RHS = CCMI->getOperand(3).getReg(); auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); MachineIRBuilder MIB(I); - const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); + CmpInst::Predicate Pred = + (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI); // When we can emit a TB(N)Z, prefer that. @@ -1279,8 +1303,10 @@ bool AArch64InstructionSelector::selectCompareBranch( if (!VRegAndVal || VRegAndVal->Value != 0) { // If we can't select a CBZ then emit a cmp + Bcc. 
- if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3), - CCMI->getOperand(1), MIB)) + MachineInstr *Cmp; + std::tie(Cmp, Pred) = emitIntegerCompare( + CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB); + if (!Cmp) return false; const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred); MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); @@ -2625,11 +2651,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } MachineIRBuilder MIRBuilder(I); - if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIRBuilder)) + MachineInstr *Cmp; + CmpInst::Predicate Pred; + std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3), + I.getOperand(1), MIRBuilder); + if (!Cmp) return false; - emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(), - MIRBuilder); + emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); I.eraseFromParent(); return true; } @@ -3646,7 +3674,8 @@ AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, return &*TstMI; } -MachineInstr *AArch64InstructionSelector::emitIntegerCompare( +std::pair +AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); @@ -3656,7 +3685,7 @@ MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineInstr *FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); if (FoldCmp) - return FoldCmp; + return {FoldCmp, (CmpInst::Predicate)Predicate.getPredicate()}; // Can't fold into a CMN. Just emit a normal compare. unsigned CmpOpc = 0; @@ -3672,26 +3701,20 @@ MachineInstr *AArch64InstructionSelector::emitIntegerCompare( CmpOpc = AArch64::SUBSXrr; ZReg = AArch64::XZR; } else { - return nullptr; + return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE}; } // Try to match immediate forms. - auto ImmFns = selectArithImmed(RHS); - if (ImmFns) - CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri; - - auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg()); - // If we matched a valid constant immediate, add those operands. - if (ImmFns) { - for (auto &RenderFn : *ImmFns) - RenderFn(CmpMI); - } else { - CmpMI.addUse(RHS.getReg()); - } + MachineInstr *ImmedCmp = + tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder); + if (ImmedCmp) + return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()}; + auto CmpMI = + MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()}); // Make sure that we can constrain the compare that we emitted. 
   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
-  return &*CmpMI;
+  return {&*CmpMI, (CmpInst::Predicate)Predicate.getPredicate()};
 }
 
 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -3862,13 +3885,21 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
 
   AArch64CC::CondCode CondCode;
   if (CondOpc == TargetOpcode::G_ICMP) {
-    CondCode = changeICMPPredToAArch64CC(
-        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
-    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
-                            CondDef->getOperand(1), MIB)) {
+    MachineInstr *Cmp;
+    CmpInst::Predicate Pred;
+
+    std::tie(Cmp, Pred) =
+        emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+                           CondDef->getOperand(1), MIB);
+
+    if (!Cmp) {
       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
       return false;
     }
+
+    // Have to collect the CondCode after emitIntegerCompare, since it can
+    // update the predicate.
+    CondCode = changeICMPPredToAArch64CC(Pred);
   } else {
     // Get the condition code for the select.
     AArch64CC::CondCode CondCode2;
@@ -3998,6 +4029,119 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   return nullptr;
 }
 
+MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
+    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+    MachineIRBuilder &MIB) const {
+  // Attempt to select the immediate form of an integer compare.
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  auto Ty = MRI.getType(LHS.getReg());
+  assert(!Ty.isVector() && "Expected scalar or pointer only?");
+  unsigned Size = Ty.getSizeInBits();
+  assert((Size == 32 || Size == 64) &&
+         "Expected 32 bit or 64 bit compare only?");
+  auto P = (CmpInst::Predicate)Predicate.getPredicate();
+
+  // Check if this is a case we can already handle.
+  InstructionSelector::ComplexRendererFns ImmFns;
+  ImmFns = selectArithImmed(RHS);
+
+  if (!ImmFns) {
+    // We didn't get a rendering function, but we may still have a constant.
+    auto MaybeImmed = getImmedFromMO(RHS);
+    if (!MaybeImmed)
+      return nullptr;
+
+    // We have a constant, but it doesn't fit. Try adjusting it by one and
+    // updating the predicate if possible.
+    uint64_t C = *MaybeImmed;
+    switch (P) {
+    default:
+      return nullptr;
+    case CmpInst::ICMP_SLT:
+    case CmpInst::ICMP_SGE:
+      // Check for
+      //
+      // x slt c => x sle c - 1
+      // x sge c => x sgt c - 1
+      //
+      // When c is not the smallest possible negative number.
+      if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
+          (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
+        return nullptr;
+      P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+      C -= 1;
+      break;
+    case CmpInst::ICMP_ULT:
+    case CmpInst::ICMP_UGE:
+      // Check for
+      //
+      // x ult c => x ule c - 1
+      // x uge c => x ugt c - 1
+      //
+      // When c is not zero.
+      if (C == 0)
+        return nullptr;
+      P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+      C -= 1;
+      break;
+    case CmpInst::ICMP_SLE:
+    case CmpInst::ICMP_SGT:
+      // Check for
+      //
+      // x sle c => x slt c + 1
+      // x sgt c => x sge c + 1
+      //
+      // When c is not the largest possible signed integer.
+      if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
+          (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
+        return nullptr;
+      P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+      C += 1;
+      break;
+    case CmpInst::ICMP_ULE:
+    case CmpInst::ICMP_UGT:
+      // Check for
+      //
+      // x ule c => x ult c + 1
+      // x ugt c => x uge c + 1
+      //
+      // When c is not the largest possible unsigned integer.
+      if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
+          (Size == 64 && C == UINT64_MAX))
+        return nullptr;
+      P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+      C += 1;
+      break;
+    }
+
+    // Check if the new constant is valid.
+    if (Size == 32)
+      C = static_cast<uint32_t>(C);
+    ImmFns = select12BitValueWithLeftShift(C);
+    if (!ImmFns)
+      return nullptr;
+    Predicate.setPredicate(P);
+  }
+
+  // At this point, we know we can select an immediate form. Go ahead and do
+  // that.
+  Register ZReg;
+  unsigned Opc;
+  if (Size == 32) {
+    ZReg = AArch64::WZR;
+    Opc = AArch64::SUBSWri;
+  } else {
+    ZReg = AArch64::XZR;
+    Opc = AArch64::SUBSXri;
+  }
+
+  auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+  for (auto &RenderFn : *ImmFns)
+    RenderFn(CmpMI);
+  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+  return &*CmpMI;
+}
+
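// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the constant/predicate rewrite
// from the switch above as a standalone toy, unsigned cases only. Rewriting
// (ult, 0x1001) as (ule, 0x1000) turns an unencodable constant into a legal
// 12-bit-shifted AArch64 arithmetic immediate.
#include <cassert>
#include <cstdint>

enum class Pred { ULT, UGE, ULE, UGT };

static bool adjustUnsignedImm(Pred &P, uint64_t &C) {
  switch (P) {
  case Pred::ULT: // x ult c  ==  x ule c - 1, unless c == 0
  case Pred::UGE: // x uge c  ==  x ugt c - 1, unless c == 0
    if (C == 0)
      return false;
    P = (P == Pred::ULT) ? Pred::ULE : Pred::UGT;
    C -= 1;
    return true;
  case Pred::ULE: // x ule c  ==  x ult c + 1, unless c == UINT64_MAX
  case Pred::UGT: // x ugt c  ==  x uge c + 1, unless c == UINT64_MAX
    if (C == UINT64_MAX)
      return false;
    P = (P == Pred::ULE) ? Pred::ULT : Pred::UGE;
    C += 1;
    return true;
  }
  return false;
}

int main() {
  Pred P = Pred::ULT;
  uint64_t C = 0x1001; // Not a valid arithmetic immediate.
  bool Changed = adjustUnsignedImm(P, C);
  assert(Changed && P == Pred::ULE && C == 0x1000); // 0x1000 encodes as 1<<12.
}
// ---------------------------------------------------------------------------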
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
@@ -4520,27 +4664,6 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
   return false;
 }
 
-static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
-  auto &MI = *Root.getParent();
-  auto &MBB = *MI.getParent();
-  auto &MF = *MBB.getParent();
-  auto &MRI = MF.getRegInfo();
-  uint64_t Immed;
-  if (Root.isImm())
-    Immed = Root.getImm();
-  else if (Root.isCImm())
-    Immed = Root.getCImm()->getZExtValue();
-  else if (Root.isReg()) {
-    auto ValAndVReg =
-        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
-    if (!ValAndVReg)
-      return None;
-    Immed = ValAndVReg->Value;
-  } else
-    return None;
-  return Immed;
-}
-
 InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
   auto MaybeImmed = getImmedFromMO(Root);
diff --git a/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 9135f1b401223..9a5615ac1c09d 100644
--- a/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -304,7 +304,7 @@ static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
 
   // Do not mess with inline asm.
const CallInst *CI = dyn_cast(Instr); - return !(CI && isa(CI->getCalledValue())); + return !(CI && CI->isInlineAsm()); } /// Check if the given Cst should be converted into diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 7864cc1fb6367..1593a2c0b2f45 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -134,28 +134,37 @@ def sve_cntw_imm_neg : ComplexPattern">; def sve_cntd_imm_neg : ComplexPattern">; def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; - -def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; -def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; -def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; -def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; -def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; -def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; -def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; -def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; -def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; - -def SDT_AArch64DIV : SDTypeProfile<1, 3, [ +def AArch64faddv_pred : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; +def AArch64fmaxv_pred : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; +def AArch64fmaxnmv_pred : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; +def AArch64fminv_pred : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; +def AArch64fminnmv_pred : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; +def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; +def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; +def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; +def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; +def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; +def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; +def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; +def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; +def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; + +def SDT_AArch64Arith : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> ]>; -def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64DIV>; -def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64DIV>; +def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; +def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; +def AArch64smin_pred : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; +def AArch64umin_pred : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; +def AArch64smax_pred : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; +def AArch64umax_pred : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; +def AArch64fadda_pred : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, 
SDTCisSameAs<0,1>]>; def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>; @@ -227,10 +236,10 @@ let Predicates = [HasSVE] in { defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; - defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", smax>; - defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", smin>; - defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", umax>; - defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", umin>; + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_pred>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_pred>; + defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_pred>; + defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_pred>; defm MUL_ZI : sve_int_arith_imm2<"mul", mul>; defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>; @@ -275,10 +284,10 @@ let Predicates = [HasSVE] in { defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>; - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_pred>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_pred>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_pred>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_pred>; defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>; defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>; @@ -352,12 +361,21 @@ let Predicates = [HasSVE] in { defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; // SVE floating point reductions. - defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>; - defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>; - defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>; - defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>; - defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>; - defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>; + defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_pred>; + defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_pred>; + defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_pred>; + defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_pred>; + defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_pred>; + defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_pred>; + + // Use more efficient NEON instructions to extract elements within the NEON + // part (first 128bits) of an SVE register. 
+ def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>; + def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), + (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>; + def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), + (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>; // Splat immediate (unpredicated) defm DUP_ZI : sve_int_dup_imm<"dup">; @@ -1378,54 +1396,59 @@ multiclass sve_prefetch; } - def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>; - def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>; - - def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>; - def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>; - - def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; - def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>; - - def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>; - def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; - - def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>; - - def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>; - def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>; - - def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 
ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>; - def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + // FIXME: BigEndian requires an additional REV instruction to satisfy the + // constraint that none of the bits change when stored to memory as one + // type, and reloaded as another type. + let Predicates = [IsLE] in { + def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + + def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + + def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + + def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; + + def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + + def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + + def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert
(nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + } def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; @@ -1767,6 +1790,77 @@ multiclass sve_prefetch; + def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)), + (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>; + def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)), + (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>; + def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)), + (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; + def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)), + (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + + // Insert scalar into vector[0] + def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)), + (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>; + def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), (i32 GPR32:$src), 0)), + (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>; + def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), (i32 GPR32:$src), 0)), + (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>; + def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), (i64 GPR64:$src), 0)), + (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>; + + def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)), + (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>; + def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)), + (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>; + def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)), + (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>; + + // Insert scalar into vector with scalar index + def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), GPR32:$src, GPR64:$index)), + (CPY_ZPmR_B ZPR:$vec, + (CMPEQ_PPzZZ_B (PTRUE_B 31), + (INDEX_II_B 0, 1), + (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), + GPR32:$src)>; + def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), GPR32:$src, GPR64:$index)), + (CPY_ZPmR_H ZPR:$vec, + (CMPEQ_PPzZZ_H (PTRUE_H 31), + (INDEX_II_H 0, 1), + (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), + GPR32:$src)>; + def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), GPR32:$src, GPR64:$index)), + (CPY_ZPmR_S ZPR:$vec, + (CMPEQ_PPzZZ_S (PTRUE_S 31), + (INDEX_II_S 0, 1), + (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), + GPR32:$src)>; + def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), GPR64:$src, GPR64:$index)), + (CPY_ZPmR_D ZPR:$vec, + (CMPEQ_PPzZZ_D (PTRUE_D 31), + (INDEX_II_D 0, 1), + (DUP_ZR_D GPR64:$index)), + GPR64:$src)>; + + // Insert FP scalar into vector with scalar index + def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)), + (CPY_ZPmV_H ZPR:$vec, + (CMPEQ_PPzZZ_H (PTRUE_H 31), + (INDEX_II_H 0, 1), + (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), + $src)>; + def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)), + (CPY_ZPmV_S ZPR:$vec, + (CMPEQ_PPzZZ_S (PTRUE_S 31), + (INDEX_II_S 0, 1), + (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), + $src)>; + def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)), + (CPY_ZPmV_D ZPR:$vec, + (CMPEQ_PPzZZ_D (PTRUE_D 31), + (INDEX_II_D 0, 
1), + (DUP_ZR_D $index)), + $src)>; } let Predicates = [HasSVE, HasMatMulInt8] in { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 2353229445e8b..0a06246f9c127 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -68,6 +68,9 @@ void AArch64Subtarget::initializeProperties() { switch (ARMProcFamily) { case Others: break; + case Carmel: + CacheLineSize = 64; + break; case CortexA35: break; case CortexA53: diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index dab5fe3626d96..ddb3a67e33690 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -45,6 +45,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { AppleA11, AppleA12, AppleA13, + Carmel, CortexA35, CortexA53, CortexA55, diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 6cb1b6ef391d0..ceceabc6ff4ed 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -1331,7 +1331,6 @@ def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; let Requires = [{ {AArch64::FeatureRASv8_4} }] in { def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>; def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>; -def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>; def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>; def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>; def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e67b0e671908b..0c0b3d6407eb5 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -57,7 +57,8 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) { } /// Calculate the cost of materializing the given constant. -int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -82,7 +83,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -139,16 +141,17 @@ int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Idx == ImmIdx) { int NumConstants = (BitSize + 63) / 64; - int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? 
static_cast(TTI::TCC_Free) : Cost; } - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -161,7 +164,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, // selected instruction, so we compute the materialization cost for the // immediate directly. if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv) - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); switch (IID) { default: @@ -174,7 +177,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, case Intrinsic::umul_with_overflow: if (Idx == 1) { int NumConstants = (BitSize + 63) / 64; - int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? static_cast(TTI::TCC_Free) : Cost; @@ -190,7 +193,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; break; } - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } TargetTransformInfo::PopcntSupportKind @@ -267,6 +270,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, } int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -295,7 +299,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); static const TypeConversionCostTblEntry ConversionTbl[] = { @@ -399,7 +403,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, SrcTy.getSimpleVT())) return Entry->Cost; - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); } int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, @@ -425,17 +429,18 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy); auto DstVT = TLI->getValueType(DL, Dst); auto SrcVT = TLI->getValueType(DL, Src); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // If the resulting type is still a vector and the destination type is legal, // we may get the extension for free. If not, get the default cost for the // extend. if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT)) - return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); // The destination type should be larger than the element type. If not, get // the default cost for the extend. if (DstVT.getSizeInBits() < SrcVT.getSizeInBits()) - return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); switch (Opcode) { default: @@ -454,7 +459,7 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, } // If we are unable to perform the extend for free, get the default cost. 
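Stepping back from the mechanical edits in this file: the recurring change threads a TTI::TargetCostKind parameter through every cost hook, so a query can state what it is optimizing for. A hedged caller-side sketch, assuming only the interface visible in this patch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// The same instruction can now be costed for different goals:
// TCK_RecipThroughput for vectorization decisions, TCK_SizeAndLatency
// for size-driven heuristics such as inlining and unrolling.
static int costForVectorizer(const TargetTransformInfo &TTI,
                             const Instruction *I,
                             ArrayRef<const Value *> Ops) {
  return TTI.getUserCost(I, Ops, TargetTransformInfo::TCK_RecipThroughput);
}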
- return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); } int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, @@ -483,7 +488,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } int AArch64TTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -504,7 +510,8 @@ int AArch64TTIImpl::getArithmeticInstrCost( switch (ISD) { default: - return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::SDIV: if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -513,16 +520,20 @@ int AArch64TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties may not be the same as those of the previous // operation; conservatively assume OP_None. - Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return Cost; @@ -535,31 +546,34 @@ int AArch64TTIImpl::getArithmeticInstrCost( // Vector signed division by constant is expanded to the // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division // to MULHS + SUB + SRL + ADD + SRL. - int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info, - Opd2Info, + int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, - Opd2Info, + int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, - Opd2Info, + int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; } } - Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); if (Ty->isVectorTy()) { // On AArch64, vector divisions are not supported natively and are // expanded into scalar divisions of each pair of elements.
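For the ISD::SDIV block above, the charged sequence mirrors the usual expansion of a signed division by a power of two. A runnable standalone model of one common equivalent form (plain C++, not code from the patch):

#include <cassert>
#include <cstdint>

// Round-towards-zero division by 2^Log2: negative inputs get a bias of
// (2^Log2 - 1) before the arithmetic shift, which corresponds to the
// CMP + SELECT + ADD the cost model pays for, followed by the SRA.
int32_t sdivByPow2(int32_t X, unsigned Log2) {
  int32_t Bias = (X < 0) ? (int32_t(1) << Log2) - 1 : 0; // cmp + select
  return (X + Bias) >> Log2; // add + arithmetic shift
}

int main() {
  assert(sdivByPow2(-7, 1) == -3 && sdivByPow2(7, 1) == 3);
  return 0;
}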
- Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info, - Opd2Info, Opd1PropInfo, Opd2PropInfo); - Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info, - Opd2Info, Opd1PropInfo, Opd2PropInfo); + Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, + Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo); + Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, + Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo); // TODO: if one of the arguments is scalar, then it's not necessary to // double the cost of handling the vector elements. Cost += Cost; @@ -596,7 +610,9 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, + TTI::TargetCostKind CostKind, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -623,7 +639,7 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return Entry->Cost; } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions @@ -646,6 +662,7 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { auto LT = TLI->getTypeLegalizationCost(DL, Ty); @@ -688,6 +705,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { assert(Factor >= 2 && "Invalid interleave factor"); @@ -707,18 +725,19 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { int Cost = 0; + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; for (auto *I : Tys) { if (!I->isVectorTy()) continue; if (I->getScalarSizeInBits() * cast(I)->getNumElements() == 128) - Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) + - getMemoryOpCost(Instruction::Load, I, Align(128), 0); + Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) + + getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind); } return Cost; } @@ -932,10 +951,12 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm) { + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { if (IsPairwiseForm) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm); + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -956,7 +977,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy)) return LT.first * Entry->Cost; - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm); + return 
BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); } int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index b676f63fe0a8b..f7233d364f716 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -72,11 +72,11 @@ class AArch64TTIImpl : public BasicTTIImplBase { using BaseT::getIntImmCost; int getIntImmCost(int64_t Val); - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); /// @} @@ -112,6 +112,7 @@ class AArch64TTIImpl : public BasicTTIImplBase { unsigned getMaxInterleaveFactor(unsigned VF); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, @@ -121,6 +122,7 @@ class AArch64TTIImpl : public BasicTTIImplBase { int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -131,13 +133,16 @@ class AArch64TTIImpl : public BasicTTIImplBase { int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getCostOfKeepingLiveOverCall(ArrayRef Tys); @@ -192,6 +197,7 @@ class AArch64TTIImpl : public BasicTTIImplBase { int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); @@ -225,7 +231,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { TTI::ReductionFlags Flags) const; int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 54fab60e33fd2..52d3c166fd505 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -324,6 +324,11 @@ class SVE_1_Op_Imm_Arith_Pat; +class SVE_1_Op_Imm_Arith_Pred_Pat + : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), + (inst $Op1, i32:$imm)>; + class SVE_1_Op_Imm_Log_Pat : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))), @@ 
-3840,10 +3845,10 @@ multiclass sve_int_arith_imm1 opc, string asm, SDPatternOperator op> { def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; } multiclass sve_int_arith_imm1_unsigned opc, string asm, SDPatternOperator op> { @@ -3852,10 +3857,10 @@ multiclass sve_int_arith_imm1_unsigned opc, string asm, SDPatternOperato def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pat(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; } multiclass sve_int_arith_imm2 { @@ -4444,8 +4449,8 @@ multiclass sve2_int_while_rr rw, string asm, string op> { //===----------------------------------------------------------------------===// class sve_fp_fast_red sz, bits<3> opc, string asm, - ZPRRegOp zprty, RegisterClass dstRegClass> -: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + ZPRRegOp zprty, FPRasZPROperand dstOpType> +: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), asm, "\t$Vd, $Pg, $Zn", "", []>, Sched<[]> { @@ -4463,13 +4468,13 @@ class sve_fp_fast_red sz, bits<3> opc, string asm, } multiclass sve_fp_fast_red opc, string asm, SDPatternOperator op> { - def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>; - def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>; - def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>; + def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_2_Op_Pat(NAME # _H)>; - def : SVE_2_Op_Pat(NAME # _S)>; - def : SVE_2_Op_Pat(NAME # _D)>; + def : SVE_2_Op_Pat(NAME # _H)>; + def : SVE_2_Op_Pat(NAME # _S)>; + def : SVE_2_Op_Pat(NAME # _D)>; } @@ -4478,8 +4483,8 @@ multiclass sve_fp_fast_red opc, string asm, SDPatternOperator op> { //===----------------------------------------------------------------------===// class sve_fp_2op_p_vd sz, bits<3> opc, string asm, - ZPRRegOp zprty, RegisterClass dstRegClass> -: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm), + ZPRRegOp zprty, FPRasZPROperand dstOpType> +: I<(outs dstOpType:$Vdn), (ins PPR3bAny:$Pg, dstOpType:$_Vdn, zprty:$Zm), asm, "\t$Vdn, $Pg, $_Vdn, $Zm", "", []>, @@ -4500,13 +4505,13 @@ class sve_fp_2op_p_vd sz, bits<3> opc, string asm, } multiclass sve_fp_2op_p_vd opc, string asm, SDPatternOperator op> { - def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>; - def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>; - def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>; + def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>; - def : 
SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 7a691c318b0ff..d241b4899b432 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -288,7 +288,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { for (Instruction &I : BB) { if (auto *CB = dyn_cast(&I)) { const Function *Callee = - dyn_cast(CB->getCalledValue()->stripPointerCasts()); + dyn_cast(CB->getCalledOperand()->stripPointerCasts()); // TODO: Do something with indirect calls. if (!Callee) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 4e1a7842a1ee8..88644e010d35c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -765,7 +765,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( } if (AMDGPU::SReg_32RegClass.contains(Reg) || - AMDGPU::SGPR_LO16RegClass.contains(Reg) || + AMDGPU::SReg_LO16RegClass.contains(Reg) || AMDGPU::SGPR_HI16RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_32RegClass.contains(Reg) && "trap handler registers should not be used"); @@ -776,7 +776,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( AMDGPU::VGPR_HI16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; - } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { + } else if (AMDGPU::AGPR_32RegClass.contains(Reg) || + AMDGPU::AGPR_LO16RegClass.contains(Reg)) { IsSGPR = false; IsAGPR = true; Width = 1; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 2c8a5c4042105..7c83b6dcb44b9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -89,6 +89,24 @@ def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs< (sequence "VGPR%u", 32, 255) >; +def CSR_AMDGPU_VGPRs : CalleeSavedRegs< + // The CSRs & scratch-registers are interleaved at a split boundary of 8. 
+ (add (sequence "VGPR%u", 40, 47), + (sequence "VGPR%u", 56, 63), + (sequence "VGPR%u", 72, 79), + (sequence "VGPR%u", 88, 95), + (sequence "VGPR%u", 104, 111), + (sequence "VGPR%u", 120, 127), + (sequence "VGPR%u", 136, 143), + (sequence "VGPR%u", 152, 159), + (sequence "VGPR%u", 168, 175), + (sequence "VGPR%u", 184, 191), + (sequence "VGPR%u", 200, 207), + (sequence "VGPR%u", 216, 223), + (sequence "VGPR%u", 232, 239), + (sequence "VGPR%u", 248, 255)) +>; + def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs< (sequence "SGPR%u", 32, 105) >; @@ -104,7 +122,7 @@ def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs< >; def CSR_AMDGPU_HighRegs : CalleeSavedRegs< - (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_105) + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105) >; // Calling convention for leaf functions diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp index 5fbb00908e7eb..b74f2a2777861 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp @@ -34,7 +34,8 @@ class AMDGPUFixFunctionBitcasts final void visitCallBase(CallBase &CB) { if (CB.getCalledFunction()) return; - auto *Callee = dyn_cast(CB.getCalledValue()->stripPointerCasts()); + auto *Callee = + dyn_cast(CB.getCalledOperand()->stripPointerCasts()); if (Callee && isLegalToPromote(CB, Callee)) { promoteCall(CB, Callee); Modified = true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 9b5b0b5a08fa1..2ae0e8bc40aba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1039,24 +1039,51 @@ void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { SDValue RHS = N->getOperand(1); SDValue CI = N->getOperand(2); - unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64 - : AMDGPU::V_SUBB_U32_e64; - CurDAG->SelectNodeTo( - N, Opc, N->getVTList(), - {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); + if (N->isDivergent()) { + unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64 + : AMDGPU::V_SUBB_U32_e64; + CurDAG->SelectNodeTo( + N, Opc, N->getVTList(), + {LHS, RHS, CI, + CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); + } else { + unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO + : AMDGPU::S_SUB_CO_PSEUDO; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI}); + } } void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned // carry out despite the _i32 name. These were renamed in VI to _U32. // FIXME: We should probably rename the opcodes here. - unsigned Opc = N->getOpcode() == ISD::UADDO ? - AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + bool IsAdd = N->getOpcode() == ISD::UADDO; + bool IsVALU = N->isDivergent(); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; + ++UI) + if (UI.getUse().getResNo() == 1) { + if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) || + (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) { + IsVALU = true; + break; + } + } - CurDAG->SelectNodeTo( - N, Opc, N->getVTList(), - {N->getOperand(0), N->getOperand(1), - CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); + if (IsVALU) { + unsigned Opc = IsAdd ? 
AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + + CurDAG->SelectNodeTo( + N, Opc, N->getVTList(), + {N->getOperand(0), N->getOperand(1), + CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); + } else { + unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO + : AMDGPU::S_USUBO_PSEUDO; + + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), + {N->getOperand(0), N->getOperand(1)}); + } } void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index dfb13767bfe2b..9f6ebd00cd97b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// def SGPRRegBank : RegisterBank<"SGPR", - [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024] + [SReg_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024] >; def VGPRRegBank : RegisterBank<"VGPR", @@ -18,5 +18,5 @@ def VGPRRegBank : RegisterBank<"VGPR", def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; def AGPRRegBank : RegisterBank <"AGPR", - [AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024] + [AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index e71721bd27221..3db52afff8611 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -916,6 +916,12 @@ bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(&SIFixSGPRCopiesID); addPass(createSILowerI1CopiesPass()); + // TODO: We have to add FinalizeISel here + // to expand V_ADD/SUB_U64_PSEUDO before SIFixupVectorISel, + // which expects V_ADD/SUB to be expanded into V_ADDC/SUBB pairs. + // This will be removed as soon as SIFixupVectorISel is changed + // to work with V_ADD/SUB_U64_PSEUDO instead.
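To summarize the selection policy introduced in SelectUADDO_USUBO above, before the pass list resumes: the node stays on the scalar unit only when it is uniform and every user of its carry output is a matching carry-consuming node; anything else forces the VALU form. A standalone model of that decision, with illustrative types:

#include <vector>

enum class Op { AddCarry, SubCarry, Other };

// Mirrors the IsVALU computation: divergent nodes, and uniform nodes
// whose carry-out feeds anything but a matching addcarry/subcarry,
// must be selected as VALU instructions.
bool selectScalarForm(bool IsDivergent, bool IsAdd,
                      const std::vector<Op> &CarryUsers) {
  if (IsDivergent)
    return false;
  for (Op User : CarryUsers)
    if (User != (IsAdd ? Op::AddCarry : Op::SubCarry))
      return false;
  return true;
}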
+ addPass(&FinalizeISelID); addPass(createSIFixupVectorISelPass()); addPass(createSIAddIMGInitPass()); return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 402f55722ac1e..645b0f55315c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -422,6 +422,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, } int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -430,7 +431,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, const Instruction *CxtI) { EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } @@ -542,7 +544,8 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, break; } - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } @@ -562,13 +565,15 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { template int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (!intrinsicHasPackedVectorBenefit(ID)) - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); EVT OrigTy = TLI->getValueType(DL, RetTy); if (!OrigTy.isSimple()) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } // Legalize the type. @@ -597,31 +602,36 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { - return getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) { + return getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { return getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } -unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) { +unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) { // XXX - For some reason this isn't called for switch. 
switch (Opcode) { case Instruction::Br: case Instruction::Ret: return 10; default: - return BaseT::getCFInstrCost(Opcode); + return BaseT::getCFInstrCost(Opcode, CostKind); } } int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwise) { + bool IsPairwise, + TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support @@ -629,15 +639,15 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, if (IsPairwise || !ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getFullRateInstrCost(); } int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwise, - bool IsUnsigned) { + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support @@ -645,7 +655,8 @@ int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, if (IsPairwise || !ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getHalfRateInstrCost(); @@ -782,7 +793,7 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { // Assume all function calls are a source of divergence. if (const CallInst *CI = dyn_cast(V)) { - if (isa(CI->getCalledValue())) + if (CI->isInlineAsm()) return isInlineAsmSourceOfDivergence(CI); return true; } @@ -810,7 +821,7 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { } if (const CallInst *CI = dyn_cast(V)) { - if (isa(CI->getCalledValue())) + if (CI->isInlineAsm()) return !isInlineAsmSourceOfDivergence(CI); return false; } @@ -838,7 +849,7 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { // If we have inline asm returning mixed SGPR and VGPR results, we inferred // divergent for the overall struct return. We need to override it in the // case we're extracting an SGPR component here. 
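A side note on the change that repeats across these AMDGPU files: the deprecated CallBase::getCalledValue() is replaced by getCalledOperand(), and the isa<InlineAsm> test by the isInlineAsm() helper. A short sketch against the LLVM C++ API showing the two equivalent spellings:

#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"

// Old spelling, as removed by this patch (modulo the renamed accessor).
static bool callsInlineAsmOld(const llvm::CallBase &CB) {
  return llvm::isa<llvm::InlineAsm>(CB.getCalledOperand());
}

// New spelling: CallBase::isInlineAsm() wraps exactly that check.
static bool callsInlineAsmNew(const llvm::CallBase &CB) {
  return CB.isInlineAsm();
}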
- if (isa(CI->getCalledValue())) + if (CI->isInlineAsm()) return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices()); return false; @@ -949,11 +960,12 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getUnrollingPreferences(L, SE, UP); } -unsigned GCNTTIImpl::getUserCost(const User *U, - ArrayRef Operands) { +unsigned +GCNTTIImpl::getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { const Instruction *I = dyn_cast(U); if (!I) - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); // Estimate different operations to be optimized out switch (I->getOpcode()) { @@ -978,9 +990,9 @@ unsigned GCNTTIImpl::getUserCost(const User *U, if (auto *FPMO = dyn_cast(II)) FMF = FPMO->getFastMathFlags(); return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, - FMF, 1, II); + FMF, 1, CostKind, II); } else { - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); } } case Instruction::ShuffleVector: { @@ -994,7 +1006,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U, return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty); if (Shuffle->changesLength()) - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); if (Shuffle->isIdentity()) return 0; @@ -1030,7 +1042,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U, case Instruction::BitCast: case Instruction::AddrSpaceCast: { return getCastInstrCost(I->getOpcode(), I->getType(), - I->getOperand(0)->getType(), I); + I->getOperand(0)->getType(), CostKind, I); } case Instruction::Add: case Instruction::FAdd: @@ -1051,7 +1063,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U, case Instruction::Or: case Instruction::Xor: case Instruction::FNeg: { - return getArithmeticInstrCost(I->getOpcode(), I->getType(), + return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind, TTI::OK_AnyValue, TTI::OK_AnyValue, TTI::OP_None, TTI::OP_None, Operands, I); } @@ -1059,7 +1071,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U, break; } - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); } unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { @@ -1126,14 +1138,15 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) { return 8; } -unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) { +unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) { // XXX - For some reason this isn't called for switch. 
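One detail from the getUserCost shuffle handling above is worth spelling out: an identity shufflevector is free, and undefined lanes (written as -1 in the mask) are still compatible with the identity. A standalone model of that mask test, with no LLVM dependencies:

#include <cstddef>
#include <vector>

// A mask is an identity if every defined lane selects itself; -1 marks
// an undefined lane and does not break the identity.
bool isIdentityMask(const std::vector<int> &Mask) {
  for (std::size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I] != -1 && Mask[I] != static_cast<int>(I))
      return false;
  return true;
}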
switch (Opcode) { case Instruction::Br: case Instruction::Ret: return 10; default: - return BaseT::getCFInstrCost(Opcode); + return BaseT::getCFInstrCost(Opcode, CostKind); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index a009837b66ee4..5553bf8c80559 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -184,6 +184,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -191,7 +192,7 @@ class GCNTTIImpl final : public BasicTTIImplBase { ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef Indices = {}) const; @@ -225,24 +226,32 @@ class GCNTTIImpl final : public BasicTTIImplBase { int getInlinerVectorBonusPercent() { return 0; } - int getArithmeticReductionCost(unsigned Opcode, - VectorType *Ty, - bool IsPairwise); + int getArithmeticReductionCost( + unsigned Opcode, + VectorType *Ty, + bool IsPairwise, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + template int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, const Instruction *I = nullptr); - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, - bool IsUnsigned); - unsigned getUserCost(const User *U, ArrayRef Operands); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, + unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + + unsigned getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind); }; class R600TTIImpl final : public BasicTTIImplBase { @@ -281,7 +290,7 @@ class R600TTIImpl final : public BasicTTIImplBase { unsigned Alignment, unsigned AddrSpace) const; unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); }; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 3ef5a77af45e2..02a44f9292176 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -486,6 +486,14 
@@ void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) { addRegsToSet(TRI, MI.uses(), ClauseUses); } +static bool breaksSMEMSoftClause(MachineInstr *MI) { + return !SIInstrInfo::isSMRD(*MI); +} + +static bool breaksVMEMSoftClause(MachineInstr *MI) { + return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI); +} + int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { // SMEM soft clauses are only present on VI+, and only matter if xnack is // enabled. @@ -512,7 +520,7 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) { if (!MI) break; - if (IsSMRD != SIInstrInfo::isSMRD(*MI)) + if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI)) break; addClauseInst(*MI); diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index b1c64ed2e896b..e4017be41e727 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -280,7 +280,9 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); unsigned Size = TRI->getRegSizeInBits(*RC); - if (Size > 32) + if (Size == 16) + Reg = TRI->get32BitRegister(Reg); + else if (Size > 32) Reg = TRI->getSubReg(Reg, AMDGPU::sub0); if (TRI->hasVGPRs(RC)) { @@ -306,9 +308,16 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg, } const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - unsigned Size = TRI->getRegSizeInBits(*RC) / 32; - if (Size > 1) - Reg = TRI->getSubReg(Reg, AMDGPU::sub0); + unsigned Size = TRI->getRegSizeInBits(*RC); + + if (Size == 16) { + Reg = TRI->get32BitRegister(Reg); + Size = 1; + } else { + Size /= 32; + if (Size > 1) + Reg = TRI->getSubReg(Reg, AMDGPU::sub0); + } if (TRI->hasVGPRs(RC)) { // VGPRs have 4 banks assigned in a round-robin fashion. @@ -440,10 +449,19 @@ bool GCNRegBankReassign::isReassignable(unsigned Reg) const { } const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); + unsigned Size = TRI->getRegSizeInBits(*RC); + + // TODO: Support 16 bit registers. Those need to be moved with their + // parent VGPR_32 and potentially a sibling 16 bit sub-register.
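Returning to the GCNHazardRecognizer hunk at the start of this file's changes: the old code broke a soft clause whenever the memory category of an instruction differed from the clause's first instruction, while the new predicates additionally let FLAT instructions participate in VMEM clauses. A standalone model of the new policy, with an illustrative enum:

enum class MemKind { SMEM, VMEM, FLAT, Other };

// SMEM clauses still require SMEM instructions only; VMEM clauses now
// accept both VMEM and FLAT instructions.
bool breaksSoftClause(bool ClauseIsSMEM, MemKind K) {
  if (ClauseIsSMEM)
    return K != MemKind::SMEM;
  return K != MemKind::VMEM && K != MemKind::FLAT;
}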
+ if (Size < 32) + return false; + if (TRI->hasVGPRs(RC)) return true; - unsigned Size = TRI->getRegSizeInBits(*RC); + if (Size == 16) + return AMDGPU::SGPR_LO16RegClass.contains(PhysReg); + if (Size > 32) PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 362952fe813fd..a699cd31e27fc 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -393,9 +393,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// -AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( - MCStreamer &S, const MCSubtargetInfo &STI) - : AMDGPUTargetStreamer(S), Streamer(S) { +AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) { MCAssembler &MCA = getStreamer().getAssembler(); unsigned EFlags = MCA.getELFHeaderEFlags(); @@ -438,9 +438,15 @@ void AMDGPUTargetELFStreamer::EmitNote( auto NameSZ = Name.size() + 1; + unsigned NoteFlags = 0; + // TODO Apparently, this is currently needed for OpenCL as mentioned in + // https://reviews.llvm.org/D74995 + if (Os == Triple::AMDHSA) + NoteFlags = ELF::SHF_ALLOC; + S.PushSection(); - S.SwitchSection(Context.getELFSection( - ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + S.SwitchSection( + Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); S.emitInt32(NameSZ); // namesz S.emitValue(DescSZ, 4); // descz S.emitInt32(NoteType); // type diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 683b3e363b9aa..5f89da365de05 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -133,6 +133,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; + Triple::OSType Os; void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType, function_ref EmitDesc); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 4461d2be13c80..c21b3d768b317 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -423,7 +423,8 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) || MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) || - MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg)) + MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg)) Enc |= 512; return Enc; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 69cc96292597d..467c59c906917 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3602,6 +3602,26 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( } switch (MI.getOpcode()) { + case AMDGPU::S_UADDO_PSEUDO: + case AMDGPU::S_USUBO_PSEUDO: { + const 
DebugLoc &DL = MI.getDebugLoc(); + MachineOperand &Dest0 = MI.getOperand(0); + MachineOperand &Dest1 = MI.getOperand(1); + MachineOperand &Src0 = MI.getOperand(2); + MachineOperand &Src1 = MI.getOperand(3); + + unsigned Opc = (MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO) + ? AMDGPU::S_ADD_I32 + : AMDGPU::S_SUB_I32; + BuildMI(*BB, MI, DL, TII->get(Opc), Dest0.getReg()).add(Src0).add(Src1); + + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B64), Dest1.getReg()) + .addImm(1) + .addImm(0); + + MI.eraseFromParent(); + return BB; + } case AMDGPU::S_ADD_U64_PSEUDO: case AMDGPU::S_SUB_U64_PSEUDO: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); @@ -3617,35 +3637,146 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, - Src0, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32RegClass); - MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, - Src0, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32RegClass); + MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass); + MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass); - MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, - Src1, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32RegClass); - MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, - Src1, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32RegClass); + MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass); + MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass); bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; unsigned HiOpc = IsAdd ? 
AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; - BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0) - .add(Src0Sub0) - .add(Src1Sub0); - BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1) - .add(Src0Sub1) - .add(Src1Sub1); + BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0); + BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg()) - .addReg(DestSub0) - .addImm(AMDGPU::sub0) - .addReg(DestSub1) - .addImm(AMDGPU::sub1); + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + MI.eraseFromParent(); + return BB; + } + case AMDGPU::V_ADD_U64_PSEUDO: + case AMDGPU::V_SUB_U64_PSEUDO: { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const GCNSubtarget &ST = MF->getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + + bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO); + + const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); + + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + Register CarryReg = MRI.createVirtualRegister(CarryRC); + Register DeadCarryReg = MRI.createVirtualRegister(CarryRC); + + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src0 = MI.getOperand(1); + MachineOperand &Src1 = MI.getOperand(2); + + const TargetRegisterClass *Src0RC = Src0.isReg() + ? MRI.getRegClass(Src0.getReg()) + : &AMDGPU::VReg_64RegClass; + const TargetRegisterClass *Src1RC = Src1.isReg() + ? MRI.getRegClass(Src1.getReg()) + : &AMDGPU::VReg_64RegClass; + + const TargetRegisterClass *Src0SubRC = + TRI->getSubRegClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + TRI->getSubRegClass(Src1RC, AMDGPU::sub1); + + MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); + MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); + + MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); + MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm( + MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); + + unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0) + .addReg(CarryReg, RegState::Define) + .add(SrcReg0Sub0) + .add(SrcReg1Sub0) + .addImm(0); // clamp bit + + unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; + MachineInstr *HiHalf = + BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1) + .addReg(DeadCarryReg, RegState::Define | RegState::Dead) + .add(SrcReg0Sub1) + .add(SrcReg1Sub1) + .addReg(CarryReg, RegState::Kill) + .addImm(0); // clamp bit + + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg()) + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + TII->legalizeOperands(*LoHalf); + TII->legalizeOperands(*HiHalf); + MI.eraseFromParent(); + return BB; + } + case AMDGPU::S_ADD_CO_PSEUDO: + case AMDGPU::S_SUB_CO_PSEUDO: { + // This pseudo has a chance to be selected + // only from a uniform add/subcarry node. All the VGPR operands are + // therefore assumed to be splat vectors.
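Note: the splat assumption is exactly what makes the V_READFIRSTLANE_B32 copies in the expansion below legal. A minimal host-side model of why reading lane 0 is enough for a uniform value (illustrative sketch, not part of the patch; the 64-lane wavefront size is assumed):

#include <cassert>
#include <cstdint>
#include <vector>

// v_readfirstlane returns lane 0 of a VGPR. If the value is uniform (a
// splat across the wavefront), lane 0 represents every lane, so a scalar
// ALU instruction can consume it without losing information.
static uint32_t readFirstLane(const std::vector<uint32_t> &Lanes) {
  return Lanes.front();
}

int main() {
  std::vector<uint32_t> Splat(64, 42); // uniform value in all 64 lanes
  assert(readFirstLane(Splat) == 42);  // any lane would give the same answer
  return 0;
}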
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const GCNSubtarget &ST = MF->getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineBasicBlock::iterator MII = MI; + const DebugLoc &DL = MI.getDebugLoc(); + MachineOperand &Dest = MI.getOperand(0); + MachineOperand &Src0 = MI.getOperand(2); + MachineOperand &Src1 = MI.getOperand(3); + MachineOperand &Src2 = MI.getOperand(4); + unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) + ? AMDGPU::S_ADDC_U32 + : AMDGPU::S_SUBB_U32; + if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) { + Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0) + .addReg(Src0.getReg()); + Src0.setReg(RegOp0); + } + if (Src1.isReg() && TRI->isVectorRegister(MRI, Src1.getReg())) { + Register RegOp1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1) + .addReg(Src1.getReg()); + Src1.setReg(RegOp1); + } + Register RegOp2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + if (TRI->isVectorRegister(MRI, Src2.getReg())) { + BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2) + .addReg(Src2.getReg()); + Src2.setReg(RegOp2); + } + + if (TRI->getRegSizeInBits(*MRI.getRegClass(Src2.getReg())) == 64) { + BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64)) + .addReg(Src2.getReg()) + .addImm(0); + } else { + BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32)) + .addReg(Src2.getReg()) + .addImm(0); + } + + BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1); MI.eraseFromParent(); return BB; } @@ -11009,7 +11140,7 @@ static bool hasCFUser(const Value *V, SmallPtrSet &Visited, bool SITargetLowering::requiresUniformRegister(MachineFunction &MF, const Value *V) const { if (const CallInst *CI = dyn_cast(V)) { - if (isa(CI->getCalledValue())) { + if (CI->isInlineAsm()) { // FIXME: This cannot give a correct answer. This should only trigger in // the case where inline asm returns mixed SGPR and VGPR results, used // outside the defining block. 
We don't have a specific result to diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5cf4909bd9b7e..c115d26fa6a34 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -32,6 +32,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -57,7 +58,6 @@ #include #include #include -#include using namespace llvm; @@ -109,15 +109,13 @@ iterator_range> inst_counter_types() { enum_iterator(NUM_INST_CNTS)); } -using RegInterval = std::pair; +using RegInterval = std::pair; struct { - uint32_t VmcntMax; - uint32_t ExpcntMax; - uint32_t LgkmcntMax; - uint32_t VscntMax; - int32_t NumVGPRsMax; - int32_t NumSGPRsMax; + unsigned VmcntMax; + unsigned ExpcntMax; + unsigned LgkmcntMax; + unsigned VscntMax; } HardwareLimits; struct { @@ -143,7 +141,7 @@ enum WaitEventType { NUM_WAIT_EVENTS, }; -static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = { +static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = { (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS), (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) | (1 << SQ_MESSAGE), @@ -166,6 +164,28 @@ enum RegisterMapping { NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts. }; +// Enumerate different types of result-returning VMEM operations. Although +// s_waitcnt orders them all with a single vmcnt counter, in the absence of +// s_waitcnt only instructions of the same VmemType are guaranteed to write +// their results in order -- so there is no need to insert an s_waitcnt between +// two instructions of the same type that write the same vgpr. +enum VmemType { + // BUF instructions and MIMG instructions without a sampler. + VMEM_NOSAMPLER, + // MIMG instructions with a sampler. + VMEM_SAMPLER, +}; + +VmemType getVmemType(const MachineInstr &Inst) { + assert(SIInstrInfo::isVMEM(Inst)); + if (!SIInstrInfo::isMIMG(Inst)) + return VMEM_NOSAMPLER; + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode()); + return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler + ? VMEM_SAMPLER + : VMEM_NOSAMPLER; +} + void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) { switch (T) { case VM_CNT: @@ -195,12 +215,9 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) { // "s_waitcnt 0" before use. 
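Note: the payoff of the VmemType classification above is a per-vgpr test that appears further down as hasOtherPendingVmemTypes. A host-side sketch of that decision, under the in-order guarantee stated in the comment (names here are illustrative):

#include <cassert>

enum VmemType { VMEM_NOSAMPLER, VMEM_SAMPLER };

struct VgprState {
  unsigned PendingTypes = 0; // bitmask of VmemTypes with in-flight writes
  // A new write of type V needs a preceding s_waitcnt only when a write of
  // some other type may still be outstanding to the same vgpr.
  bool needsWaitBeforeWrite(VmemType V) const {
    return PendingTypes & ~(1u << V);
  }
  void recordWrite(VmemType V) { PendingTypes |= 1u << V; }
};

int main() {
  VgprState V0;
  V0.recordWrite(VMEM_NOSAMPLER);
  assert(!V0.needsWaitBeforeWrite(VMEM_NOSAMPLER)); // same type: in order
  assert(V0.needsWaitBeforeWrite(VMEM_SAMPLER));    // mixed types: must wait
  return 0;
}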
class WaitcntBrackets { public: - WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) { - for (auto T : inst_counter_types()) - memset(VgprScores[T], 0, sizeof(VgprScores[T])); - } + WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {} - static uint32_t getWaitCountMax(InstCounterType T) { + static unsigned getWaitCountMax(InstCounterType T) { switch (T) { case VM_CNT: return HardwareLimits.VmcntMax; @@ -216,17 +233,13 @@ class WaitcntBrackets { return 0; } - uint32_t getScoreLB(InstCounterType T) const { + unsigned getScoreLB(InstCounterType T) const { assert(T < NUM_INST_CNTS); - if (T >= NUM_INST_CNTS) - return 0; return ScoreLBs[T]; } - uint32_t getScoreUB(InstCounterType T) const { + unsigned getScoreUB(InstCounterType T) const { assert(T < NUM_INST_CNTS); - if (T >= NUM_INST_CNTS) - return 0; return ScoreUBs[T]; } @@ -242,7 +255,7 @@ class WaitcntBrackets { return EXP_CNT; } - uint32_t getRegScore(int GprNo, InstCounterType T) { + unsigned getRegScore(int GprNo, InstCounterType T) { if (GprNo < NUM_ALL_VGPRS) { return VgprScores[T][GprNo]; } @@ -250,30 +263,16 @@ class WaitcntBrackets { return SgprScores[GprNo - NUM_ALL_VGPRS]; } - void clear() { - memset(ScoreLBs, 0, sizeof(ScoreLBs)); - memset(ScoreUBs, 0, sizeof(ScoreUBs)); - PendingEvents = 0; - memset(MixedPendingEvents, 0, sizeof(MixedPendingEvents)); - for (auto T : inst_counter_types()) - memset(VgprScores[T], 0, sizeof(VgprScores[T])); - memset(SgprScores, 0, sizeof(SgprScores)); - } - bool merge(const WaitcntBrackets &Other); RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, const MachineRegisterInfo *MRI, - const SIRegisterInfo *TRI, unsigned OpNo, - bool Def) const; - - int32_t getMaxVGPR() const { return VgprUB; } - int32_t getMaxSGPR() const { return SgprUB; } + const SIRegisterInfo *TRI, unsigned OpNo) const; bool counterOutOfOrder(InstCounterType T) const; bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const; bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const; - void determineWait(InstCounterType T, uint32_t ScoreToWait, + void determineWait(InstCounterType T, unsigned ScoreToWait, AMDGPU::Waitcnt &Wait) const; void applyWaitcnt(const AMDGPU::Waitcnt &Wait); void applyWaitcnt(InstCounterType T, unsigned Count); @@ -286,6 +285,12 @@ class WaitcntBrackets { return PendingEvents & (1 << E); } + bool hasMixedPendingEvents(InstCounterType T) const { + unsigned Events = PendingEvents & WaitEventMaskForInst[T]; + // Return true if more than one bit is set in Events. + return Events & (Events - 1); + } + bool hasPendingFlat() const { return ((LastFlat[LGKM_CNT] > ScoreLBs[LGKM_CNT] && LastFlat[LGKM_CNT] <= ScoreUBs[LGKM_CNT]) || @@ -298,71 +303,77 @@ class WaitcntBrackets { LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT]; } + // Return true if there might be pending writes to the specified vgpr by VMEM + // instructions with types different from V. 
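Note: hasMixedPendingEvents above uses the classic x & (x - 1) idiom, which clears the lowest set bit and is therefore nonzero exactly when more than one bit is set. A self-contained check of that property (sketch only):

#include <cassert>

static bool hasMultipleBits(unsigned Events) { return Events & (Events - 1); }

int main() {
  assert(!hasMultipleBits(0));                    // no pending events
  assert(!hasMultipleBits(1u << 3));              // a single event type
  assert(hasMultipleBits((1u << 3) | (1u << 5))); // mixed event types
  return 0;
}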
+ bool hasOtherPendingVmemTypes(int GprNo, VmemType V) const { + assert(GprNo < NUM_ALL_VGPRS); + return VgprVmemTypes[GprNo] & ~(1 << V); + } + + void clearVgprVmemTypes(int GprNo) { + assert(GprNo < NUM_ALL_VGPRS); + VgprVmemTypes[GprNo] = 0; + } + void print(raw_ostream &); void dump() { print(dbgs()); } private: struct MergeInfo { - uint32_t OldLB; - uint32_t OtherLB; - uint32_t MyShift; - uint32_t OtherShift; + unsigned OldLB; + unsigned OtherLB; + unsigned MyShift; + unsigned OtherShift; }; - static bool mergeScore(const MergeInfo &M, uint32_t &Score, - uint32_t OtherScore); + static bool mergeScore(const MergeInfo &M, unsigned &Score, + unsigned OtherScore); - void setScoreLB(InstCounterType T, uint32_t Val) { + void setScoreLB(InstCounterType T, unsigned Val) { assert(T < NUM_INST_CNTS); - if (T >= NUM_INST_CNTS) - return; ScoreLBs[T] = Val; } - void setScoreUB(InstCounterType T, uint32_t Val) { + void setScoreUB(InstCounterType T, unsigned Val) { assert(T < NUM_INST_CNTS); - if (T >= NUM_INST_CNTS) - return; ScoreUBs[T] = Val; if (T == EXP_CNT) { - uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT); + unsigned UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT); if (ScoreLBs[T] < UB && UB < ScoreUBs[T]) ScoreLBs[T] = UB; } } - void setRegScore(int GprNo, InstCounterType T, uint32_t Val) { + void setRegScore(int GprNo, InstCounterType T, unsigned Val) { if (GprNo < NUM_ALL_VGPRS) { - if (GprNo > VgprUB) { - VgprUB = GprNo; - } + VgprUB = std::max(VgprUB, GprNo); VgprScores[T][GprNo] = Val; } else { assert(T == LGKM_CNT); - if (GprNo - NUM_ALL_VGPRS > SgprUB) { - SgprUB = GprNo - NUM_ALL_VGPRS; - } + SgprUB = std::max(SgprUB, GprNo - NUM_ALL_VGPRS); SgprScores[GprNo - NUM_ALL_VGPRS] = Val; } } void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, - unsigned OpNo, uint32_t Val); + unsigned OpNo, unsigned Val); const GCNSubtarget *ST = nullptr; - uint32_t ScoreLBs[NUM_INST_CNTS] = {0}; - uint32_t ScoreUBs[NUM_INST_CNTS] = {0}; - uint32_t PendingEvents = 0; - bool MixedPendingEvents[NUM_INST_CNTS] = {false}; + unsigned ScoreLBs[NUM_INST_CNTS] = {0}; + unsigned ScoreUBs[NUM_INST_CNTS] = {0}; + unsigned PendingEvents = 0; // Remember the last flat memory operation. - uint32_t LastFlat[NUM_INST_CNTS] = {0}; + unsigned LastFlat[NUM_INST_CNTS] = {0}; // wait_cnt scores for every vgpr. // Keep track of the VgprUB and SgprUB to make merge at join efficient. - int32_t VgprUB = 0; - int32_t SgprUB = 0; - uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS]; + int VgprUB = -1; + int SgprUB = -1; + unsigned VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS] = {{0}}; // Wait cnt scores for every sgpr, only lgkmcnt is relevant. - uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0}; + unsigned SgprScores[SQ_MAX_PGM_SGPRS] = {0}; + // Bitmask of the VmemTypes of VMEM instructions that might have a pending + // write to each vgpr. 
+ unsigned char VgprVmemTypes[NUM_ALL_VGPRS] = {0}; }; class SIInsertWaitcnts : public MachineFunctionPass { @@ -385,8 +396,7 @@ class SIInsertWaitcnts : public MachineFunctionPass { explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {} }; - std::vector BlockInfos; // by reverse post-order traversal index - DenseMap RpotIdxMap; + MapVector BlockInfos; // ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0 // because of amdgpu-waitcnt-forcezero flag @@ -464,10 +474,10 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, const MachineRegisterInfo *MRI, const SIRegisterInfo *TRI, - unsigned OpNo, bool Def) const { + unsigned OpNo) const { const MachineOperand &Op = MI->getOperand(OpNo); - if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) || - (Def && !Op.isDef()) || TRI->isAGPR(*MRI, Op.getReg())) + assert(Op.isReg()); + if (!TRI->isInAllocatableClass(Op.getReg()) || TRI->isAGPR(*MRI, Op.getReg())) return {-1, -1}; // A use via a PW operand does not need a waitcnt. @@ -475,27 +485,25 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI, assert(!Op.getSubReg() || !Op.isUndef()); RegInterval Result; - const MachineRegisterInfo &MRIA = *MRI; unsigned Reg = TRI->getEncodingValue(Op.getReg()); - if (TRI->isVGPR(MRIA, Op.getReg())) { + if (TRI->isVGPR(*MRI, Op.getReg())) { assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL); Result.first = Reg - RegisterEncoding.VGPR0; assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS); - } else if (TRI->isSGPRReg(MRIA, Op.getReg())) { + } else if (TRI->isSGPRReg(*MRI, Op.getReg())) { assert(Reg >= RegisterEncoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS); Result.first = Reg - RegisterEncoding.SGPR0 + NUM_ALL_VGPRS; assert(Result.first >= NUM_ALL_VGPRS && Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS); } // TODO: Handle TTMP - // else if (TRI->isTTMP(MRIA, Reg.getReg())) ... + // else if (TRI->isTTMP(*MRI, Reg.getReg())) ... else return {-1, -1}; - const MachineInstr &MIA = *MI; - const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo); + const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo); unsigned Size = TRI->getRegSizeInBits(*RC); Result.second = Result.first + (Size / 32); @@ -506,13 +514,10 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI, const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, unsigned OpNo, - uint32_t Val) { - RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false); - LLVM_DEBUG({ - const MachineOperand &Opnd = MI->getOperand(OpNo); - assert(TRI->isVGPR(*MRI, Opnd.getReg())); - }); - for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { + unsigned Val) { + RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo); + assert(TRI->isVGPR(*MRI, MI->getOperand(OpNo).getReg())); + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { setRegScore(RegNo, EXP_CNT, Val); } } @@ -521,19 +526,14 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, WaitEventType E, MachineInstr &Inst) { - const MachineRegisterInfo &MRIA = *MRI; InstCounterType T = eventCounter(E); - uint32_t CurrScore = getScoreUB(T) + 1; + unsigned CurrScore = getScoreUB(T) + 1; if (CurrScore == 0) report_fatal_error("InsertWaitcnt score wraparound"); // PendingEvents and ScoreUB need to be update regardless if this event // changes the score of a register or not. 
// Examples including vm_cnt when buffer-store or lgkm_cnt when send-message. - if (!hasPendingEvent(E)) { - if (PendingEvents & WaitEventMaskForInst[T]) - MixedPendingEvents[T] = true; - PendingEvents |= 1 << E; - } + PendingEvents |= 1 << E; setScoreUB(T, CurrScore); if (T == EXP_CNT) { @@ -574,7 +574,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) { for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { const MachineOperand &Op = Inst.getOperand(I); - if (Op.isReg() && !Op.isDef() && TRI->isVGPR(MRIA, Op.getReg())) { + if (Op.isReg() && !Op.isDef() && TRI->isVGPR(*MRI, Op.getReg())) { setExpScore(&Inst, TII, TRI, MRI, I, CurrScore); } } @@ -622,7 +622,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { MachineOperand &DefMO = Inst.getOperand(I); if (DefMO.isReg() && DefMO.isDef() && - TRI->isVGPR(MRIA, DefMO.getReg())) { + TRI->isVGPR(*MRI, DefMO.getReg())) { setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT, CurrScore); } @@ -630,7 +630,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { MachineOperand &MO = Inst.getOperand(I); - if (MO.isReg() && !MO.isDef() && TRI->isVGPR(MRIA, MO.getReg())) { + if (MO.isReg() && !MO.isDef() && TRI->isVGPR(*MRI, MO.getReg())) { setExpScore(&Inst, TII, TRI, MRI, I, CurrScore); } } @@ -641,8 +641,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) { MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data); unsigned OpNo;//TODO: find the OpNo for this operand; - RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo, false); - for (signed RegNo = Interval.first; RegNo < Interval.second; + RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo); + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore); } @@ -650,10 +650,20 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } else { // Match the score to the destination registers. for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { - RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I, true); - if (T == VM_CNT && Interval.first >= NUM_ALL_VGPRS) + auto &Op = Inst.getOperand(I); + if (!Op.isReg() || !Op.isDef()) continue; - for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { + RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I); + if (T == VM_CNT) { + if (Interval.first >= NUM_ALL_VGPRS) + continue; + if (SIInstrInfo::isVMEM(Inst)) { + VmemType V = getVmemType(Inst); + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) + VgprVmemTypes[RegNo] |= 1 << V; + } + } + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { setRegScore(RegNo, T, CurrScore); } } @@ -666,8 +676,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, void WaitcntBrackets::print(raw_ostream &OS) { OS << '\n'; for (auto T : inst_counter_types()) { - uint32_t LB = getScoreLB(T); - uint32_t UB = getScoreUB(T); + unsigned LB = getScoreLB(T); + unsigned UB = getScoreUB(T); switch (T) { case VM_CNT: @@ -689,11 +699,11 @@ void WaitcntBrackets::print(raw_ostream &OS) { if (LB < UB) { // Print vgpr scores. 
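Note: the printout below shows scores re-based against the bracket's lower bound. A short restatement of the conversion used there (no new assumptions):

#include <cassert>

// Scores in (LB, UB] are still outstanding; RegScore - LB - 1 renames them
// to a 0-based index, so the oldest pending event prints as 0.
static unsigned relScore(unsigned RegScore, unsigned LB) {
  return RegScore - LB - 1; // callers skip registers with RegScore <= LB
}

int main() {
  assert(relScore(1, 0) == 0); // oldest outstanding event
  return 0;
}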
- for (int J = 0; J <= getMaxVGPR(); J++) { - uint32_t RegScore = getRegScore(J, T); + for (int J = 0; J <= VgprUB; J++) { + unsigned RegScore = getRegScore(J, T); if (RegScore <= LB) continue; - uint32_t RelScore = RegScore - LB - 1; + unsigned RelScore = RegScore - LB - 1; if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) { OS << RelScore << ":v" << J << " "; } else { @@ -702,11 +712,11 @@ void WaitcntBrackets::print(raw_ostream &OS) { } // Also need to print sgpr scores for lgkm_cnt. if (T == LGKM_CNT) { - for (int J = 0; J <= getMaxSGPR(); J++) { - uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT); + for (int J = 0; J <= SgprUB; J++) { + unsigned RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT); if (RegScore <= LB) continue; - uint32_t RelScore = RegScore - LB - 1; + unsigned RelScore = RegScore - LB - 1; OS << RelScore << ":s" << J << " "; } } @@ -727,8 +737,8 @@ bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const { bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T, unsigned &Count) const { - const uint32_t LB = getScoreLB(T); - const uint32_t UB = getScoreUB(T); + const unsigned LB = getScoreLB(T); + const unsigned UB = getScoreUB(T); if (Count < UB && UB - Count > LB) return true; @@ -736,12 +746,12 @@ bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T, return false; } -void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait, +void WaitcntBrackets::determineWait(InstCounterType T, unsigned ScoreToWait, AMDGPU::Waitcnt &Wait) const { // If the score of src_operand falls within the bracket, we need an // s_waitcnt instruction. - const uint32_t LB = getScoreLB(T); - const uint32_t UB = getScoreUB(T); + const unsigned LB = getScoreLB(T); + const unsigned UB = getScoreUB(T); if ((UB >= ScoreToWait) && (ScoreToWait > LB)) { if ((T == VM_CNT || T == LGKM_CNT) && hasPendingFlat() && @@ -758,7 +768,7 @@ void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait, } else { // If a counter has been maxed out avoid overflow by waiting for // MAX(CounterType) - 1 instead. - uint32_t NeededWait = std::min(UB - ScoreToWait, getWaitCountMax(T) - 1); + unsigned NeededWait = std::min(UB - ScoreToWait, getWaitCountMax(T) - 1); addWait(Wait, T, NeededWait); } } @@ -772,7 +782,7 @@ void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) { } void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) { - const uint32_t UB = getScoreUB(T); + const unsigned UB = getScoreUB(T); if (Count >= UB) return; if (Count != 0) { @@ -781,7 +791,6 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) { setScoreLB(T, std::max(getScoreLB(T), UB - Count)); } else { setScoreLB(T, UB); - MixedPendingEvents[T] = false; PendingEvents &= ~WaitEventMaskForInst[T]; } } @@ -792,7 +801,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const { // Scalar memory read always can go out of order. 
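Note: the bracket arithmetic in determineWait above is easy to misread; here is a host-side model with a worked example (the vmcnt limit of 63 mirrors the code above, everything else is illustrative):

#include <algorithm>
#include <cassert>

// Scores are monotonically increasing event ticks. An event with score S in
// (LB, UB] is still outstanding; waiting for it means allowing at most
// UB - S younger events to stay in flight, clamped below the counter's
// hardware maximum to avoid overflowing the immediate field.
static unsigned neededWait(unsigned LB, unsigned UB, unsigned ScoreToWait,
                           unsigned CounterMax) {
  assert(ScoreToWait > LB && ScoreToWait <= UB); // otherwise no wait needed
  return std::min(UB - ScoreToWait, CounterMax - 1);
}

int main() {
  // Three VMEM loads issued (LB = 0, UB = 3); to consume the result of the
  // first one (score 1), two younger loads may stay in flight: vmcnt(2).
  assert(neededWait(0, 3, 1, 63) == 2);
  return 0;
}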
if (T == LGKM_CNT && hasPendingEvent(SMEM_ACCESS)) return true; - return MixedPendingEvents[T]; + return hasMixedPendingEvents(T); } INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false, @@ -954,10 +963,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( int CallAddrOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); - RegInterval CallAddrOpInterval = ScoreBrackets.getRegInterval( - &MI, TII, MRI, TRI, CallAddrOpIdx, false); + RegInterval CallAddrOpInterval = + ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx); - for (signed RegNo = CallAddrOpInterval.first; + for (int RegNo = CallAddrOpInterval.first; RegNo < CallAddrOpInterval.second; ++RegNo) ScoreBrackets.determineWait( LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); @@ -965,10 +974,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( int RtnAddrOpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); if (RtnAddrOpIdx != -1) { - RegInterval RtnAddrOpInterval = ScoreBrackets.getRegInterval( - &MI, TII, MRI, TRI, RtnAddrOpIdx, false); + RegInterval RtnAddrOpInterval = + ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx); - for (signed RegNo = RtnAddrOpInterval.first; + for (int RegNo = RtnAddrOpInterval.first; RegNo < RtnAddrOpInterval.second; ++RegNo) ScoreBrackets.determineWait( LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); @@ -982,7 +991,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( // emitted. // If the source operand was defined by a load, add the s_waitcnt // instruction. + // + // Two cases are handled for destination operands: + // 1) If the destination operand was defined by a load, add the s_waitcnt + // instruction to guarantee the right WAW order. + // 2) If a destination operand was used by a recent export/store + // instruction, add s_waitcnt on exp_cnt to guarantee the WAR order. for (const MachineMemOperand *Memop : MI.memoperands()) { + const Value *Ptr = Memop->getValue(); + if (Memop->isStore() && SLoadAddresses.count(Ptr)) { + addWait(Wait, LGKM_CNT, 0); + if (PDT->dominates(MI.getParent(), SLoadAddresses.find(Ptr)->second)) + SLoadAddresses.erase(Ptr); + } unsigned AS = Memop->getAddrSpace(); if (AS != AMDGPUAS::LOCAL_ADDRESS) continue; @@ -990,67 +1011,41 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( // VM_CNT is only relevant to vgpr or LDS. ScoreBrackets.determineWait( VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); - } - - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &Op = MI.getOperand(I); - const MachineRegisterInfo &MRIA = *MRI; - RegInterval Interval = - ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false); - for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { - if (TRI->isVGPR(MRIA, Op.getReg())) { - // VM_CNT is only relevant to vgpr or LDS. - ScoreBrackets.determineWait( - VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); - } - ScoreBrackets.determineWait( - LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); - } - } - // End of for loop that looks at all source operands to decide vm_wait_cnt - // and lgk_wait_cnt. - - // Two cases are handled for destination operands: - // 1) If the destination operand was defined by a load, add the s_waitcnt - // instruction to guarantee the right WAW order. - // 2) If a destination operand that was used by a recent export/store ins, - // add s_waitcnt on exp_cnt to guarantee the WAR order.
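Note: with this change a single operand loop (below) decides all the waits that the two removed source/destination loops used to handle. The decision table reduces to a few lines; this is a sketch under the same-type in-order assumption described earlier, not the committed logic:

#include <cassert>

enum class Access { Use, Def };

// RAW: any read of a vgpr with pending VMEM writes waits on vmcnt.
// WAW: a VMEM def may skip vmcnt when all pending writes are of its own
// VmemType. WAR: defs additionally wait on expcnt to order against recent
// exports/stores. Every register operand still checks lgkmcnt.
static bool needVmcnt(Access A, bool OnlySameTypeWritesPending) {
  return A == Access::Use || !OnlySameTypeWritesPending;
}
static bool needExpcnt(Access A) { return A == Access::Def; }

int main() {
  assert(needVmcnt(Access::Use, true));  // RAW always waits
  assert(!needVmcnt(Access::Def, true)); // same-type WAW can skip vmcnt
  assert(needExpcnt(Access::Def));       // WAR: defs wait on expcnt
  return 0;
}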
- if (MI.mayStore()) { - // FIXME: Should not be relying on memoperands. - for (const MachineMemOperand *Memop : MI.memoperands()) { - const Value *Ptr = Memop->getValue(); - if (SLoadAddresses.count(Ptr)) { - addWait(Wait, LGKM_CNT, 0); - if (PDT->dominates(MI.getParent(), - SLoadAddresses.find(Ptr)->second)) - SLoadAddresses.erase(Ptr); - } - unsigned AS = Memop->getAddrSpace(); - if (AS != AMDGPUAS::LOCAL_ADDRESS) - continue; - unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS; - ScoreBrackets.determineWait( - VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); + if (Memop->isStore()) { ScoreBrackets.determineWait( EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); } } + + // Loop over use and def operands. for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &Def = MI.getOperand(I); - const MachineRegisterInfo &MRIA = *MRI; + MachineOperand &Op = MI.getOperand(I); + if (!Op.isReg()) + continue; RegInterval Interval = - ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true); - for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { - if (TRI->isVGPR(MRIA, Def.getReg())) { - ScoreBrackets.determineWait( - VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); - ScoreBrackets.determineWait( - EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); + ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I); + for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { + if (TRI->isVGPR(*MRI, Op.getReg())) { + // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the + // previous write and this write are the same type of VMEM + // instruction, in which case they're guaranteed to write their + // results in order anyway. + if (Op.isUse() || !SIInstrInfo::isVMEM(MI) || + ScoreBrackets.hasOtherPendingVmemTypes(RegNo, + getVmemType(MI))) { + ScoreBrackets.determineWait( + VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait); + ScoreBrackets.clearVgprVmemTypes(RegNo); + } + if (Op.isDef()) { + ScoreBrackets.determineWait( + EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait); + } } ScoreBrackets.determineWait( LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait); } - } // End of for loop that looks at all dest operands. + } } } @@ -1154,7 +1149,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( } LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << MI << '\n' + << "Old Instr: " << MI << "New Instr: " << *II << '\n'); if (!Wait.hasWait()) @@ -1171,7 +1166,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( Modified = true; LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << MI << '\n' + << "Old Instr: " << MI << "New Instr: " << *SWaitInst << '\n'); } @@ -1187,7 +1182,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( Modified = true; LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n" - << "Old Instr: " << MI << '\n' + << "Old Instr: " << MI << "New Instr: " << *SWaitInst << '\n'); } @@ -1303,10 +1298,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, } } -bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score, - uint32_t OtherScore) { - uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift; - uint32_t OtherShifted = +bool WaitcntBrackets::mergeScore(const MergeInfo &M, unsigned &Score, + unsigned OtherScore) { + unsigned MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift; + unsigned OtherShifted = OtherScore <= M.OtherLB ? 
0 : OtherScore + M.OtherShift; Score = std::max(MyShifted, OtherShifted); return OtherShifted > MyShifted; @@ -1320,44 +1315,50 @@ bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score, bool WaitcntBrackets::merge(const WaitcntBrackets &Other) { bool StrictDom = false; + VgprUB = std::max(VgprUB, Other.VgprUB); + SgprUB = std::max(SgprUB, Other.SgprUB); + for (auto T : inst_counter_types()) { // Merge event flags for this counter const bool OldOutOfOrder = counterOutOfOrder(T); - const uint32_t OldEvents = PendingEvents & WaitEventMaskForInst[T]; - const uint32_t OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T]; + const unsigned OldEvents = PendingEvents & WaitEventMaskForInst[T]; + const unsigned OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T]; if (OtherEvents & ~OldEvents) StrictDom = true; - if (Other.MixedPendingEvents[T] || - (OldEvents && OtherEvents && OldEvents != OtherEvents)) - MixedPendingEvents[T] = true; PendingEvents |= OtherEvents; // Merge scores for this counter - const uint32_t MyPending = ScoreUBs[T] - ScoreLBs[T]; - const uint32_t OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T]; + const unsigned MyPending = ScoreUBs[T] - ScoreLBs[T]; + const unsigned OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T]; + const unsigned NewUB = ScoreLBs[T] + std::max(MyPending, OtherPending); + if (NewUB < ScoreLBs[T]) + report_fatal_error("waitcnt score overflow"); + MergeInfo M; M.OldLB = ScoreLBs[T]; M.OtherLB = Other.ScoreLBs[T]; - M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0; - M.OtherShift = ScoreUBs[T] - Other.ScoreUBs[T] + M.MyShift; + M.MyShift = NewUB - ScoreUBs[T]; + M.OtherShift = NewUB - Other.ScoreUBs[T]; - const uint32_t NewUB = ScoreUBs[T] + M.MyShift; - if (NewUB < ScoreUBs[T]) - report_fatal_error("waitcnt score overflow"); ScoreUBs[T] = NewUB; - ScoreLBs[T] = std::min(M.OldLB + M.MyShift, M.OtherLB + M.OtherShift); StrictDom |= mergeScore(M, LastFlat[T], Other.LastFlat[T]); bool RegStrictDom = false; - for (int J = 0, E = std::max(getMaxVGPR(), Other.getMaxVGPR()) + 1; J != E; - J++) { + for (int J = 0; J <= VgprUB; J++) { RegStrictDom |= mergeScore(M, VgprScores[T][J], Other.VgprScores[T][J]); } + if (T == VM_CNT) { + for (int J = 0; J <= VgprUB; J++) { + unsigned char NewVmemTypes = VgprVmemTypes[J] | Other.VgprVmemTypes[J]; + RegStrictDom |= NewVmemTypes != VgprVmemTypes[J]; + VgprVmemTypes[J] = NewVmemTypes; + } + } + if (T == LGKM_CNT) { - for (int J = 0, E = std::max(getMaxSGPR(), Other.getMaxSGPR()) + 1; - J != E; J++) { + for (int J = 0; J <= SgprUB; J++) { RegStrictDom |= mergeScore(M, SgprScores[J], Other.SgprScores[J]); } } @@ -1366,9 +1367,6 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) { StrictDom = true; } - VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR()); - SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR()); - return StrictDom; } @@ -1507,29 +1505,23 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV); HardwareLimits.VscntMax = ST->hasVscnt() ? 
63 : 0; - HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs(); - HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs(); - assert(HardwareLimits.NumVGPRsMax <= SQ_MAX_PGM_VGPRS); - assert(HardwareLimits.NumSGPRsMax <= SQ_MAX_PGM_SGPRS); + unsigned NumVGPRsMax = ST->getAddressableNumVGPRs(); + unsigned NumSGPRsMax = ST->getAddressableNumSGPRs(); + assert(NumVGPRsMax <= SQ_MAX_PGM_VGPRS); + assert(NumSGPRsMax <= SQ_MAX_PGM_SGPRS); RegisterEncoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0); - RegisterEncoding.VGPRL = - RegisterEncoding.VGPR0 + HardwareLimits.NumVGPRsMax - 1; + RegisterEncoding.VGPRL = RegisterEncoding.VGPR0 + NumVGPRsMax - 1; RegisterEncoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0); - RegisterEncoding.SGPRL = - RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1; + RegisterEncoding.SGPRL = RegisterEncoding.SGPR0 + NumSGPRsMax - 1; TrackedWaitcntSet.clear(); - RpotIdxMap.clear(); BlockInfos.clear(); // Keep iterating over the blocks in reverse post order, inserting and // updating s_waitcnt where needed, until a fix point is reached. - for (MachineBasicBlock *MBB : - ReversePostOrderTraversal(&MF)) { - RpotIdxMap[MBB] = BlockInfos.size(); - BlockInfos.emplace_back(MBB); - } + for (auto *MBB : ReversePostOrderTraversal(&MF)) + BlockInfos.insert({MBB, BlockInfo(MBB)}); std::unique_ptr Brackets; bool Modified = false; @@ -1537,12 +1529,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { do { Repeat = false; - for (BlockInfo &BI : BlockInfos) { + for (auto BII = BlockInfos.begin(), BIE = BlockInfos.end(); BII != BIE; + ++BII) { + BlockInfo &BI = BII->second; if (!BI.Dirty) continue; - unsigned Idx = std::distance(&*BlockInfos.begin(), &BI); - if (BI.Incoming) { if (!Brackets) Brackets = std::make_unique(*BI.Incoming); @@ -1552,7 +1544,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (!Brackets) Brackets = std::make_unique(ST); else - Brackets->clear(); + *Brackets = WaitcntBrackets(ST); } Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets); @@ -1561,11 +1553,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (Brackets->hasPending()) { BlockInfo *MoveBracketsToSucc = nullptr; for (MachineBasicBlock *Succ : BI.MBB->successors()) { - unsigned SuccIdx = RpotIdxMap[Succ]; - BlockInfo &SuccBI = BlockInfos[SuccIdx]; + auto SuccBII = BlockInfos.find(Succ); + BlockInfo &SuccBI = SuccBII->second; if (!SuccBI.Incoming) { SuccBI.Dirty = true; - if (SuccIdx <= Idx) + if (SuccBII <= BII) Repeat = true; if (!MoveBracketsToSucc) { MoveBracketsToSucc = &SuccBI; @@ -1574,7 +1566,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { } } else if (SuccBI.Incoming->merge(*Brackets)) { SuccBI.Dirty = true; - if (SuccIdx <= Idx) + if (SuccBII <= BII) Repeat = true; } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 60569dfbbe1d2..1d0397afd771f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -83,6 +83,12 @@ static cl::opt BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)")); +static cl::opt Fix16BitCopies( + "amdgpu-fix-16-bit-physreg-copies", + cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), + cl::init(true), + cl::ReallyHidden); + SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST) : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), RI(ST), ST(ST) { @@ -527,6 
+533,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MCRegister SrcReg, bool KillSrc) const { const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); + // FIXME: This is a hack to resolve copies between 16 bit and 32 bit + // registers until all patterns are fixed. + if (Fix16BitCopies && + ((RI.getRegSizeInBits(*RC) == 16) ^ + (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) { + MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg; + MCRegister Super = RI.get32BitRegister(RegToFix); + assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix); + RegToFix = Super; + + if (DestReg == SrcReg) { + // Insert empty bundle since ExpandPostRA expects an instruction here. + BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE)); + return; + } + + RC = RI.getPhysRegClass(DestReg); + } + if (RC == &AMDGPU::VGPR_32RegClass) { assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || AMDGPU::SReg_32RegClass.contains(SrcReg) || @@ -678,28 +703,24 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass || - RC == &AMDGPU::SGPR_LO16RegClass) { + if (RI.getRegSizeInBits(*RC) == 16) { assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || - AMDGPU::SGPR_LO16RegClass.contains(SrcReg)); - - bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg); - bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg); - bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass || - RC == &AMDGPU::SGPR_LO16RegClass); + AMDGPU::SReg_LO16RegClass.contains(SrcReg) || + AMDGPU::AGPR_LO16RegClass.contains(SrcReg)); + + bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg); + bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg); + bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg); + bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg); + bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) || + AMDGPU::SReg_LO16RegClass.contains(DestReg) || + AMDGPU::AGPR_LO16RegClass.contains(DestReg); bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::SGPR_LO16RegClass.contains(SrcReg); - const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass - : &AMDGPU::VGPR_32RegClass; - const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass - : &AMDGPU::VGPR_32RegClass; - MCRegister NewDestReg = - RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16, - DstRC); - MCRegister NewSrcReg = - RI.getMatchingSuperReg(SrcReg, SrcLow ?
AMDGPU::lo16 : AMDGPU::hi16, - SrcRC); + AMDGPU::SReg_LO16RegClass.contains(SrcReg) || + AMDGPU::AGPR_LO16RegClass.contains(SrcReg); + MCRegister NewDestReg = RI.get32BitRegister(DestReg); + MCRegister NewSrcReg = RI.get32BitRegister(SrcReg); if (IsSGPRDst) { if (!IsSGPRSrc) { @@ -712,6 +733,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (IsAGPRDst || IsAGPRSrc) { + if (!DstLow || !SrcLow) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, + "Cannot use hi16 subreg with an AGPR!"); + } + + copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc); + return; + } + if (IsSGPRSrc && !ST.hasSDWAScalar()) { if (!DstLow || !SrcLow) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, @@ -2478,15 +2509,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::COPY) { - bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg()); + Register DstReg = UseMI.getOperand(0).getReg(); + bool Is16Bit = getOpSize(UseMI, 0) == 2; + bool isVGPRCopy = RI.isVGPR(*MRI, DstReg); unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; - if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) { - if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32)) + APInt Imm(32, ImmOp->getImm()); + + if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16) + Imm = Imm.ashr(16); + + if (RI.isAGPR(*MRI, DstReg)) { + if (!isInlineConstant(Imm)) return false; NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32; } + + if (Is16Bit) { + if (isVGPRCopy) + return false; // Do not clobber vgpr_hi16 + + if (DstReg.isVirtual() && + UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) + return false; + + UseMI.getOperand(0).setSubReg(0); + if (DstReg.isPhysical()) { + DstReg = RI.get32BitRegister(DstReg); + UseMI.getOperand(0).setReg(DstReg); + } + assert(UseMI.getOperand(1).getReg().isVirtual()); + } + UseMI.setDesc(get(NewOpc)); - UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm()); + UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue()); + UseMI.getOperand(1).setTargetFlags(0); UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent()); return true; } @@ -5160,6 +5216,64 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32); Inst.eraseFromParent(); continue; + + // TODO: remove as soon as everything is ready + // to replace VGPR to SGPR copy with V_READFIRSTLANEs. + // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO + // can only be selected from the uniform SDNode. + case AMDGPU::S_ADD_CO_PSEUDO: + case AMDGPU::S_SUB_CO_PSEUDO: { + unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) + ? 
AMDGPU::V_ADDC_U32_e64 + : AMDGPU::V_SUBB_U32_e64; + const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); + Register DummyCReg = MRI.createVirtualRegister(CarryRC); + Register CarryReg = MRI.createVirtualRegister(CarryRC); + Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass( + MRI.getRegClass(Inst.getOperand(0).getReg()))); + BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), CarryReg) + .addReg(Inst.getOperand(4).getReg()); + MachineInstr *CarryOp = + BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg) + .addReg(DummyCReg, RegState::Define | RegState::Dead) + .add(Inst.getOperand(2)) + .add(Inst.getOperand(3)) + .addReg(CarryReg, RegState::Kill) + .addImm(0); + legalizeOperands(*CarryOp); + MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg); + addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist); + Inst.eraseFromParent(); + } + continue; + case AMDGPU::S_UADDO_PSEUDO: + case AMDGPU::S_USUBO_PSEUDO: { + const DebugLoc &DL = Inst.getDebugLoc(); + MachineOperand &Dest0 = Inst.getOperand(0); + MachineOperand &Dest1 = Inst.getOperand(1); + MachineOperand &Src0 = Inst.getOperand(2); + MachineOperand &Src1 = Inst.getOperand(3); + + unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO) + ? AMDGPU::V_ADD_I32_e64 + : AMDGPU::V_SUB_I32_e64; + const TargetRegisterClass *NewRC = + RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg())); + Register DestReg = MRI.createVirtualRegister(NewRC); + MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg) + .addReg(Dest1.getReg(), RegState::Define) + .add(Src0) + .add(Src1) + .addImm(0); // clamp bit + + legalizeOperands(*NewInstr, MDT); + + MRI.replaceRegWith(Dest0.getReg(), DestReg); + addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI, + Worklist); + Inst.eraseFromParent(); + } + continue; } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { @@ -5884,18 +5998,37 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, // Ensure that def inst defines SCC, which is still live. assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() && !Op.isDead() && Op.getParent() == &SCCDefInst); + SmallVector CopyToDelete; // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineInstr &MI : // Skip the def inst itself. make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)), SCCDefInst.getParent()->end())) { // Check if SCC is used first. - if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) - Worklist.insert(&MI); + if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) { + if (MI.isCopy()) { + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + unsigned DestReg = MI.getOperand(0).getReg(); + SmallVector Users; + for (auto &User : MRI.use_nodbg_instructions(DestReg)) { + if ((User.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) || + (User.getOpcode() == AMDGPU::S_SUB_CO_PSEUDO)) { + Users.push_back(&User); + Worklist.insert(&User); + } + } + for (auto &U : Users) + U->getOperand(4).setReg(RI.getVCC()); + CopyToDelete.push_back(&MI); + } else + Worklist.insert(&MI); + } // Exit if we find another SCC def. 
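Note: this helper walks forward from the SCC def, collecting users until SCC is redefined; the early return becomes a break just below so that the collected copies can be erased after the loop. A toy model of the scan shape (illustrative names):

#include <cassert>
#include <vector>

struct Inst {
  bool UsesFlag = false;
  bool DefsFlag = false;
  int Id = 0;
};

// Visit instructions after the defining one; record users of the still-live
// flag and stop at the next def, which starts a new live range.
static std::vector<int> collectFlagUsers(const std::vector<Inst> &Block,
                                         size_t DefIdx) {
  std::vector<int> Users;
  for (size_t I = DefIdx + 1; I < Block.size(); ++I) {
    if (Block[I].UsesFlag)
      Users.push_back(Block[I].Id);
    if (Block[I].DefsFlag)
      break; // flag redefined: later uses see the new value
  }
  return Users;
}

int main() {
  std::vector<Inst> B = {{false, true, 0},  // def
                         {true, false, 1},  // use of the live flag
                         {false, true, 2},  // redef: scan stops here
                         {true, false, 3}}; // belongs to the next live range
  assert(collectFlagUsers(B, 0) == std::vector<int>{1});
  return 0;
}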
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) - return; + break; } + for (auto &Copy : CopyToDelete) + Copy->eraseFromParent(); } const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 9fcc5caf7dfdd..8231a96f5f6b2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -827,11 +827,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { const MachineOperand &MO = MI.getOperand(OpNo); if (MO.isReg()) { if (unsigned SubReg = MO.getSubReg()) { - assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg( - MI.getParent()->getParent()->getRegInfo(). - getRegClass(MO.getReg()), SubReg)) >= 32 && - "Sub-dword subregs are not supported"); - return RI.getNumChannelsFromSubReg(SubReg) * 4; + return RI.getSubRegIdxSize(SubReg) / 8; } } return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index dae8b0b7d39aa..ab6f51187a2f4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -193,16 +193,27 @@ def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst), let Constraints = "$src = $vdst"; } +let usesCustomInserter = 1, Defs = [VCC, EXEC] in { +def V_ADD_U64_PSEUDO : VPseudoInstSI < + (outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1), + [(set VReg_64:$vdst, (getDivergentFrag.ret i64:$src0, i64:$src1))] +>; + +def V_SUB_U64_PSEUDO : VPseudoInstSI < + (outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1), + [(set VReg_64:$vdst, (getDivergentFrag.ret i64:$src0, i64:$src1))] +>; +} // End usesCustomInserter = 1, Defs = [VCC, EXEC] let usesCustomInserter = 1, Defs = [SCC] in { def S_ADD_U64_PSEUDO : SPseudoInstSI < - (outs SReg_64:$vdst), (ins SSrc_b64:$src0, SSrc_b64:$src1), - [(set SReg_64:$vdst, (add i64:$src0, i64:$src1))] + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1), + [(set SReg_64:$sdst, (UniformBinFrag i64:$src0, i64:$src1))] >; def S_SUB_U64_PSEUDO : SPseudoInstSI < - (outs SReg_64:$vdst), (ins SSrc_b64:$src0, SSrc_b64:$src1), - [(set SReg_64:$vdst, (sub i64:$src0, i64:$src1))] + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1), + [(set SReg_64:$sdst, (UniformBinFrag i64:$src0, i64:$src1))] >; def S_ADD_U64_CO_PSEUDO : SPseudoInstSI < @@ -212,6 +223,23 @@ def S_ADD_U64_CO_PSEUDO : SPseudoInstSI < def S_SUB_U64_CO_PSEUDO : SPseudoInstSI < (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) >; + +def S_ADD_CO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) +>; + +def S_SUB_CO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in) +>; + +def S_UADDO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) +>; + +def S_USUBO_PSEUDO : SPseudoInstSI < + (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) +>; + } // End usesCustomInserter = 1, Defs = [SCC] let usesCustomInserter = 1 in { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 6f668b3b720a2..07d5b62cd2508 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -263,6 +263,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) 
const { reserveRegisterTuples(Reserved, Reg); } + for (auto Reg : AMDGPU::SReg_32RegClass) { + Reserved.set(getSubReg(Reg, AMDGPU::hi16)); + Register Low = getSubReg(Reg, AMDGPU::lo16); + // This is to prevent BB vcc liveness errors. + if (!AMDGPU::SGPR_LO16RegClass.contains(Low)) + Reserved.set(Low); + } + + for (auto Reg : AMDGPU::AGPR_32RegClass) { + Reserved.set(getSubReg(Reg, AMDGPU::hi16)); + } + // Reserve all the rest AGPRs if there are no instructions to use it. if (!ST.hasMAIInsts()) { for (unsigned i = 0; i < MaxNumVGPRs; ++i) { @@ -1307,6 +1319,8 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) { const TargetRegisterClass * SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) { switch (BitWidth) { + case 16: + return &AMDGPU::AGPR_LO16RegClass; case 32: return &AMDGPU::AGPR_32RegClass; case 64: @@ -1365,7 +1379,8 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const { static const TargetRegisterClass *const BaseClasses[] = { &AMDGPU::VGPR_LO16RegClass, &AMDGPU::VGPR_HI16RegClass, - &AMDGPU::SGPR_LO16RegClass, + &AMDGPU::SReg_LO16RegClass, + &AMDGPU::AGPR_LO16RegClass, &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::AGPR_32RegClass, @@ -1424,7 +1439,7 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); - if (Size < 32) + if (Size < 16) return false; const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); if (!ARC) { @@ -1783,3 +1798,20 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, return Def; } + +MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const { + assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32); + + for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass, + AMDGPU::SReg_32RegClass, + AMDGPU::AGPR_32RegClass } ) { + if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC)) + return Super; + } + if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16, + &AMDGPU::VGPR_32RegClass)) { + return Super; + } + + return AMDGPU::NoRegister; +} diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 0597728f7f1f9..2f328e77506ef 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -283,7 +283,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { // \returns a DWORD offset of a \p SubReg unsigned getChannelFromSubReg(unsigned SubReg) const { - return SubReg ? divideCeil(getSubRegIdxOffset(SubReg), 32) : 0; + return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0; } // \returns a DWORD size of a \p SubReg @@ -291,6 +291,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg)); } + // For a given 16 bit \p Reg \returns a 32 bit register holding it. + // \returns \p Reg otherwise. + MCPhysReg get32BitRegister(MCPhysReg Reg) const; + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index f493f937430c9..5728a4df29958 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -123,25 +123,41 @@ class SIRegisterTuples Indices, RegisterClass RC, class SIReg regIdx = 0> : Register { let Namespace = "AMDGPU"; - - // This is the not yet the complete register encoding. An additional - // bit is set for VGPRs. 
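Note: the "additional bit" in the removed comment is bit 8 of HWEncoding, which the SIRegLoHi16 multiclass below sets through HWEncodingHigh for VGPRs and AGPRs. A host-side model of just the encoding arithmetic (the field layout beyond bit 8 is an assumption for illustration):

#include <cassert>
#include <cstdint>

// Low bits carry the register index; bit 8 marks vector registers
// (HWEncodingHigh = 1 in the TableGen definitions below).
static uint16_t hwEncoding(uint8_t Idx, bool HWEncodingHigh) {
  return static_cast<uint16_t>(Idx) | (HWEncodingHigh ? 1u << 8 : 0u);
}

int main() {
  assert(hwEncoding(5, false) == 5);              // s5
  assert(hwEncoding(5, true) == ((1u << 8) | 5)); // v5 / a5: bit 8 set
  return 0;
}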
let HWEncoding = regIdx; } -class SIRegWithSubRegs subregs, bits<16> regIdx = 0> : +class SIRegWithSubRegs subregs, bits<16> regIdx> : RegisterWithSubRegs { - let Namespace = "AMDGPU"; +} - // This is the not yet the complete register encoding. An additional - // bit is set for VGPRs. - let HWEncoding = regIdx; - let CoveredBySubRegs = 1; +multiclass SIRegLoHi16 regIdx, bit ArtificialHigh = 1, + bit HWEncodingHigh = 0> { + // There is no special encoding for 16 bit subregs; these are not real + // registers but rather operands for instructions preserving the other 16 + // bits of the result or reading just 16 bits of a 32 bit VGPR. + // They are encoded as the corresponding 32 bit register. + // Non-VGPR register classes use them as we need to have matching + // subregisters to move instructions and data between ALUs. + def _LO16 : SIReg { + let HWEncoding{8} = HWEncodingHigh; + } + def _HI16 : SIReg { + let isArtificial = ArtificialHigh; + let HWEncoding{8} = HWEncodingHigh; + } + def "" : RegisterWithSubRegs(NAME#"_LO16"), + !cast(NAME#"_HI16")]> { + let Namespace = "AMDGPU"; + let SubRegIndices = [lo16, hi16]; + let CoveredBySubRegs = !if(ArtificialHigh,0,1); + let HWEncoding = regIdx; + let HWEncoding{8} = HWEncodingHigh; + } } // Special Registers -def VCC_LO : SIReg<"vcc_lo", 106>; -def VCC_HI : SIReg<"vcc_hi", 107>; +defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>; +defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>; // Pseudo-registers: Used as placeholders during isel and immediately // replaced, never seeing the verifier. @@ -164,8 +180,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> { let HWEncoding = 106; } -def EXEC_LO : SIReg<"exec_lo", 126>, DwarfRegNum<[1, 1]>; -def EXEC_HI : SIReg<"exec_hi", 127>; +defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>; +defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>; def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> { let Namespace = "AMDGPU"; @@ -175,22 +191,22 @@ def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> { // 32-bit real registers, for MC only. // May be used with both 32-bit and 64-bit operands. -def SRC_VCCZ : SIReg<"src_vccz", 251>; -def SRC_EXECZ : SIReg<"src_execz", 252>; -def SRC_SCC : SIReg<"src_scc", 253>; +defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>; +defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>; +defm SRC_SCC : SIRegLoHi16<"src_scc", 253>; // 1-bit pseudo register, for codegen only. // Should never be emitted. def SCC : SIReg<"scc">; -def M0 : SIReg <"m0", 124>; -def SGPR_NULL : SIReg<"null", 125>; +defm M0 : SIRegLoHi16 <"m0", 124>; +defm SGPR_NULL : SIRegLoHi16 <"null", 125>; -def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>; -def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>; -def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>; -def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>; -def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>; +defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>; +defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>; +defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>; +defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>; +defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>; def LDS_DIRECT : SIReg <"src_lds_direct", 254> { // There is no physical register corresponding to this.
This is an @@ -199,8 +215,8 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254> { let isArtificial = 1; } -def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; -def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; +defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>; +defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>; def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> { @@ -210,8 +226,8 @@ def XNACK_MASK : } // Trap handler registers -def TBA_LO : SIReg<"tba_lo", 108>; -def TBA_HI : SIReg<"tba_hi", 109>; +defm TBA_LO : SIRegLoHi16<"tba_lo", 108>; +defm TBA_HI : SIRegLoHi16<"tba_hi", 109>; def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> { let Namespace = "AMDGPU"; @@ -219,8 +235,8 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> { let HWEncoding = 108; } -def TMA_LO : SIReg<"tma_lo", 110>; -def TMA_HI : SIReg<"tma_hi", 111>; +defm TMA_LO : SIRegLoHi16<"tma_lo", 110>; +defm TMA_HI : SIRegLoHi16<"tma_hi", 111>; def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> { let Namespace = "AMDGPU"; @@ -229,15 +245,15 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> { } foreach Index = 0-15 in { - def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>; - def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>; - def TTMP#Index : SIReg<"ttmp"#Index, 0>; + defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>; + defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>; + defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>; } multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { - def _ci : SIReg; - def _vi : SIReg; - def "" : SIReg; + defm _ci : SIRegLoHi16; + defm _vi : SIRegLoHi16; + defm "" : SIRegLoHi16; } class FlatReg encoding> : @@ -256,59 +272,24 @@ def FLAT_SCR : FlatReg; // SGPR registers foreach Index = 0-105 in { - def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>, - DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), - !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; - - // This is a placeholder to fill high lane in mask. - def SGPR#Index#_HI16 : SIReg <"", Index> { - let isArtificial = 1; - } - - def SGPR#Index : - SIRegWithSubRegs <"s"#Index, [!cast("SGPR"#Index#"_LO16"), - !cast("SGPR"#Index#"_HI16")], - Index>, - DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), - !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> { - let SubRegIndices = [lo16, hi16]; - } + defm SGPR#Index : + SIRegLoHi16 <"s"#Index, Index>, + DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), + !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; } // VGPR registers foreach Index = 0-255 in { - // There is no special encoding for low 16 bit subreg, this not a real - // register but rather an operand for instructions preserving high 16 bits - // of the result or reading just low 16 bits of a 32 bit VGPR. - // It is encoded as a corresponding 32 bit register. - def VGPR#Index#_LO16 : SIReg <"v"#Index#".l", Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - } - // There is no special encoding for low 16 bit subreg, this not a real - // register but rather an operand for instructions preserving low 16 bits - // of the result or reading just high 16 bits of a 32 bit VGPR. - // It is encoded as a corresponding 32 bit register. 
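Each defm through SIRegLoHi16 above expands to three defs: the 32-bit register plus _LO16/_HI16 halves that reuse its hardware encoding, with the high half optionally artificial. A toy model of what one expansion produces (plain C++, invented names; in the real TableGen an artificial high half gets an empty name, here it just carries a flag):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ToyReg {
  std::string Name;
  uint16_t HWEncoding; // halves reuse the parent's encoding
  bool Artificial;
};

struct ToyRegTriple {
  ToyReg Parent, Lo16, Hi16;
};

// Rough analogue of "defm X : SIRegLoHi16<name, idx, ArtificialHigh>".
ToyRegTriple makeLoHi16(const std::string &Name, uint16_t Enc,
                        bool ArtificialHigh = true) {
  return {{Name, Enc, false},
          {Name + ".l", Enc, false},
          {Name + ".h", Enc, ArtificialHigh}};
}

int main() {
  std::vector<ToyRegTriple> SGPRs;
  for (uint16_t I = 0; I < 4; ++I)
    SGPRs.push_back(makeLoHi16("s" + std::to_string(I), I));
  for (const ToyRegTriple &T : SGPRs)
    std::cout << T.Parent.Name << " -> " << T.Lo16.Name << ", " << T.Hi16.Name
              << (T.Hi16.Artificial ? " (artificial)" : "") << "\n";
}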
- def VGPR#Index#_HI16 : SIReg <"v"#Index#".h", Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - } - def VGPR#Index : SIRegWithSubRegs <"v"#Index, - [!cast("VGPR"#Index#"_LO16"), !cast("VGPR"#Index#"_HI16")], - Index>, - DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> { - let HWEncoding{8} = 1; - let SubRegIndices = [lo16, hi16]; - } + defm VGPR#Index : + SIRegLoHi16 <"v"#Index, Index, 0, 1>, + DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>; } // AccVGPR registers foreach Index = 0-255 in { - def AGPR#Index : - SIReg <"a"#Index, Index>, - DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]> { - let HWEncoding{8} = 1; - } + defm AGPR#Index : + SIRegLoHi16 <"a"#Index, Index, 1, 1>, + DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>; } //===----------------------------------------------------------------------===// @@ -325,11 +306,17 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { let isAllocatable = 0; } +def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { + let CopyCost = 1; + let Size = 16; + let isAllocatable = 0; +} + // TODO: Do we need to set DwarfRegAlias on register tuples? def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "SGPR%u_LO16", 0, 105))> { - let AllocationPriority = 1; + let AllocationPriority = 9; let Size = 16; let GeneratePressureSet = 0; } @@ -380,6 +367,12 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, let isAllocatable = 0; } +def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add (sequence "TTMP%u_LO16", 0, 15))> { + let Size = 16; + let isAllocatable = 0; +} + // Trap handler TMP 64-bit registers def TTMP_64Regs : SIRegisterTuples.ret, TTMP_32, 15, 2, 2, "ttmp">; @@ -523,6 +516,13 @@ def VGPR_512 : SIRegisterTuples.ret, VGPR_32, 255, 1, 16, "v">; // VGPR 1024-bit registers def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; +def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16, + (add (sequence "AGPR%u_LO16", 0, 255))> { + let isAllocatable = 0; + let Size = 16; + let GeneratePressureSet = 0; +} + // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "AGPR%u", 0, 255))> { @@ -588,15 +588,43 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 let AllocationPriority = 10; } +def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16, + (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16, + XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16, + TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, + SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16, + SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> { + let Size = 16; + let AllocationPriority = 10; +} + def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 10; } +def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16, + (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> { + let Size = 16; + let AllocationPriority = 10; +} + def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 10; } + +def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> { + let Size = 16; + let 
AllocationPriority = 10; +} + +def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> { + let Size = 16; + let AllocationPriority = 10; +} } // End GeneratePressureSet = 0 // Register class for all scalar registers (SGPRs + Special Registers) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b896896af4c3c..370da2abad004 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1083,6 +1083,11 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { // (move from MC* level to Target* level). Return size in bits. unsigned getRegBitWidth(unsigned RCID) { switch (RCID) { + case AMDGPU::VGPR_LO16RegClassID: + case AMDGPU::VGPR_HI16RegClassID: + case AMDGPU::SGPR_LO16RegClassID: + case AMDGPU::AGPR_LO16RegClassID: + return 16; case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: case AMDGPU::VRegOrLds_32RegClassID: diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index 6a8ad3fdff266..bc03154a679d3 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -429,7 +429,7 @@ bool ARMCallLowering::lowerFormalArguments( for (auto &Arg : F.args()) { if (!isSupportedType(DL, TLI, Arg.getType())) return false; - if (Arg.hasByValOrInAllocaAttr()) + if (Arg.hasPassPointeeByValueAttr()) return false; } diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 0939d890f36bc..ad0125a599d46 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2288,7 +2288,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { bool ARMFastISel::SelectCall(const Instruction *I, const char *IntrMemName = nullptr) { const CallInst *CI = cast(I); - const Value *Callee = CI->getCalledValue(); + const Value *Callee = CI->getCalledOperand(); // Can't handle inline asm. if (isa(Callee)) return false; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 3e4d798322ae4..fa4e83b5f665f 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1460,6 +1460,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); + if (Subtarget->hasMVEIntegerOps()) + setTargetDAGCombine(ISD::VSELECT); + if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); if (Subtarget->isThumb1Only()) @@ -2663,9 +2666,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Check that the call results are passed in the same way. LLVMContext &C = *DAG.getContext(); - if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, - CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, isVarArg))) + if (!CCState::resultsCompatible( + getEffectiveCallingConv(CalleeCC, isVarArg), + getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, + CCAssignFnForReturn(CalleeCC, isVarArg), + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) return false; // The callee has to preserve all registers the caller needs to preserve. 
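The IsEligibleForTailCallOptimization hunk above fixes which convention each side is checked under: the caller's result convention must be derived from the caller's own calling convention and variadic-ness, not the callee's for both sides. A deliberately simplified sketch of that shape (invented enum and normalization rule, not ARM's real logic):

#include <iostream>

// Stand-in for getEffectiveCallingConv(): a variadic function's results are
// always handled under the standard variadic convention, whatever was
// declared.
enum class CC { Fast, Std, VarArgStd };

CC effectiveCC(CC Declared, bool IsVarArg) {
  return IsVarArg ? CC::VarArgStd : Declared;
}

// Results are compatible only when both sides agree after normalization,
// each side computed from that function's own properties.
bool resultsCompatible(CC CalleeCC, bool CalleeIsVarArg, CC CallerCC,
                       bool CallerIsVarArg) {
  return effectiveCC(CalleeCC, CalleeIsVarArg) ==
         effectiveCC(CallerCC, CallerIsVarArg);
}

int main() {
  // A non-variadic fastcc callee inside a variadic caller: incompatible,
  // so no tail call.
  std::cout << resultsCompatible(CC::Fast, false, CC::Std, true) << "\n"; // 0
}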
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -5752,57 +5757,25 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); SDValue Op = N->getOperand(0); - // This function is only supposed to be called for i64 types, either as the - // source or destination of the bit convert. + // This function is only supposed to be called for i16 and i64 types, either + // as the source or destination of the bit convert. EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); - const bool HasFullFP16 = Subtarget->hasFullFP16(); if (SrcVT == MVT::i16 && DstVT == MVT::f16) { - if (!HasFullFP16) + if (!Subtarget->hasFullFP16()) return SDValue(); - // SoftFP: read half-precision arguments: - // - // t2: i32,ch = ... - // t7: i16 = truncate t2 <~~~~ Op - // t8: f16 = bitcast t7 <~~~~ N - // - if (Op.getOperand(0).getValueType() == MVT::i32) - return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op), - MVT::f16, Op.getOperand(0)); - - return SDValue(); + // f16 bitcast i16 -> VMOVhr + return DAG.getNode(ARMISD::VMOVhr, SDLoc(N), MVT::f16, + DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op)); } - // Half-precision return values if (SrcVT == MVT::f16 && DstVT == MVT::i16) { - if (!HasFullFP16) - return SDValue(); - // - // t11: f16 = fadd t8, t10 - // t12: i16 = bitcast t11 <~~~ SDNode N - // t13: i32 = zero_extend t12 - // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13 - // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1 - // - // transform this into: - // - // t20: i32 = ARMISD::VMOVrh t11 - // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20 - // - auto ZeroExtend = N->use_begin(); - if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND || - ZeroExtend->getValueType(0) != MVT::i32) + if (!Subtarget->hasFullFP16()) return SDValue(); - - auto Copy = ZeroExtend->use_begin(); - if (Copy->getOpcode() == ISD::CopyToReg && - Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op); - DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt); - return Cvt; - } - return SDValue(); + // i16 bitcast f16 -> VMOVrh + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, + DAG.getNode(ARMISD::VMOVrh, SDLoc(N), MVT::i32, Op)); } if (!(SrcVT == MVT::i64 || DstVT == MVT::i64)) @@ -11749,6 +11722,42 @@ static SDValue PerformAddeSubeCombine(SDNode *N, return SDValue(); } +static SDValue PerformVSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs). + // + // We need to re-implement this optimization here as the implementation in the + // Target-Independent DAGCombiner does not handle the kind of constant we make + // (it calls isConstOrConstSplat with AllowTruncation set to false - and for + // good reason, allowing truncation there would break other targets). + // + // Currently, this is only done for MVE, as it's the only target that benefits + // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL). + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::XOR) + return SDValue(); + SDValue XOR = N->getOperand(0); + + // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s. + // It is important to check with truncation allowed as the BUILD_VECTORs we + // generate in those situations will truncate their operands. 
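The transformation this combine performs rests on a simple identity: selecting under a negated condition is the same as selecting with the operands swapped. A minimal standalone check (plain C++):

#include <array>
#include <cassert>

int main() {
  std::array<bool, 4> C{true, false, true, false}; // per-lane predicate
  std::array<int, 4> A{1, 2, 3, 4}, B{5, 6, 7, 8};
  for (int I = 0; I < 4; ++I) {
    int ViaNot = !C[I] ? A[I] : B[I]; // vselect(xor(c, 1), a, b)
    int Swapped = C[I] ? B[I] : A[I]; // vselect(c, b, a)
    assert(ViaNot == Swapped);        // the combine changes nothing
  }
}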
+ ConstantSDNode *Const = + isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false, + /*AllowTruncation*/ true); + if (!Const || !Const->isOne()) + return SDValue(); + + // Rewrite into vselect(cond, rhs, lhs). + SDValue Cond = XOR->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + EVT Type = N->getValueType(0); + return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS); +} + static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -12642,58 +12651,44 @@ static bool isValidMVECond(unsigned CC, bool IsFloat) { }; } +static ARMCC::CondCodes getVCMPCondCode(SDValue N) { + if (N->getOpcode() == ARMISD::VCMP) + return (ARMCC::CondCodes)N->getConstantOperandVal(2); + else if (N->getOpcode() == ARMISD::VCMPZ) + return (ARMCC::CondCodes)N->getConstantOperandVal(1); + else + llvm_unreachable("Not a VCMP/VCMPZ!"); +} + +static bool CanInvertMVEVCMP(SDValue N) { + ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N)); + return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint()); +} + static SDValue PerformORCombine_i1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain // together with predicates EVT VT = N->getValueType(0); + SDLoc DL(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ARMCC::CondCodes CondCode0 = ARMCC::AL; - ARMCC::CondCodes CondCode1 = ARMCC::AL; - if (N0->getOpcode() == ARMISD::VCMP) - CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(2)) - ->getZExtValue(); - else if (N0->getOpcode() == ARMISD::VCMPZ) - CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(1)) - ->getZExtValue(); - if (N1->getOpcode() == ARMISD::VCMP) - CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(2)) - ->getZExtValue(); - else if (N1->getOpcode() == ARMISD::VCMPZ) - CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(1)) - ->getZExtValue(); - - if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL) - return SDValue(); - - unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0); - unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1); + auto IsFreelyInvertable = [&](SDValue V) { + if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ) + return CanInvertMVEVCMP(V); + return false; + }; - if (!isValidMVECond(Opposite0, - N0->getOperand(0)->getValueType(0).isFloatingPoint()) || - !isValidMVECond(Opposite1, - N1->getOperand(0)->getValueType(0).isFloatingPoint())) + // At least one operand must be freely invertable. 
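The rewrite in PerformORCombine_i1 is De Morgan's law on predicate masks, (a | b) == ~(~a & ~b), which pays off when the inversions are free, e.g. a VCMP whose condition code can be flipped. A brute-force check over 4-bit masks (plain C++):

#include <cassert>

int main() {
  // Exhaustive check over all pairs of 4-bit predicate masks.
  for (unsigned A = 0; A < 16; ++A)
    for (unsigned B = 0; B < 16; ++B) {
      unsigned Or = (A | B) & 0xF;
      unsigned DeMorgan = ~(~A & ~B) & 0xF; // not(and(not a, not b))
      assert(Or == DeMorgan);
    }
}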
+ if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1))) return SDValue(); - SmallVector Ops0; - Ops0.push_back(N0->getOperand(0)); - if (N0->getOpcode() == ARMISD::VCMP) - Ops0.push_back(N0->getOperand(1)); - Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32)); - SmallVector Ops1; - Ops1.push_back(N1->getOperand(0)); - if (N1->getOpcode() == ARMISD::VCMP) - Ops1.push_back(N1->getOperand(1)); - Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32)); - - SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0); - SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1); - SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1); - return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And, - DCI.DAG.getAllOnesConstant(SDLoc(N), VT)); + SDValue NewN0 = DCI.DAG.getLogicalNOT(DL, N0, VT); + SDValue NewN1 = DCI.DAG.getLogicalNOT(DL, N1, VT); + SDValue And = DCI.DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1); + return DCI.DAG.getLogicalNOT(DL, And, VT); } /// PerformORCombine - Target-specific dag combine xforms for ISD::OR @@ -12814,6 +12809,27 @@ static SDValue PerformXORCombine(SDNode *N, return Result; } + if (Subtarget->hasMVEIntegerOps()) { + // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition. + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + const TargetLowering *TLI = Subtarget->getTargetLowering(); + if (TLI->isConstTrueVal(N1.getNode()) && + (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) { + if (CanInvertMVEVCMP(N0)) { + SDLoc DL(N0); + ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0)); + + SmallVector Ops; + Ops.push_back(N0->getOperand(0)); + if (N0->getOpcode() == ARMISD::VCMP) + Ops.push_back(N0->getOperand(1)); + Ops.push_back(DCI.DAG.getConstant(CC, DL, MVT::i32)); + return DCI.DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops); + } + } + } + return SDValue(); } @@ -13019,16 +13035,25 @@ static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo & // t2: f32,ch = CopyFromReg t0, Register:f32 %0 // t5: i32 = bitcast t2 // t18: f16 = ARMISD::VMOVhr t5 - SDValue BC = N->getOperand(0); - if (BC->getOpcode() != ISD::BITCAST) - return SDValue(); - SDValue Copy = BC->getOperand(0); - if (Copy.getValueType() != MVT::f32 || Copy->getOpcode() != ISD::CopyFromReg) - return SDValue(); + SDValue Op0 = N->getOperand(0); + if (Op0->getOpcode() == ISD::BITCAST) { + SDValue Copy = Op0->getOperand(0); + if (Copy.getValueType() == MVT::f32 && + Copy->getOpcode() == ISD::CopyFromReg) { + SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)}; + SDValue NewCopy = + DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), MVT::f16, Ops); + return NewCopy; + } + } + + // Only the bottom 16 bits of the source register are used. + APInt DemandedMask = APInt::getLowBitsSet(32, 16); + const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI)) + return SDValue(N, 0); - SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)}; - SDValue NewCopy = DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), MVT::f16, Ops); - return NewCopy; + return SDValue(); } static SDValue PerformVMOVrhCombine(SDNode *N, @@ -13201,8 +13226,7 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // If the valuetypes are the same, we can remove the cast entirely. 
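The PerformVMOVhrCombine change above tells SimplifyDemandedBits that VMOVhr reads only the low 16 bits of its source, so computations feeding the high half can be dropped. A standalone illustration (plain C++):

#include <cassert>
#include <cstdint>

// A VMOVrh/VMOVhr-style transfer only carries the low 16 bits; the value is
// zero-extended into a 32-bit core register and truncated on the way back.
uint16_t low16(uint32_t Src) { return static_cast<uint16_t>(Src); }

int main() {
  // Whatever feeds the top half is dead as far as this user is concerned,
  // which is exactly what the demanded-bits mask (low 16 of 32) expresses.
  assert(low16(0xDEAD3C00u) == low16(0x00003C00u));
  // And a zero-extended half round-trips unchanged through the wide move.
  uint32_t Wide = 0x00003C00u;
  assert(uint32_t{low16(Wide)} == Wide);
}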
if (Op->getOperand(0).getValueType() == VT) return Op->getOperand(0); - return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, - Op->getOperand(0).getValueType(), Op->getOperand(0)); + return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0)); } return SDValue(); @@ -15246,6 +15270,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); @@ -16393,6 +16418,12 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, assert(DstSz == Known.getBitWidth()); break; } + case ARMISD::VMOVrh: { + KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); + assert(KnownOp.getBitWidth() == 16); + Known = KnownOp.zext(32); + break; + } } } @@ -16518,7 +16549,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { if (!Subtarget->hasV6Ops()) return false; - InlineAsm *IA = cast(CI->getCalledValue()); + InlineAsm *IA = cast(CI->getCalledOperand()); std::string AsmStr = IA->getAsmString(); SmallVector AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index bab6d224b84de..d6e082d64e7af 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -80,7 +80,8 @@ bool ARMTTIImpl::shouldFavorPostInc() const { return false; } -int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); @@ -123,7 +124,7 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, } int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { // Division by a constant can be turned into multiplication, but only if we // know it's constant. So it's not so much that the immediate is cheap (it's // not), but that the alternative is worse. @@ -138,12 +139,14 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im if (Imm == 255 || Imm == 65535) return 0; // Conversion to BIC is free, and means we can use ~Imm instead. - return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty)); + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(~Imm, Ty, CostKind)); } if (Opcode == Instruction::Add) // Conversion to SUB is free, and means we can use -Imm instead. 
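Both immediate-cost legs here follow the same pattern: AND can be rewritten as BIC with ~Imm, and ADD as SUB with -Imm, so the effective cost is the cheaper of the two encodings. A sketch with an invented materialization-cost rule (not ARM's real immediate encoder):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Invented rule: pretend only values up to 0xFF are free to materialize.
int materializationCost(uint32_t Imm) { return Imm <= 0xFF ? 0 : 1; }

// AND can become BIC with the complemented immediate...
int andImmCost(uint32_t Imm) {
  return std::min(materializationCost(Imm), materializationCost(~Imm));
}

// ...and ADD can become SUB with the negated immediate.
int addImmCost(uint32_t Imm) {
  return std::min(materializationCost(Imm),
                  materializationCost(uint32_t(0) - Imm));
}

int main() {
  std::cout << andImmCost(0xFFFFFF00u) << "\n"; // 0: BIC with 0xFF
  std::cout << addImmCost(0xFFFFFFFFu) << "\n"; // 0: SUB with 1
}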
- return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty)); + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(-Imm, Ty, CostKind)); if (Opcode == Instruction::ICmp && Imm.isNegative() && Ty->getIntegerBitWidth() == 32) { @@ -160,10 +163,11 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) return 0; - return getIntImmCost(Imm, Ty); + return getIntImmCost(Imm, Ty, CostKind); } int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -187,7 +191,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); // The extend of a load is free if (I && isa(I->getOperand(0))) { @@ -418,7 +422,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, @@ -458,6 +462,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, } int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. @@ -485,7 +490,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, + I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -681,6 +687,7 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, } int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -737,7 +744,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) return LT.first * Entry->Cost; - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info, Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA @@ -795,10 +803,11 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // Else this is expand, assume that we need to scalarize this op. if (auto *VTy = dyn_cast(Ty)) { unsigned Num = VTy->getNumElements(); - unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType(), + CostKind); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
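That scalarization fallback prices a vector operation as the per-lane scalar cost times the lane count, plus the overhead of extracting inputs and re-inserting results. A toy version with made-up unit costs:

#include <iostream>

// Made-up unit costs: one extract per input lane, one insert per result
// lane, plus the scalar operation itself repeated per lane.
unsigned scalarizedCost(unsigned NumLanes, unsigned ScalarOpCost,
                        unsigned NumOperands) {
  unsigned Extracts = NumLanes * NumOperands;
  unsigned Inserts = NumLanes;
  return Extracts + Inserts + NumLanes * ScalarOpCost;
}

int main() {
  // A 4-lane binary op whose scalar form costs 2: 4*2 + 4 + 4*2 = 20.
  std::cout << scalarizedCost(4, 2, 2) << "\n";
}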
- return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost; + return BaseT::getScalarizationOverhead(VTy, Args) + Num * Cost; } return BaseCost; @@ -806,6 +815,7 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); @@ -824,8 +834,9 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, int ARMTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, - bool UseMaskForGaps) { + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa<VectorType>(VecTy) && "Expect a vector type"); @@ -856,18 +867,19 @@ int ARMTTIImpl::getInterleavedMemoryOpCost( } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const Instruction *I) { using namespace PatternMatch; if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters) return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!"); VectorType *VTy = cast<VectorType>(DataTy); @@ -887,7 +899,7 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, // The scalarization cost should be a lot higher. We use the number of vector // elements plus the scalarization overhead. unsigned ScalarCost = - NumElems * LT.first + BaseT::getScalarizationOverhead(DataTy, {}); + NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {}); if (Alignment < EltSize / 8) return ScalarCost; @@ -1025,23 +1037,31 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, HardwareLoopInfo &HWLoopInfo) { // Low-overhead branches are only supported in the 'low-overhead branch' // extension of v8.1-m. - if (!ST->hasLOB() || DisableLowOverheadLoops) + if (!ST->hasLOB() || DisableLowOverheadLoops) { + LLVM_DEBUG(dbgs() << "ARMHWLoops: Disabled\n"); return false; + } - if (!SE.hasLoopInvariantBackedgeTakenCount(L)) + if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { + LLVM_DEBUG(dbgs() << "ARMHWLoops: No BETC\n"); return false; + } const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n"); return false; + } const SCEV *TripCountSCEV = SE.getAddExpr(BackedgeTakenCount, SE.getOne(BackedgeTakenCount->getType())); // We need to store the trip count in LR, a 32-bit register. - if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) + if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) { + LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32 bits\n"); return false; + } // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little // point in generating a hardware loop if that's going to happen.
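The trip-count guard above exists because LR, which holds the count for a low-overhead loop, is 32 bits wide. A simplified standalone version of the check (the real code reasons about SCEV value ranges rather than concrete values):

#include <cstdint>
#include <iostream>

bool tripCountFitsInLR(uint64_t BackedgeTakenCount) {
  uint64_t TripCount = BackedgeTakenCount + 1; // may wrap to 0
  return TripCount != 0 && TripCount <= UINT32_MAX;
}

int main() {
  std::cout << tripCountFitsInLR(100) << "\n";        // 1
  std::cout << tripCountFitsInLR(UINT32_MAX) << "\n"; // 0: needs 2^32
}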
@@ -1146,8 +1166,10 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, auto ScanLoop = [&](Loop *L) { for (auto *BB : L->getBlocks()) { for (auto &I : *BB) { - if (MaybeCall(I) || IsHardwareLoopIntrinsic(I)) + if (MaybeCall(I) || IsHardwareLoopIntrinsic(I)) { + LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n"); return false; + } } } return true; @@ -1362,7 +1384,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, SmallVector Operands(I.value_op_begin(), I.value_op_end()); - Cost += getUserCost(&I, Operands); + Cost += getUserCost(&I, Operands, TargetTransformInfo::TCK_CodeSize); } } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 5f7d17d75d71d..9cb4916b70022 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -110,9 +110,10 @@ class ARMTTIImpl : public BasicTTIImplBase { Type *Ty); using BaseT::getIntImmCost; - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind); /// @} @@ -194,9 +195,11 @@ class ARMTTIImpl : public BasicTTIImplBase { } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -206,6 +209,7 @@ class ARMTTIImpl : public BasicTTIImplBase { int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -214,17 +218,21 @@ class ARMTTIImpl : public BasicTTIImplBase { const Instruction *CxtI = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr); + unsigned getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); bool isLoweredToCall(const Function *F); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 1536a3412dddd..e84d53346d80b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -266,7 +266,9 @@ class ARMMCInstrAnalysis : public MCInstrAnalysis { bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { // 
We only handle PCRel branches for now. - if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) + if (Inst.getNumOperands() == 0 || + Info->get(Inst.getOpcode()).OpInfo[0].OperandType != + MCOI::OPERAND_PCREL) return false; int64_t Imm = Inst.getOperand(0).getImm(); @@ -285,6 +287,8 @@ class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis { switch (Inst.getOpcode()) { default: OpId = 0; + if (Inst.getNumOperands() == 0) + return false; break; case ARM::MVE_WLSTP_8: case ARM::MVE_WLSTP_16: diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index 9325dd26bba72..9fbd8e58f4540 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -355,6 +355,8 @@ bool MVETailPredication::IsPredicatedVectorLoop() { return false; MaskedInsts.push_back(cast(&I)); } else if (auto *Int = dyn_cast(&I)) { + if (Int->getIntrinsicID() == Intrinsic::fma) + continue; for (auto &U : Int->args()) { if (isa(U->getType())) return false; diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 067b5a84856c1..0e943aa07a3b5 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -239,7 +239,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, if (!Call) return false; - const auto *GV = dyn_cast(Call->getCalledValue()); + const auto *GV = dyn_cast(Call->getCalledOperand()); if (!GV) return false; if (GV->getName().startswith("llvm.preserve.array.access.index")) { diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index fb43e9f22fdcf..1e7862c36ea09 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -945,7 +945,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { StringRef Raw(String.data(), Third.getString().data() - String.data() + Third.getString().size()); std::string Collapsed = std::string(Raw); - Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end()); + Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end()); StringRef Whole = Collapsed; std::pair DotSplit = Whole.split('.'); if (!matchRegister(DotSplit.first.lower())) @@ -997,7 +997,7 @@ OperandMatchResultTy HexagonAsmParser::tryParseRegister(unsigned &RegNo, NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); } std::string Collapsed = std::string(RawString); - Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end()); + Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end()); StringRef FullString = Collapsed; std::pair DotSplit = FullString.split('.'); unsigned DotReg = matchRegister(DotSplit.first.lower()); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index a484dda23df9b..efaeaacfbb08b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3504,9 +3504,5 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { TargetLowering::AtomicExpansionKind HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { - const DataLayout &DL = AI->getModule()->getDataLayout(); - unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); - if (Size >= 4 && Size <= 8) - return 
AtomicExpansionKind::LLSC; - return AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 20e3a48d8c862..0bfb28b935c3a 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1758,7 +1758,7 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; - if (atInsnStart && !std::isspace(static_cast(*Str))) { + if (atInsnStart && !isSpace(static_cast(*Str))) { Length += MaxInstLength; atInsnStart = false; } diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 4dcda3b10f4da..5f18767c76cc7 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -424,8 +424,17 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { int DefIdx = -1; for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) { const MachineOperand &MO = SrcI->getOperand(OpNum); - if (MO.isReg() && MO.isDef() && MO.getReg() == DepR) - DefIdx = OpNum; + bool IsSameOrSubReg = false; + if (MO.isReg()) { + unsigned MOReg = MO.getReg(); + if (Register::isVirtualRegister(DepR)) { + IsSameOrSubReg = (MOReg == DepR); + } else { + IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg); + } + if (MO.isDef() && IsSameOrSubReg) + DefIdx = OpNum; + } } assert(DefIdx >= 0 && "Def Reg not found in Src MI"); MachineInstr *DstI = Dst->getInstr(); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 71f31cb48f8b6..99845ae7ca845 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -115,9 +115,10 @@ unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const { return (8 * ST.getVectorLength()) / ElemWidth; } -unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) { - return BaseT::getScalarizationOverhead(Ty, Insert, Extract); +unsigned HexagonTTIImpl::getScalarizationOverhead(VectorType *Ty, + const APInt &DemandedElts, + bool Insert, bool Extract) { + return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } unsigned HexagonTTIImpl::getOperandsScalarizationOverhead( @@ -126,28 +127,30 @@ unsigned HexagonTTIImpl::getOperandsScalarizationOverhead( } unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) { - return BaseT::getCallInstrCost(F, RetTy, Tys); + ArrayRef Tys, TTI::TargetCostKind CostKind) { + return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind); } unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { if (ID == Intrinsic::bswap) { std::pair LT = TLI.getTypeLegalizationCost(DL, RetTy); return LT.first + 2; } return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, 
I); + ScalarizationCostPassed, CostKind, I); } unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, @@ -158,10 +161,12 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(Opcode == Instruction::Load || Opcode == Instruction::Store); if (Opcode == Instruction::Store) - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); if (Src->isVectorTy()) { VectorType *VecTy = cast(Src); @@ -199,12 +204,15 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return (3 - LogA) * Cost * NumLoads; } - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); } unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, - Type *Src, unsigned Alignment, unsigned AddressSpace) { - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + Type *Src, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) { + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); } unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, @@ -212,38 +220,41 @@ unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, return 1; } -unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - Value *Ptr, bool VariableMask, - unsigned Alignment, - const Instruction *I) { +unsigned HexagonTTIImpl::getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I) { return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); } unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, + CostKind, UseMaskForCond, UseMaskForGaps); return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, - nullptr); + CostKind); } unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) { if (ValTy->isVectorTy()) { std::pair LT = TLI.getTypeLegalizationCost(DL, ValTy); if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } unsigned HexagonTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -252,12 +263,12 @@ unsigned HexagonTTIImpl::getArithmeticInstrCost( if 
(LT.second.isFloatingPoint()) return LT.first + FloatFactor * getTypeNumElements(Ty); } - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, - Type *SrcTy, const Instruction *I) { + Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) { if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; @@ -298,8 +309,10 @@ unsigned HexagonTTIImpl::getCacheLineSize() const { return ST.getL1CacheLineSize(); } -int HexagonTTIImpl::getUserCost(const User *U, - ArrayRef Operands) { +int +HexagonTTIImpl::getUserCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; @@ -321,7 +334,7 @@ int HexagonTTIImpl::getUserCost(const User *U, if (const CastInst *CI = dyn_cast(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); } bool HexagonTTIImpl::shouldBuildLookupTables() const { diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 409e1dfdb6b3d..b2191910a238f 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -101,37 +101,48 @@ class HexagonTTIImpl : public BasicTTIImplBase { return true; } - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - unsigned getOperandsScalarizationOverhead(ArrayRef Args, - unsigned VF); - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys); + unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract); + unsigned getOperandsScalarizationOverhead(ArrayRef Args, + unsigned VF); + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, + TTI::TargetCostKind CostKind); unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I); - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I); + unsigned getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S); unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const 
Instruction *I); unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace, bool UseMaskForCond = false, - bool UseMaskForGaps = false); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + bool UseMaskForCond = false, bool UseMaskForGaps = false); unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - const Instruction *I); + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -139,16 +150,18 @@ class HexagonTTIImpl : public BasicTTIImplBase { ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getCFInstrCost(unsigned Opcode) { + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { return 1; } /// @} - int getUserCost(const User *U, ArrayRef Operands); + int getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind); // Hexagon specific decision to generate a lookup table. bool shouldBuildLookupTables() const; diff --git a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h index a22d3a34f98c9..7366d5059c9ff 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h +++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h @@ -49,7 +49,7 @@ class LanaiTTIImpl : public BasicTTIImplBase { return TTI::PSK_Software; } - int getIntImmCost(const APInt &Imm, Type *Ty) { + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); if (Imm == 0) return TTI::TCC_Free; @@ -66,17 +66,19 @@ class LanaiTTIImpl : public BasicTTIImplBase { return 4 * TTI::TCC_Basic; } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) { - return getIntImmCost(Imm, Ty); + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + return getIntImmCost(Imm, Ty, CostKind); } int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) { - return getIntImmCost(Imm, Ty); + Type *Ty, TTI::TargetCostKind CostKind) { + return getIntImmCost(Imm, Ty, CostKind); } unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -87,7 +89,8 @@ class LanaiTTIImpl : public BasicTTIImplBase { switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::MUL: case ISD::SDIV: @@ -98,7 +101,8 @@ class LanaiTTIImpl : public BasicTTIImplBase { // instruction cost was arbitrarily chosen to reduce the desirability // of emitting arithmetic instructions that are emulated in software. // TODO: Investigate the performance impact given specialized lowerings. 
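The 64x factor above is the whole point of the Lanai override: operations that lower to software emulation get their base cost scaled so heavily that cost-driven passes steer away from them. Schematically (invented names, not the real TTI interface):

#include <iostream>

// Ops without hardware support on a Lanai-like target (mul, div, rem) get a
// punitive multiplier.
enum class Op { Add, Mul, SDiv };

unsigned baseCost(Op) { return 1; } // placeholder for BaseT's estimate

unsigned lanaiLikeCost(Op O) {
  switch (O) {
  case Op::Mul:
  case Op::SDiv:
    return 64 * baseCost(O); // lowered to a software-emulation libcall
  default:
    return baseCost(O);
  }
}

int main() {
  std::cout << lanaiLikeCost(Op::Add) << " " << lanaiLikeCost(Op::Mul)
            << "\n"; // 1 64
}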
- return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index a07ec4eb2e2db..130211878be17 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -160,18 +160,6 @@ reverseBranchCondition(SmallVectorImpl &Cond) const { return false; } -bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { - if (!MI.isTerminator()) - return false; - - // Conditional branch is a special case. - if (MI.isBranch() && !MI.isBarrier()) - return true; - if (!MI.isPredicable()) - return true; - return !isPredicated(MI); -} - bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h index 1f532a3d24020..710913b2d36f6 100644 --- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h +++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h @@ -56,7 +56,6 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { // Branch folding goodness bool reverseBranchCondition(SmallVectorImpl &Cond) const override; - bool isUnpredicatedTerminator(const MachineInstr &MI) const override; bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 1fbbed770c5ca..9669fb5964e38 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1361,19 +1361,19 @@ Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, } unsigned Alignment = 0; - const Value *DirectCallee = CB->getCalledFunction(); + const Function *DirectCallee = CB->getCalledFunction(); if (!DirectCallee) { // We don't have a direct function symbol, but that may be because of // constant cast instructions in the call. // With bitcast'd call targets, the instruction will be the call - if (isa(CB)) { + if (const auto *CI = dyn_cast(CB)) { // Check if we have call alignment metadata - if (getAlign(*cast(CB), Idx, Alignment)) + if (getAlign(*CI, Idx, Alignment)) return Align(Alignment); - const Value *CalleeV = cast(CB)->getCalledValue(); + const Value *CalleeV = CI->getCalledOperand(); // Ignore any bitcast instructions while (isa(CalleeV)) { const ConstantExpr *CE = cast(CalleeV); @@ -1385,15 +1385,15 @@ Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // We have now looked past all of the bitcasts. Do we finally have a // Function? 
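The loop above keeps unwrapping bitcast constant expressions until the underlying callee surfaces. A self-contained sketch of the same stripping idea (toy node type, not LLVM's Value hierarchy):

#include <iostream>
#include <string>

struct Node {
  enum Kind { Function, Bitcast, Other } K;
  std::string Name;
  const Node *Wrapped = nullptr; // operand 0 of a bitcast
};

// Look through any chain of bitcasts to whatever they wrap.
const Node *lookThroughBitcasts(const Node *V) {
  while (V && V->K == Node::Bitcast)
    V = V->Wrapped;
  return V;
}

int main() {
  Node F{Node::Function, "kernel_impl"};
  Node Cast1{Node::Bitcast, "", &F};
  Node Cast2{Node::Bitcast, "", &Cast1};
  const Node *Callee = lookThroughBitcasts(&Cast2);
  if (Callee && Callee->K == Node::Function)
    std::cout << "direct callee: " << Callee->Name << "\n";
}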
- if (isa(CalleeV)) - DirectCallee = CalleeV; + if (const auto *CalleeF = dyn_cast(CalleeV)) + DirectCallee = CalleeF; } } // Check for function alignment information if we found that the // ultimate target is a Function if (DirectCallee) - if (getAlign(*cast(DirectCallee), Idx, Alignment)) + if (getAlign(*DirectCallee, Idx, Alignment)) return Align(Alignment); // Call is indirect or alignment information is not available, fall back to diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index afc40a7abed08..5c14d0f1a24d5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { } int NVPTXTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost( switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: @@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost( if (LT.second.SimpleTy == MVT::i64) return 2 * LT.first; // Delegate other cases to the basic TTI. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 864d8b91a89a5..e28a8220c33e7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -87,6 +87,7 @@ class NVPTXTTIImpl : public BasicTTIImplBase { int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 97dd969b8fe49..13fd7d05ab9f4 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1731,10 +1731,10 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { if (parseToken(AsmToken::EndOfStatement)) return addErrorSuffix(" in '.machine' directive"); - PPCTargetStreamer &TStreamer = - *static_cast( - getParser().getStreamer().getTargetStreamer()); - TStreamer.emitMachine(CPU); + PPCTargetStreamer *TStreamer = static_cast( + getParser().getStreamer().getTargetStreamer()); + if (TStreamer != nullptr) + TStreamer->emitMachine(CPU); return false; } @@ -1773,10 +1773,10 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) { parseToken(AsmToken::EndOfStatement)) return addErrorSuffix(" in '.abiversion' directive"); - PPCTargetStreamer &TStreamer = - *static_cast( - getParser().getStreamer().getTargetStreamer()); - TStreamer.emitAbiVersion(AbiVersion); + PPCTargetStreamer *TStreamer = static_cast( + getParser().getStreamer().getTargetStreamer()); + if 
(TStreamer != nullptr) + TStreamer->emitAbiVersion(AbiVersion); return false; } @@ -1796,10 +1796,10 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { parseToken(AsmToken::EndOfStatement)) return addErrorSuffix(" in '.localentry' directive"); - PPCTargetStreamer &TStreamer = - *static_cast<PPCTargetStreamer *>( - getParser().getStreamer().getTargetStreamer()); - TStreamer.emitLocalEntry(Sym, Expr); + PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + if (TStreamer != nullptr) + TStreamer->emitLocalEntry(Sym, Expr); return false; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 8faf1bad7375e..39c5c0fa97462 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -58,8 +58,4 @@ void PPCXCOFFMCAsmInfo::anchor() {} PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4; - ZeroDirective = "\t.space\t"; - ZeroDirectiveSupportsNonZeroValue = false; - SymbolsHaveSMC = true; - UseIntegratedAssembler = false; } diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index d4fd0fb77a055..2651b683bd0d2 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1396,8 +1396,8 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) { bool isPPC64 = DL.getPointerSizeInBits() == 64; - PPCTargetStreamer &TS = - static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer()); + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); if (!TOC.empty()) { const char *Name = isPPC64 ? ".toc" : ".got2"; @@ -1412,8 +1412,8 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) { MCSymbol *const TOCEntryLabel = TOCMapPair.second; OutStreamer->emitLabel(TOCEntryLabel); - if (isPPC64) - TS.emitTCEntry(*TOCEntryTarget); + if (isPPC64 && TS != nullptr) + TS->emitTCEntry(*TOCEntryTarget); else OutStreamer->emitSymbolValue(TOCEntryTarget, 4); } @@ -1537,13 +1537,14 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() { // 3) A function does not use the TOC pointer R2 but does have calls. // In this case st_other=1 since we do not know whether or not any // of the callees clobber R2. This case is dealt with in this else if - // block. + // block. Tail calls are considered calls and st_other should also + // be set to 1 in that case. // 4) The function does not use the TOC pointer but R2 is used inside // the function. In this case st_other=1 once again. // 5) This function uses inline asm. We mark R2 as reserved if the function - // has inline asm so we have to assume that it may be used. - if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() || - (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { + // has inline asm as we have to assume that it may be used. + if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() || + MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); if (TS) @@ -1615,9 +1616,10 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { GVSym->setStorageClass( TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); - // External global variables are already handled.
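The .machine, .abiversion, and .localentry hunks above all make the same change: the target streamer, previously bound as a reference through an unconditional dereference, is now held as a pointer and null-checked before use. A compilable sketch of the pattern with stand-in types (these are illustrative, not the real MC interfaces), on the assumption that getTargetStreamer() may legitimately return null in some output modes:

#include <iostream>

struct TargetStreamer {
  void emitMachine(const char *CPU) { std::cout << ".machine " << CPU << '\n'; }
};

// The directive handler guards the call instead of dereferencing the
// (possibly null) streamer lookup unconditionally.
void handleMachineDirective(TargetStreamer *TS, const char *CPU) {
  if (TS != nullptr)
    TS->emitMachine(CPU);
}

int main() {
  TargetStreamer TS;
  handleMachineDirective(&TS, "ppc64");     // emits the directive
  handleMachineDirective(nullptr, "ppc64"); // silently skipped, no crash
}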
- if (GV->isDeclaration()) + if (GV->isDeclarationForLinker()) { + emitLinkage(GV, GVSym); return; + } SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); if (!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) @@ -1686,8 +1688,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { // Switch to section to emit TOC base. OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection()); - PPCTargetStreamer &TS = - static_cast(*OutStreamer->getTargetStreamer()); + PPCTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4; const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize; @@ -1706,7 +1708,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { OutStreamer->SwitchSection(TCEntry); OutStreamer->emitLabel(I.second); - TS.emitTCEntry(*I.first); + if (TS != nullptr) + TS->emitTCEntry(*I.first); } } diff --git a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp index e8ef451c7ec96..4c74e82cf0412 100644 --- a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -381,21 +381,10 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, MBB->end()); NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB); - // Copy the original liveIns of MBB to NewSuccessor. - for (auto &LI : MBB->liveins()) - NewSuccessor->addLiveIn(LI); - - // After splitting the NewSuccessor block, Regs defined but not killed - // in MBB should be treated as liveins of NewSuccessor. - // Note: Cannot use stepBackward instead since we are using the Reg - // liveness state at the end of MBB (liveOut of MBB) as the liveIn for - // NewSuccessor. Otherwise, will cause cyclic dependence. - LivePhysRegs LPR(*MF->getSubtarget().getRegisterInfo()); - SmallVector, 2> Clobbers; - for (MachineInstr &MI : *MBB) - LPR.stepForward(MI, Clobbers); - for (auto &LI : LPR) - NewSuccessor->addLiveIn(LI); + // Update the liveins for NewSuccessor. + LivePhysRegs LPR; + computeAndAddLiveIns(LPR, *NewSuccessor); + } else { // Remove successor from MBB. MBB->removeSuccessor(Successor); @@ -441,44 +430,26 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) { // condition is true MachineOperand &FalseValue = MI->getOperand(2); // Value to store if // condition is false - MachineOperand &ConditionRegister = MI->getOperand(3); // Condition LLVM_DEBUG(dbgs() << "Dest: " << Dest << "\n"); LLVM_DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n"); LLVM_DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n"); - LLVM_DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n"); + LLVM_DEBUG(dbgs() << "ConditionRegister: " << MI->getOperand(3) << "\n"); // If the Dest Register and True Value Register are not the same one, we // need the True Block. bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); - if (IsADDIInstRequired) { - // Copy the result into the destination if the condition is true. + // Copy the result into the destination if the condition is true. + if (IsADDIInstRequired) BuildMI(*TrueBlock, TrueBlockI, dl, TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI)) .add(Dest) .add(TrueValue) .add(MachineOperand::CreateImm(0)); - // Add the LiveIn registers required by true block. - TrueBlock->addLiveIn(TrueValue.getReg()); - } - - if (IsORIInstRequired) { - // Add the LiveIn registers required by false block. 
- FalseBlock->addLiveIn(FalseValue.getReg()); - } - - if (NewSuccessor) { - // Add the LiveIn registers required by NewSuccessor block. - NewSuccessor->addLiveIn(Dest.getReg()); - NewSuccessor->addLiveIn(TrueValue.getReg()); - NewSuccessor->addLiveIn(FalseValue.getReg()); - NewSuccessor->addLiveIn(ConditionRegister.getReg()); - } - - // Copy the value into the destination if the condition is false. + // Copy the result into the destination if the condition is false. if (IsORIInstRequired) BuildMI(*FalseBlock, FalseBlockI, dl, TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI)) @@ -490,6 +461,18 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) { NumExpanded++; } + + if (IsTrueBlockRequired) { + // Update the liveins for TrueBlock. + LivePhysRegs LPR; + computeAndAddLiveIns(LPR, *TrueBlock); + } + + if (IsFalseBlockRequired) { + // Update the liveins for FalseBlock. + LivePhysRegs LPR; + computeAndAddLiveIns(LPR, *FalseBlock); + } } void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) { diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 93695a854c246..ad1f16b4820af 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1674,13 +1674,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { DebugLoc dl = MBBI->getDebugLoc(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); - // Create branch instruction for pseudo tail call return instruction + // Create branch instruction for pseudo tail call return instruction. + // The TCRETURNdi variants are direct calls. Valid targets for those are + // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel + // since we can tail call external functions with PC-Rel (i.e. we don't need + // to worry about different TOC pointers). Some of the external functions will + // be MO_GlobalAddress while others, like memcpy for example, are going to + // be MO_ExternalSymbol. unsigned RetOpcode = MBBI->getOpcode(); if (RetOpcode == PPC::TCRETURNdi) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + if (JumpTarget.isGlobal()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + else if (JumpTarget.isSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addExternalSymbol(JumpTarget.getSymbolName()); + else + llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); @@ -1692,8 +1704,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { } else if (RetOpcode == PPC::TCRETURNdi8) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + if (JumpTarget.isGlobal()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + else if (JumpTarget.isSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addExternalSymbol(JumpTarget.getSymbolName()); + else + llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri8) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7df040776cfc2..ac2483cd1abd2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4780,16 +4780,6 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( const SmallVectorImpl &Ins, SelectionDAG &DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; - // FIXME: Tail calls are currently disabled when using PC Relative addressing. - // The issue is that PC Relative is only partially implemented and so there - // is currently a mix of functions that require the TOC and functions that do - // not require it. If we have A calls B calls C and both A and B require the - // TOC and C does not and is marked as clobbering R2 then it is not safe for - // B to tail call C. Since we do not have the information of whether or not - // a funciton needs to use the TOC here in this function we need to be - // conservatively safe and disable all tail calls for now. - if (Subtarget.isUsingPCRelativeCalls()) return false; - if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. @@ -4829,15 +4819,22 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( needStackSlotPassParameters(Subtarget, Outs)) return false; - // No TCO/SCO on indirect call because Caller have to restore its TOC - if (!isFunctionGlobalAddress(Callee) && - !isa(Callee)) + // All variants of 64-bit ELF ABIs without PC-Relative addressing require that + // the caller and callee share the same TOC for TCO/SCO. If the caller and + // callee potentially have different TOC bases then we cannot tail call since + // we need to restore the TOC pointer after the call. + // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 + // We cannot guarantee this for indirect calls or calls to external functions. + // When PC-Relative addressing is used, the concept of the TOC is no longer + // applicable so this check is not required. + // Check first for indirect calls. + if (!Subtarget.isUsingPCRelativeCalls() && + !isFunctionGlobalAddress(Callee) && !isa(Callee)) return false; - // If the caller and callee potentially have different TOC bases then we - // cannot tail call since we need to restore the TOC pointer after the call. - // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!callsShareTOCBase(&Caller, Callee, getTargetMachine())) + // Check if we share the TOC base. + if (!Subtarget.isUsingPCRelativeCalls() && + !callsShareTOCBase(&Caller, Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4849,11 +4846,14 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. - assert(CB && "Expected to have a CallBase!"); - if (!hasSameArgumentList(&Caller, *CB) && - needStackSlotPassParameters(Subtarget, Outs)) { + // PC Relative tail calls may not have a CallBase. 
+ // If there is no CallBase we cannot verify if we have the same argument + // list, so assume that we don't have the same argument list. + if (CB && !hasSameArgumentList(&Caller, *CB) && + needStackSlotPassParameters(Subtarget, Outs)) + return false; + else if (!CB && needStackSlotPassParameters(Subtarget, Outs)) return false; - } return true; } @@ -5368,7 +5368,7 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, MachineMemOperand::MOInvariant) : MachineMemOperand::MONone; - MachinePointerInfo MPI(CB ? CB->getCalledValue() : nullptr); + MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr); // Registers used in building the DAG. const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); @@ -5534,13 +5534,18 @@ SDValue PPCTargetLowering::FinishCall( // Emit tail call. if (CFlags.IsTailCall) { + // Indirect tail calls when using PC Relative calls do not have the same + // constraints. assert(((Callee.getOpcode() == ISD::Register && cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || - isa<ConstantSDNode>(Callee)) && - "Expecting a global address, external symbol, absolute value or " - "register"); + isa<ConstantSDNode>(Callee) || + (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) && + "Expecting a global address, external symbol, absolute value, " + "register or an indirect tail call when PC Relative calls are " + "used."); + // PC Relative calls also use TC_RETURN as the way to mark tail calls. assert(CallOpc == PPCISD::TC_RETURN && "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); @@ -5598,17 +5603,19 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!getTargetMachine().Options.GuaranteedTailCallOpt) ++NumSiblingCalls; - assert(isa<GlobalAddressSDNode>(Callee) && + // PC Relative calls no longer guarantee that the callee is a Global + // Address Node. The callee could be an indirect tail call in which + // case the SDValue for the callee could be a load (to load the address + // of a function pointer) or it may be a register copy (to move the + // address of the callee from a function parameter into a virtual + // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
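Condensing the control flow of IsEligibleForTailCallOptimization_64SVR4 above into a standalone predicate makes the PC-Relative special case easier to see. The parameter names below are illustrative, not the real interface:

#include <iostream>

// TOC-based ABIs may only tail call when the TOC base is provably shared;
// PC-Relative code has no TOC to restore, so both checks are skipped.
bool eligibleForTailCall(bool UsesPCRelativeCalls,
                         bool CalleeIsGlobalOrExternal,
                         bool CallsShareTOCBase) {
  if (!UsesPCRelativeCalls && !CalleeIsGlobalOrExternal)
    return false; // indirect call: caller would have to restore its TOC
  if (!UsesPCRelativeCalls && !CallsShareTOCBase)
    return false; // different TOC bases: TOC pointer must be restored
  return true;
}

int main() {
  std::cout << eligibleForTailCall(false, true, false) << '\n'; // 0
  std::cout << eligibleForTailCall(true, false, false) << '\n'; // 1
}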
+ assert((Subtarget.isUsingPCRelativeCalls() || + isa<GlobalAddressSDNode>(Callee)) && "Callee should be an llvm::Function object."); - LLVM_DEBUG( - const GlobalValue *GV = - cast<GlobalAddressSDNode>(Callee)->getGlobal(); - const unsigned Width = - 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); - dbgs() << "TCO caller: " - << left_justify(DAG.getMachineFunction().getName(), Width) - << ", callee linkage: " << GV->getVisibility() << ", " - << GV->getLinkage() << "\n"); + + LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName() + << "\nTCO callee: "); + LLVM_DEBUG(Callee.dump()); } } @@ -7225,20 +7232,24 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( unsigned Offset = 0; HandleRegLoc(VA.getLocReg(), Offset); Offset += PtrByteSize; - for (; Offset != StackSize; Offset += PtrByteSize) { - assert(I != End && - "Expecting enough RegLocs to copy entire ByVal arg."); - - if (!ArgLocs[I].isRegLoc()) - report_fatal_error("Passing ByVals split between registers and stack " - "not yet implemented."); - + for (; Offset != StackSize && ArgLocs[I].isRegLoc(); + Offset += PtrByteSize) { assert(ArgLocs[I].getValNo() == VA.getValNo() && - "Expecting more RegLocs for ByVal argument."); + "RegLocs should be for ByVal argument."); const CCValAssign RL = ArgLocs[I++]; HandleRegLoc(RL.getLocReg(), Offset); } + + if (Offset != StackSize) { + assert(ArgLocs[I].getValNo() == VA.getValNo() && + "Expected MemLoc for remaining bytes."); + assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes."); + // Consume the MemLoc. The InVal has already been emitted, so nothing + // more needs to be done. + ++I; + } + continue; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 709b21384538e..1ab3891c568f1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -225,6 +225,26 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +/// This is an architecture-specific helper function of reassociateOps. +/// Set special operand attributes for new instructions after reassociation. +void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, + MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { + // Propagate FP flags from the original instructions. + // But clear poison-generating flags because those may not be valid now. + uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags(); + NewMI1.setFlags(IntersectedFlags); + NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap); + NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap); + NewMI1.clearFlag(MachineInstr::MIFlag::IsExact); + + NewMI2.setFlags(IntersectedFlags); + NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap); + NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap); + NewMI2.clearFlag(MachineInstr::MIFlag::IsExact); +} + // This function does not list all associative and commutative operations, but // only those worth feeding through the machine combiner in an attempt to
Mostly, this means floating-point operations, @@ -258,7 +278,8 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { case PPC::QVFMUL: case PPC::QVFMULS: case PPC::QVFMULSs: - return true; + return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && + Inst.getFlag(MachineInstr::MIFlag::FmNsz); default: return false; } @@ -272,10 +293,6 @@ bool PPCInstrInfo::getMachineCombinerPatterns( if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; - // FP reassociation is only legal when we don't need strict IEEE semantics. - if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) - return false; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); } @@ -1435,17 +1452,6 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { return false; } -bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { - if (!MI.isTerminator()) - return false; - - // Conditional branch is a special case. - if (MI.isBranch() && !MI.isBarrier()) - return true; - - return !isPredicated(MI); -} - bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI, ArrayRef Pred) const { unsigned OpC = MI.getOpcode(); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 5459fa3ee216a..02f5a78ca3bbd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -248,6 +248,10 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const override; + bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override; @@ -360,8 +364,6 @@ class PPCInstrInfo : public PPCGenInstrInfo { // Predication support. bool isPredicated(const MachineInstr &MI) const override; - bool isUnpredicatedTerminator(const MachineInstr &MI) const override; - bool PredicateInstruction(MachineInstr &MI, ArrayRef Pred) const override; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 5dd73a2e8aeed..7621913780d1a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -25,6 +25,32 @@ // ** in PPCVSXSwapRemoval::gatherVectorInstructions(). ** // **************************************************************************** +// *********************************** NOTE *********************************** +// ** When adding new anonymous patterns to this file, please add them to ** +// ** the section titled Anonymous Patterns. Chances are that the existing ** +// ** predicate blocks already contain a combination of features that you ** +// ** are after. There is a list of blocks at the top of the section. If ** +// ** you definitely need a new combination of predicates, please add that ** +// ** combination to the list. ** +// ** File Structure: ** +// ** - Custom PPCISD node definitions ** +// ** - Predicate definitions: predicates to specify the subtargets for ** +// ** which an instruction or pattern can be emitted. ** +// ** - Instruction formats: classes instantiated by the instructions. ** +// ** These generally correspond to instruction formats in section 1.6 of ** +// ** the ISA document. ** +// ** - Instruction definitions: the actual definitions of the instructions ** +// ** often including input patterns that they match. 
** +// ** - Helper DAG definitions: We define a number of dag objects to use as ** +// ** input or output patterns for conciseness of the code. ** +// ** - Anonymous patterns: input patterns that an instruction matches can ** +// ** often not be specified as part of the instruction definition, so an ** +// ** anonymous pattern must be specified mapping an input pattern to an ** +// ** output pattern. These are generally guarded by subtarget predicates. ** +// ** - Instruction aliases: used to define extended mnemonics for assembly ** +// ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). ** +// **************************************************************************** + def PPCRegVSRCAsmOperand : AsmOperandClass { let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; } @@ -89,6 +115,7 @@ def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [ SDTCisVec<0>, SDTCisPtrTy<1> ]>; +//--------------------------- Custom PPC nodes -------------------------------// def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, @@ -112,6 +139,21 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +//-------------------------- Predicate definitions ---------------------------// +def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; +def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; +def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; +def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; +def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; +def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; +def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; +def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">; + +//--------------------- VSX-specific instruction formats ---------------------// +// By default, all VSX instructions are to be selected over their Altivec +// counterparts and they do not have unmodeled side effects.
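Each Predicate definition above wraps a C++ expression that is evaluated against the subtarget when a pattern is considered. A rough standalone analogue of that gating, with an invented feature-query type standing in for the real subtarget class:

#include <iostream>

struct Subtarget {
  bool VSX = true, P8Vector = false;
  bool hasVSX() const { return VSX; }
  bool hasP8Vector() const { return P8Vector; }
};

// A pattern guarded by Predicates = [HasVSX, HasP8Vector] is only usable
// when every listed predicate evaluates to true for the current subtarget.
bool patternAvailable(const Subtarget &ST) {
  return ST.hasVSX() && ST.hasP8Vector();
}

int main() {
  Subtarget ST;
  std::cout << patternAvailable(ST) << '\n'; // 0: P8Vector is off
  ST.P8Vector = true;
  std::cout << patternAvailable(ST) << '\n'; // 1
}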
+let AddedComplexity = 400, hasSideEffects = 0 in { multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, ValueType OutTy, ValueType InTy> { @@ -144,14 +186,114 @@ class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let XB = XA; } -def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; -def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; -def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; -def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; +let Predicates = [HasVSX, HasP9Vector] in { +class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_RD5_XO5_RS5; + +// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] +class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_XO5_VB5, isRecordForm; + +// [PO VRT XO VRB XO /], but only the left 64 bits (or less) of the VRB are +// used, so we use a different operand class for VRB +class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + RegisterOperand vbtype, list<dag> pattern> + : X_RD5_XO5_RS5; + +// [PO VRT XO VRB XO /] +class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_RD5_XO5_RS5; + +// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] +class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_XO5_VB5_VSFR, isRecordForm; + +// [PO T XO B XO BX /] +class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc, + list<dag> pattern> + : XX2_RD5_XO5_RS6; -let Predicates = [HasVSX] in { -let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. -let hasSideEffects = 0 in { // VSX instructions don't have side effects.
+// [PO T XO B XO BX TX] +class XX2_XT6_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, + RegisterOperand vtype, list pattern> + : XX2_RD6_XO5_RS6; + +// [PO T A B XO AX BX TX], src and dest register use different operand class +class XX3_XT5_XA5_XB5 opcode, bits<8> xo, string opc, + RegisterOperand xty, RegisterOperand aty, RegisterOperand bty, + InstrItinClass itin, list pattern> + : XX3Form; + +// [PO VRT VRA VRB XO /] +class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, + list pattern> + : XForm_1; + +// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] +class X_VT5_VA5_VB5_Ro opcode, bits<10> xo, string opc, + list pattern> + : X_VT5_VA5_VB5, isRecordForm; + +// [PO VRT VRA VRB XO /] +class X_VT5_VA5_VB5_FMA opcode, bits<10> xo, string opc, + list pattern> + : XForm_1, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">; + +// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] +class X_VT5_VA5_VB5_FMA_Ro opcode, bits<10> xo, string opc, + list pattern> + : X_VT5_VA5_VB5_FMA, isRecordForm; + +class Z23_VT5_R1_VB5_RMC2_EX1 opcode, bits<8> xo, bit ex, string opc, + list pattern> + : Z23Form_8 { + let RC = ex; +} + +// [PO BF // VRA VRB XO /] +class X_BF3_VA5_VB5 opcode, bits<10> xo, string opc, + list pattern> + : XForm_17 { + let Pattern = pattern; +} + +// [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different +// "out" and "in" dag +class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc, + RegisterOperand vtype, list pattern> + : XX1Form_memOp; + +// [PO S RA RB XO SX] +class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, + RegisterOperand vtype, list pattern> + : XX1Form_memOp; +} // Predicates = HasP9Vector +} // AddedComplexity = 400, hasSideEffects = 0 + +//-------------------------- Instruction definitions -------------------------// +// VSX instructions require the VSX feature, they are to be selected over +// equivalent Altivec patterns (as they address a larger register set) and +// they do not have unmodeled side effects. +let Predicates = [HasVSX], AddedComplexity = 400 in { +let hasSideEffects = 0 in { // Load indexed instructions let mayLoad = 1, mayStore = 0 in { @@ -835,7 +977,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. [(set vsrc:$XT, (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; } // isCommutable -} // Uses = [RM], mayRaiseFPException + } // Uses = [RM], mayRaiseFPException // Logical Instructions let isCommutable = 1 in @@ -924,1760 +1066,279 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; -} // hasSideEffects +// The following VSX instructions were introduced in Power ISA 2.07 +let Predicates = [HasVSX, HasP8Vector] in { + let isCommutable = 1 in { + def XXLEQV : XX3Form<60, 186, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxleqv $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>; + def XXLNAND : XX3Form<60, 178, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlnand $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, + v4i32:$XB)))]>; + } // isCommutable -// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after -// instruction selection into a branch sequence. 
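The SELECT_CC_*/SELECT_* comment removed above (the block appears to be relocated as part of this file reorganization, not dropped) describes pseudos that the custom inserter expands into a branch sequence after instruction selection. A plain C++ rendering of what that expansion computes, purely for orientation:

#include <iostream>

// What a SELECT_VSFRC pseudo computes once expanded: a compare feeding a
// conditional branch over a copy, with both paths rejoining at a PHI.
double selectVSFRC(bool CondBit, double T, double F) {
  if (CondBit)
    return T;
  return F;
}

int main() {
  std::cout << selectVSFRC(true, 1.5, 2.5) << ' '
            << selectVSFRC(false, 1.5, 2.5) << '\n'; // 1.5 2.5
}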
-let PPC970_Single = 1 in { + let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins), + "xxleqv $XT, $XT, $XT", IIC_VecGeneral, + [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>; + } - def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), - (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), - "#SELECT_CC_VSRC", - []>; - def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), - (ins crbitrc:$cond, vsrc:$T, vsrc:$F), - "#SELECT_VSRC", - [(set v2f64:$dst, - (select i1:$cond, v2f64:$T, v2f64:$F))]>; - def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst), - (ins crrc:$cond, f8rc:$T, f8rc:$F, - i32imm:$BROPC), "#SELECT_CC_VSFRC", - []>; - def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst), - (ins crbitrc:$cond, f8rc:$T, f8rc:$F), - "#SELECT_VSFRC", - [(set f64:$dst, - (select i1:$cond, f64:$T, f64:$F))]>; - def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst), - (ins crrc:$cond, f4rc:$T, f4rc:$F, - i32imm:$BROPC), "#SELECT_CC_VSSRC", - []>; - def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst), - (ins crbitrc:$cond, f4rc:$T, f4rc:$F), - "#SELECT_VSSRC", - [(set f32:$dst, - (select i1:$cond, f32:$T, f32:$F))]>; -} -} // AddedComplexity + def XXLORC : XX3Form<60, 170, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlorc $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; -def : InstAlias<"xvmovdp $XT, $XB", - (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; -def : InstAlias<"xvmovsp $XT, $XB", - (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; + // VSX scalar loads introduced in ISA 2.07 + let mayLoad = 1, mayStore = 0 in { + let CodeSize = 3 in + def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src), + "lxsspx $XT, $src", IIC_LdStLFD, []>; + def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwax $XT, $src", IIC_LdStLFD, []>; + def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwzx $XT, $src", IIC_LdStLFD, []>; -def : InstAlias<"xxspltd $XT, $XB, 0", - (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; -def : InstAlias<"xxspltd $XT, $XB, 1", - (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; -def : InstAlias<"xxmrghd $XT, $XA, $XB", - (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; -def : InstAlias<"xxmrgld $XT, $XA, $XB", - (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; -def : InstAlias<"xxswapd $XT, $XB", - (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; -def : InstAlias<"xxspltd $XT, $XB, 0", - (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; -def : InstAlias<"xxspltd $XT, $XB, 1", - (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; -def : InstAlias<"xxswapd $XT, $XB", - (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>; + // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later + let CodeSize = 3 in + def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), + "#XFLOADf32", + [(set f32:$XT, (load xoaddr:$src))]>; + // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later + def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWAX", + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later + def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWZX", + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + } // mayLoad -let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
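The pseudo loads defined above (XFLOADf32, LIWAX, LIWZX) defer the choice of real opcode until after register allocation, as their comments note. A hedged sketch of that late selection, with invented helpers standing in for the actual expansion pass:

#include <iostream>

enum Opcode { LXSSPX, LFSX };

// Illustrative stand-in: pretend even-numbered registers are VSX registers.
// The real pass keys off the register class assigned to the destination.
bool isVSXRegister(unsigned Reg) { return Reg % 2 == 0; }

// XFLOADf32 keeps the patterns register-class agnostic; only once the
// destination register is known can the VSX or FP form be chosen.
Opcode expandXFLOADf32(unsigned DstReg) {
  return isVSXRegister(DstReg) ? LXSSPX : LFSX;
}

int main() {
  std::cout << (expandXFLOADf32(2) == LXSSPX) << ' '
            << (expandXFLOADf32(3) == LFSX) << '\n'; // 1 1
}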
+ // VSX scalar stores introduced in ISA 2.07 + let mayStore = 1, mayLoad = 0 in { + let CodeSize = 3 in + def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), + "stxsspx $XT, $dst", IIC_LdStSTFD, []>; + def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; -def : Pat<(v4i32 (vnot_ppc v4i32:$A)), - (v4i32 (XXLNOR $A, $A))>; -def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A), - (and v4i32:$B, v4i32:$C))), - (v4i32 (XXSEL $A, $B, $C))>; + // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later + let CodeSize = 3 in + def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), + "#XFSTOREf32", + [(store f32:$XT, xoaddr:$dst)]>; + // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later + def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), + "#STIWX", + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + } // mayStore -let Predicates = [IsBigEndian] in { -def : Pat<(v2f64 (scalar_to_vector f64:$A)), - (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; + // VSX Elementary Scalar FP arithmetic (SP) + let mayRaiseFPException = 1 in { + let isCommutable = 1 in { + def XSADDSP : XX3Form<60, 0, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsaddsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>; + def XSMULSP : XX3Form<60, 16, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsmulsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>; + } // isCommutable -def : Pat<(f64 (extractelt v2f64:$S, 0)), - (f64 (EXTRACT_SUBREG $S, sub_64))>; -def : Pat<(f64 (extractelt v2f64:$S, 1)), - (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; -} + def XSSUBSP : XX3Form<60, 8, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xssubsp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>; + def XSDIVSP : XX3Form<60, 24, + (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), + "xsdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>; + } // mayRaiseFPException -let Predicates = [IsLittleEndian] in { -def : Pat<(v2f64 (scalar_to_vector f64:$A)), - (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), - (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; + def XSRESP : XX2Form<60, 26, + (outs vssrc:$XT), (ins vssrc:$XB), + "xsresp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfre f32:$XB))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in + def XSRSP : XX2Form<60, 281, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xsrsp $XT, $XB", IIC_VecFP, []>; + def XSSQRTSP : XX2Form<60, 11, + (outs vssrc:$XT), (ins vssrc:$XB), + "xssqrtsp $XT, $XB", IIC_FPSqrtS, + [(set f32:$XT, (fsqrt f32:$XB))]>; + def XSRSQRTESP : XX2Form<60, 10, + (outs vssrc:$XT), (ins vssrc:$XB), + "xsrsqrtesp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfrsqrte f32:$XB))]>; -def : Pat<(f64 (extractelt v2f64:$S, 0)), - (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; -def : Pat<(f64 (extractelt v2f64:$S, 1)), - (f64 (EXTRACT_SUBREG $S, sub_64))>; -} + // FMA Instructions + let BaseName = "XSMADDASP" in { + let isCommutable = 1 in + def XSMADDASP : XX3Form<60, 1, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in + def XSMADDMSP : XX3Form<60, 9, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } -// Additional fnmsub patterns: -a*b + c == -(a*b - c) -def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C), - (XSNMSUBADP $C, $A, $B)>; -def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C), - (XSNMSUBADP $C, $A, $B)>; + let BaseName = "XSMSUBASP" in { + let isCommutable = 1 in + def XSMSUBASP : XX3Form<60, 17, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, + (fneg f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let IsVSXFMAAlt = 1, hasSideEffects = 1 in + def XSMSUBMSP : XX3Form<60, 25, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } -def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C), - (XVNMSUBADP $C, $A, $B)>; -def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C), - (XVNMSUBADP $C, $A, $B)>; + let BaseName = "XSNMADDASP" in { + let isCommutable = 1 in + def XSNMADDASP : XX3Form<60, 129, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let IsVSXFMAAlt = 1, hasSideEffects = 1 in + def XSNMADDMSP : XX3Form<60, 137, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } -def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C), - (XVNMSUBASP $C, $A, $B)>; -def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C), - (XVNMSUBASP $C, $A, $B)>; - -def : Pat<(v2f64 (bitconvert v4f32:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2f64 (bitconvert v4i32:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2f64 (bitconvert v8i16:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2f64 (bitconvert v16i8:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; - -def : Pat<(v4f32 (bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v4i32 (bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v8i16 (bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v16i8 (bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; - -def : Pat<(v2i64 (bitconvert v4f32:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2i64 (bitconvert v4i32:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2i64 (bitconvert v8i16:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; -def : Pat<(v2i64 (bitconvert v16i8:$A)), - (COPY_TO_REGCLASS $A, VSRC)>; - -def : Pat<(v4f32 (bitconvert v2i64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v4i32 (bitconvert v2i64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v8i16 (bitconvert v2i64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v16i8 (bitconvert v2i64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; - -def : Pat<(v2f64 (bitconvert v2i64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v2i64 (bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; - -def : Pat<(v2f64 (bitconvert v1i128:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v1i128 
(bitconvert v2f64:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; - -def : Pat<(v2i64 (bitconvert f128:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v4i32 (bitconvert f128:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v8i16 (bitconvert f128:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; -def : Pat<(v16i8 (bitconvert f128:$A)), - (COPY_TO_REGCLASS $A, VRRC)>; - -def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), - (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; -def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), - (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>; - -def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), - (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>; -def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), - (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; - -def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>; -def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>; - -// Loads. -let Predicates = [HasVSX, HasOnlySwappingMemOps] in { - def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; - - // Stores. - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; -} - -// Load vector big endian order -let Predicates = [IsLittleEndian, HasVSX] in { - def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; - def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; -} - -let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { - def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; -} - -// Permutes. -def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; -def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; -def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; -def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; -def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; - -// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and -// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable. -def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; - -// Selects. 
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), - (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), - (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), - (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), - (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), - (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), - (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), - (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), - (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), - (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; -def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), - (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; - -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), - (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), - (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), - (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), - (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), - (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), - (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), - (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), - (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), - (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; -def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), - (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; - -// Divides. -def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), - (XVDIVSP $A, $B)>; -def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), - (XVDIVDP $A, $B)>; - -// Reciprocal estimate -def : Pat<(int_ppc_vsx_xvresp v4f32:$A), - (XVRESP $A)>; -def : Pat<(int_ppc_vsx_xvredp v2f64:$A), - (XVREDP $A)>; - -// Recip. 
square root estimate -def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), - (XVRSQRTESP $A)>; -def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), - (XVRSQRTEDP $A)>; - -// Vector selection -def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)), - (COPY_TO_REGCLASS - (XXSEL (COPY_TO_REGCLASS $vC, VSRC), - (COPY_TO_REGCLASS $vB, VSRC), - (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; -def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)), - (COPY_TO_REGCLASS - (XXSEL (COPY_TO_REGCLASS $vC, VSRC), - (COPY_TO_REGCLASS $vB, VSRC), - (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; -def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC), - (XXSEL $vC, $vB, $vA)>; -def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC), - (XXSEL $vC, $vB, $vA)>; -def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC), - (XXSEL $vC, $vB, $vA)>; -def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC), - (XXSEL $vC, $vB, $vA)>; - -def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)), - (v4f32 (XVMAXSP $src1, $src2))>; -def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)), - (v4f32 (XVMINSP $src1, $src2))>; -def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)), - (v2f64 (XVMAXDP $src1, $src2))>; -def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)), - (v2f64 (XVMINDP $src1, $src2))>; - -let Predicates = [IsLittleEndian] in { -def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), - (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; -def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), - (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; -def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), - (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; -def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), - (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; -} // IsLittleEndian - -let Predicates = [IsBigEndian] in { -def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), - (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; -def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), - (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; -def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), - (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; -def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), - (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; -} // IsBigEndian - -} // AddedComplexity -} // HasVSX - -def FpMinMax { - dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC), - (COPY_TO_REGCLASS $B, VSFRC)), - VSSRC); - dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC), - (COPY_TO_REGCLASS $B, VSFRC)), - VSSRC); -} - -let AddedComplexity = 400, Predicates = [HasVSX] in { - // f32 Min. - def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), - (f32 FpMinMax.F32Min)>; - def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)), - (f32 FpMinMax.F32Min)>; - def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Min)>; - def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Min)>; - // F32 Max. 
- def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)), - (f32 FpMinMax.F32Max)>; - def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)), - (f32 FpMinMax.F32Max)>; - def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Max)>; - def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), - (f32 FpMinMax.F32Max)>; - - // f64 Min. - def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)), - (f64 (XSMINDP $A, $B))>; - def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)), - (f64 (XSMINDP $A, $B))>; - def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))), - (f64 (XSMINDP $A, $B))>; - def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), - (f64 (XSMINDP $A, $B))>; - // f64 Max. - def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)), - (f64 (XSMAXDP $A, $B))>; - def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)), - (f64 (XSMAXDP $A, $B))>; - def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), - (f64 (XSMAXDP $A, $B))>; - def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), - (f64 (XSMAXDP $A, $B))>; -} - -def ScalarLoads { - dag Li8 = (i32 (extloadi8 xoaddr:$src)); - dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); - dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); - dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); - dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); - - dag Li16 = (i32 (extloadi16 xoaddr:$src)); - dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); - dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); - dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); - dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); - - dag Li32 = (i32 (load xoaddr:$src)); -} - -def DWToSPExtractConv { - dag El0US1 = (f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); - dag El1US1 = (f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); - dag El0US2 = (f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); - dag El1US2 = (f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); - dag El0SS1 = (f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); - dag El1SS1 = (f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); - dag El0SS2 = (f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); - dag El1SS2 = (f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); - dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2)); - dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2)); -} - -def WToDPExtractConv { - dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0)))); - dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1)))); - dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2)))); - dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3)))); - dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0)))); - dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1)))); - dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2)))); - dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3)))); - dag BV02S = (v2f64 (build_vector El0S, El2S)); - dag BV13S = (v2f64 (build_vector El1S, El3S)); - dag BV02U = (v2f64 (build_vector El0U, El2U)); - dag BV13U = (v2f64 (build_vector El1U, El3U)); -} - -// The following VSX instructions were introduced in Power ISA 2.07 -/* FIXME: if the operands are v2i64, these patterns will not match. 
- we should define new patterns or otherwise match the same patterns - when the elements are larger than i32. -*/ -def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; -def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; -def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; -let Predicates = [HasP8Vector] in { -let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. - let isCommutable = 1 in { - def XXLEQV : XX3Form<60, 186, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xxleqv $XT, $XA, $XB", IIC_VecGeneral, - [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>; - def XXLNAND : XX3Form<60, 178, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xxlnand $XT, $XA, $XB", IIC_VecGeneral, - [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA, - v4i32:$XB)))]>; - } // isCommutable - - def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), - (XXLEQV $A, $B)>; - - let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, - isReMaterializable = 1 in { - def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins), - "xxleqv $XT, $XT, $XT", IIC_VecGeneral, - [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>; - } - - def XXLORC : XX3Form<60, 170, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xxlorc $XT, $XA, $XB", IIC_VecGeneral, - [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; - - // VSX scalar loads introduced in ISA 2.07 - let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { - let CodeSize = 3 in - def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src), - "lxsspx $XT, $src", IIC_LdStLFD, []>; - def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwax $XT, $src", IIC_LdStLFD, []>; - def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwzx $XT, $src", IIC_LdStLFD, []>; - - // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later - let CodeSize = 3 in - def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src), - "#XFLOADf32", - [(set f32:$XT, (load xoaddr:$src))]>; - // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later - def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), - "#LIWAX", - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; - // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later - def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), - "#LIWZX", - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; - } // mayLoad - - // VSX scalar stores introduced in ISA 2.07 - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { - let CodeSize = 3 in - def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), - "stxsspx $XT, $dst", IIC_LdStSTFD, []>; - def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), - "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; - - // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later - let CodeSize = 3 in - def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst), - "#XFSTOREf32", - [(store f32:$XT, xoaddr:$dst)]>; - // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later - def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), - "#STIWX", - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; - } // mayStore - - def : Pat<(f64 (extloadf32 xoaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), - (f32 (XFLOADf32 xoaddr:$src))>; - def : Pat<(f64 (fpextend f32:$src)), - (COPY_TO_REGCLASS $src, VSFRC)>; - - def : Pat<(f32 (selectcc 
i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), - (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), - (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), - (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), - (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), - (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), - (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), - (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), - (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), - (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; - def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), - (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; - - // VSX Elementary Scalar FP arithmetic (SP) - let mayRaiseFPException = 1 in { - let isCommutable = 1 in { - def XSADDSP : XX3Form<60, 0, - (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), - "xsaddsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>; - def XSMULSP : XX3Form<60, 16, - (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), - "xsmulsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>; - } // isCommutable - def XSSUBSP : XX3Form<60, 8, - (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), - "xssubsp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>; - def XSDIVSP : XX3Form<60, 24, - (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), - "xsdivsp $XT, $XA, $XB", IIC_FPDivS, - [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>; - } // mayRaiseFPException - def XSRESP : XX2Form<60, 26, - (outs vssrc:$XT), (ins vssrc:$XB), - "xsresp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCfre f32:$XB))]>; - def XSRSP : XX2Form<60, 281, - (outs vssrc:$XT), (ins vsfrc:$XB), - "xsrsp $XT, $XB", IIC_VecFP, []>; - def XSSQRTSP : XX2Form<60, 11, - (outs vssrc:$XT), (ins vssrc:$XB), - "xssqrtsp $XT, $XB", IIC_FPSqrtS, - [(set f32:$XT, (fsqrt f32:$XB))]>; - def XSRSQRTESP : XX2Form<60, 10, - (outs vssrc:$XT), (ins vssrc:$XB), - "xsrsqrtesp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCfrsqrte f32:$XB))]>; - - // FMA Instructions - let BaseName = "XSMADDASP" in { - let isCommutable = 1 in - def XSMADDASP : XX3Form<60, 1, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsmaddasp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - let IsVSXFMAAlt = 1 in - def XSMADDMSP : XX3Form<60, 9, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - } - - let BaseName = "XSMSUBASP" in { - let isCommutable = 1 in - def XSMSUBASP : XX3Form<60, 17, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsmsubasp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fma f32:$XA, f32:$XB, - (fneg f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - let IsVSXFMAAlt = 1 in - def 
XSMSUBMSP : XX3Form<60, 25, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - } - - let BaseName = "XSNMADDASP" in { - let isCommutable = 1 in - def XSNMADDASP : XX3Form<60, 129, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, - f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - let IsVSXFMAAlt = 1 in - def XSNMADDMSP : XX3Form<60, 137, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - } - - let BaseName = "XSNMSUBASP" in { - let isCommutable = 1 in - def XSNMSUBASP : XX3Form<60, 145, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, - [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, - (fneg f32:$XTi))))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - let IsVSXFMAAlt = 1 in - def XSNMSUBMSP : XX3Form<60, 153, - (outs vssrc:$XT), - (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), - "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - AltVSXFMARel; - } - - // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c) - def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C), - (XSNMSUBASP $C, $A, $B)>; - def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C), - (XSNMSUBASP $C, $A, $B)>; - - // Single Precision Conversions (FP <-> INT) - def XSCVSXDSP : XX2Form<60, 312, - (outs vssrc:$XT), (ins vsfrc:$XB), - "xscvsxdsp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCfcfids f64:$XB))]>; - def XSCVUXDSP : XX2Form<60, 296, - (outs vssrc:$XT), (ins vsfrc:$XB), - "xscvuxdsp $XT, $XB", IIC_VecFP, - [(set f32:$XT, (PPCfcfidus f64:$XB))]>; - - // Conversions between vector and scalar single precision - def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), - "xscvdpspn $XT, $XB", IIC_VecFP, []>; - def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), - "xscvspdpn $XT, $XB", IIC_VecFP, []>; - - let Predicates = [IsLittleEndian] in { - def : Pat; - def : Pat; - def : Pat; - def : Pat; - } - - let Predicates = [IsBigEndian] in { - def : Pat; - def : Pat; - def : Pat; - def : Pat; - } - - // Instructions for converting float to i64 feeding a store. - let Predicates = [NoP9Vector] in { - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; - } - - // Instructions for converting float to i32 feeding a store. 
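As context for the patterns that follow: they keep the truncating conversion result inside a VSR and store it straight from there, avoiding a round trip through the GPRs. A minimal scalar C++ model of what the convert-then-store sequence computes (illustrative code with our own function names, not anything from LLVM; xscvdpsxws/xscvdpuxws truncate toward zero, like a C cast):

    #include <cstdint>
    #include <cstring>

    // Model of "convert f64 to i32 feeding a store" (xscvdpsxws + stiwx):
    void storeF64AsI32(double Val, void *Dst) {
      int32_t Tmp = static_cast<int32_t>(Val); // truncate toward zero
      std::memcpy(Dst, &Tmp, sizeof(Tmp));     // store only the 4-byte word
    }

    // Unsigned variant (xscvdpuxws + stiwx):
    void storeF64AsU32(double Val, void *Dst) {
      uint32_t Tmp = static_cast<uint32_t>(Val);
      std::memcpy(Dst, &Tmp, sizeof(Tmp));
    }

The TableGen patterns implementing this follow.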
- def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), - (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; - - def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), - (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), - (COPY_TO_REGCLASS $src2, VRRC)))>; - def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)), - (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC), - (COPY_TO_REGCLASS $src2, VRRC)))>; - def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)), - (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC), - (COPY_TO_REGCLASS $src2, VRRC)))>; - def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)), - (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC), - (COPY_TO_REGCLASS $src2, VRRC)))>; -} // AddedComplexity = 400 -} // HasP8Vector - -let AddedComplexity = 400 in { -let Predicates = [HasDirectMove] in { - // VSX direct move instructions - def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), - "mfvsrd $rA, $XT", IIC_VecGeneral, - [(set i64:$rA, (PPCmfvsr f64:$XT))]>, - Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT), - "mfvsrd $rA, $XT", IIC_VecGeneral, - []>, - Requires<[In64BitMode]>; - def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), - "mfvsrwz $rA, $XT", IIC_VecGeneral, - [(set i32:$rA, (PPCmfvsr f64:$XT))]>; - let isCodeGenOnly = 1 in - def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT), - "mfvsrwz $rA, $XT", IIC_VecGeneral, - []>; - def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), - "mtvsrd $XT, $rA", IIC_VecGeneral, - [(set f64:$XT, (PPCmtvsra i64:$rA))]>, - Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA), - "mtvsrd $XT, $rA", IIC_VecGeneral, - []>, - Requires<[In64BitMode]>; - def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), - "mtvsrwa $XT, $rA", IIC_VecGeneral, - [(set f64:$XT, (PPCmtvsra i32:$rA))]>; - let isCodeGenOnly = 1 in - def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA), - "mtvsrwa $XT, $rA", IIC_VecGeneral, - []>; - def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), - "mtvsrwz $XT, $rA", IIC_VecGeneral, - [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; - let isCodeGenOnly = 1 in - def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA), - "mtvsrwz $XT, $rA", IIC_VecGeneral, - []>; -} // HasDirectMove - -let Predicates = [IsISA3_0, HasDirectMove] in { - def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), - "mtvsrws $XT, $rA", IIC_VecGeneral, []>; - - def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB), - "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, - []>, Requires<[In64BitMode]>; - - def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), - "mfvsrld $rA, $XT", IIC_VecGeneral, - []>, Requires<[In64BitMode]>; - -} // IsISA3_0, HasDirectMove -} // AddedComplexity = 400 - -// We want to parse this from asm, but we don't want to emit this as it would -// be emitted with a VSX reg. So leave Emit = 0 here. 
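Before the parse-only aliases below, a note on what the direct moves defined above actually do: they transfer a raw 64-bit register image between the GPR and VSR files with no value conversion. A minimal C++ sketch, under the assumption that a VSR dword can be modeled as a double (function names are ours, not LLVM's):

    #include <cstdint>
    #include <cstring>

    // mtvsrd: the GPR bit pattern lands unchanged in the VSR.
    double mtvsrdModel(uint64_t GPR) {
      double VSR;
      std::memcpy(&VSR, &GPR, sizeof(VSR)); // pure bit move, no conversion
      return VSR;
    }

    // mfvsrd: the inverse direction, again a pure bit move.
    uint64_t mfvsrdModel(double VSR) {
      uint64_t GPR;
      std::memcpy(&GPR, &VSR, sizeof(GPR));
      return GPR;
    }

The assembler aliases follow.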
-def : InstAlias<"mfvrd $rA, $XT", - (MFVRD g8rc:$rA, vrrc:$XT), 0>; -def : InstAlias<"mffprd $rA, $src", - (MFVSRD g8rc:$rA, f8rc:$src)>; -def : InstAlias<"mtvrd $XT, $rA", - (MTVRD vrrc:$XT, g8rc:$rA), 0>; -def : InstAlias<"mtfprd $dst, $rA", - (MTVSRD f8rc:$dst, g8rc:$rA)>; -def : InstAlias<"mfvrwz $rA, $XT", - (MFVRWZ gprc:$rA, vrrc:$XT), 0>; -def : InstAlias<"mffprwz $rA, $src", - (MFVSRWZ gprc:$rA, f8rc:$src)>; -def : InstAlias<"mtvrwa $XT, $rA", - (MTVRWA vrrc:$XT, gprc:$rA), 0>; -def : InstAlias<"mtfprwa $dst, $rA", - (MTVSRWA f8rc:$dst, gprc:$rA)>; -def : InstAlias<"mtvrwz $XT, $rA", - (MTVRWZ vrrc:$XT, gprc:$rA), 0>; -def : InstAlias<"mtfprwz $dst, $rA", - (MTVSRWZ f8rc:$dst, gprc:$rA)>; - -/* Direct moves of various widths from GPR's into VSR's. Each move lines - the value up into element 0 (both BE and LE). Namely, entities smaller than - a doubleword are shifted left and moved for BE. For LE, they're moved, then - swapped to go into the least significant element of the VSR. -*/ -def MovesToVSR { - dag BE_BYTE_0 = - (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); - dag BE_HALF_0 = - (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); - dag BE_WORD_0 = - (MTVSRD - (RLDICR - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); - dag BE_DWORD_0 = (MTVSRD $A); - - dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); - dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - LE_MTVSRW, sub_64)); - dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); - dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - BE_DWORD_0, sub_64)); - dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); -} - -/* Patterns for extracting elements out of vectors. Integer elements are - extracted using direct move operations. Patterns for extracting elements - whose indices are not available at compile time are also provided with - various _VARIABLE_ patterns. - The numbering for the DAG's is for LE, but when used on BE, the correct - LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). 
-*/ -def VectorExtractions { - // Doubleword extraction - dag LE_DWORD_0 = - (MFVSRD - (EXTRACT_SUBREG - (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), - (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); - dag LE_DWORD_1 = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); - - // Word extraction - dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); - dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); - dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); - dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); - - // Halfword extraction - dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); - dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); - dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); - dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); - dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); - dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); - dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); - dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); - - // Byte extraction - dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); - dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); - dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); - dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); - dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); - dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); - dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); - dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); - dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); - dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); - dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); - dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); - dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); - dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32)); - dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); - dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); - - /* Variable element number (BE and LE patterns must be specified separately) - This is a rather involved process. - - Conceptually, this is how the move is accomplished: - 1. Identify which doubleword contains the element - 2. Shift in the VMX register so that the correct doubleword is correctly - lined up for the MFVSRD - 3. Perform the move so that the element (along with some extra stuff) - is in the GPR - 4. Right shift within the GPR so that the element is right-justified - - Of course, the index is an element number which has a different meaning - on LE/BE so the patterns have to be specified separately. - - Note: The final result will be the element right-justified with high - order bits being arbitrarily defined (namely, whatever was in the - vector register to the left of the value originally). - */ - - /* LE variable byte - Number 1. 
above: - - For elements 0-7, we shift left by 8 bytes since they're on the right - - For elements 8-15, we need not shift (shift left by zero bytes) - This is accomplished by inverting the bits of the index and AND-ing - with 0x8 (i.e. clearing all bits of the index and inverting bit 60). - */ - dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx))); - - // Number 2. above: - // - Now that we set up the shift amount, we shift in the VMX register - dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC)); - - // Number 3. above: - // - The doubleword containing our element is moved to a GPR - dag LE_MV_VBYTE = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), - sub_64)); - - /* Number 4. above: - - Truncate the element number to the range 0-7 (8-15 are symmetrical - and out of range values are truncated accordingly) - - Multiply by 8 as we need to shift right by the number of bits, not bytes - - Shift right in the GPR by the calculated value - */ - dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), - sub_32); - dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), - sub_32); - - /* LE variable halfword - Number 1. above: - - For elements 0-3, we shift left by 8 since they're on the right - - For elements 4-7, we need not shift (shift left by zero bytes) - Similarly to the byte pattern, we invert the bits of the index, but we - AND with 0x4 (i.e. clear all bits of the index and invert bit 61). - Of course, the shift is still by 8 bytes, so we must multiply by 2. - */ - dag LE_VHALF_PERM_VEC = - (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62))); - - // Number 2. above: - // - Now that we set up the shift amount, we shift in the VMX register - dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC)); - - // Number 3. above: - // - The doubleword containing our element is moved to a GPR - dag LE_MV_VHALF = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), - sub_64)); - - /* Number 4. above: - - Truncate the element number to the range 0-3 (4-7 are symmetrical - and out of range values are truncated accordingly) - - Multiply by 16 as we need to shift right by the number of bits - - Shift right in the GPR by the calculated value - */ - dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), - sub_32); - dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), - sub_32); - - /* LE variable word - Number 1. above: - - For elements 0-1, we shift left by 8 since they're on the right - - For elements 2-3, we need not shift - */ - dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61))); - - // Number 2. above: - // - Now that we set up the shift amount, we shift in the VMX register - dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC)); - - // Number 3. above: - // - The doubleword containing our element is moved to a GPR - dag LE_MV_VWORD = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), - sub_64)); - - /* Number 4. 
above: - - Truncate the element number to the range 0-1 (2-3 are symmetrical - and out of range values are truncated accordingly) - - Multiply by 32 as we need to shift right by the number of bits - - Shift right in the GPR by the calculated value - */ - dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), - sub_32); - dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), - sub_32); - - /* LE variable doubleword - Number 1. above: - - For element 0, we shift left by 8 since it's on the right - - For element 1, we need not shift - */ - dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60))); - - // Number 2. above: - // - Now that we set up the shift amount, we shift in the VMX register - dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC)); - - // Number 3. above: - // - The doubleword containing our element is moved to a GPR - // - Number 4. is not needed for the doubleword as the value is 64-bits - dag LE_VARIABLE_DWORD = - (MFVSRD (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), - sub_64)); - - /* LE variable float - - Shift the vector to line up the desired element to BE Word 0 - - Convert 32-bit float to a 64-bit single precision float - */ - dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (XOR8 (LI8 3), $Idx), 2, 61))); - dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); - dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); - - /* LE variable double - Same as the LE doubleword except there is no move. - */ - dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), - (v16i8 (COPY_TO_REGCLASS $S, VRRC)), - LE_VDWORD_PERM_VEC)); - dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); - - /* BE variable byte - The algorithm here is the same as the LE variable byte except: - - The shift in the VMX register is by 0/8 for opposite element numbers so - we simply AND the element number with 0x8 - - The order of elements after the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-7 - */ - dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8))); - dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC)); - dag BE_MV_VBYTE = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), - sub_64)); - dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), - sub_32); - dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), - sub_32); - - /* BE variable halfword - The algorithm here is the same as the LE variable halfword except: - - The shift in the VMX register is by 0/8 for opposite element numbers so - we simply AND the element number with 0x4 and multiply by 2 - - The order of elements after the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-3 - */ - dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDI8_rec $Idx, 4), 1, 62))); - dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC)); - dag BE_MV_VHALF = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), - sub_64)); - dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), - sub_32); - dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), - sub_32); - - /* BE variable word - The algorithm is the same as the LE variable word except: - - The shift in the VMX register happens for opposite element numbers - - The order of elements after 
the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-1 - */ - dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDI8_rec $Idx, 2), 2, 61))); - dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC)); - dag BE_MV_VWORD = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), - sub_64)); - dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), - sub_32); - dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), - sub_32); - - /* BE variable doubleword - Same as the LE doubleword except we shift in the VMX register for opposite - element indices. - */ - dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDI8_rec $Idx, 1), 3, 60))); - dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC)); - dag BE_VARIABLE_DWORD = - (MFVSRD (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), - sub_64)); - - /* BE variable float - - Shift the vector to line up the desired element to BE Word 0 - - Convert 32-bit float to a 64-bit single precision float - */ - dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61))); - dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); - dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); - - /* BE variable double - Same as the BE doubleword except there is no move. - */ - dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), - (v16i8 (COPY_TO_REGCLASS $S, VRRC)), - BE_VDWORD_PERM_VEC)); - dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); -} - -def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">; -let AddedComplexity = 400 in { -// v4f32 scalar <-> vector conversions (BE) -let Predicates = [IsBigEndian, HasP8Vector] in { - def : Pat<(v4f32 (scalar_to_vector f32:$A)), - (v4f32 (XSCVDPSPN $A))>; - def : Pat<(f32 (vector_extract v4f32:$S, 0)), - (f32 (XSCVSPDPN $S))>; - def : Pat<(f32 (vector_extract v4f32:$S, 1)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; - def : Pat<(f32 (vector_extract v4f32:$S, 2)), - (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; - def : Pat<(f32 (vector_extract v4f32:$S, 3)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; - def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), - (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; -} // IsBigEndian, HasP8Vector - -// Variable index vector_extract for v2f64 does not require P8Vector -let Predicates = [IsBigEndian, HasVSX] in - def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), - (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; - -let Predicates = [IsBigEndian, HasDirectMove] in { - // v16i8 scalar <-> vector conversions (BE) - def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; - def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; - def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; - def : Pat<(v2i64 (scalar_to_vector i64:$A)), - (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; - - // v2i64 scalar <-> vector conversions (BE) - def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 VectorExtractions.LE_DWORD_1)>; - def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 VectorExtractions.LE_DWORD_0)>; - def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), - (i64 VectorExtractions.BE_VARIABLE_DWORD)>; -} // IsBigEndian, HasDirectMove - -let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in { - def : Pat<(i32 
(vector_extract v16i8:$S, 0)), - (i32 VectorExtractions.LE_BYTE_15)>; - def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 VectorExtractions.LE_BYTE_14)>; - def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 VectorExtractions.LE_BYTE_13)>; - def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 VectorExtractions.LE_BYTE_12)>; - def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 VectorExtractions.LE_BYTE_11)>; - def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 VectorExtractions.LE_BYTE_10)>; - def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 VectorExtractions.LE_BYTE_9)>; - def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 VectorExtractions.LE_BYTE_8)>; - def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 VectorExtractions.LE_BYTE_7)>; - def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 VectorExtractions.LE_BYTE_6)>; - def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 VectorExtractions.LE_BYTE_5)>; - def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 VectorExtractions.LE_BYTE_4)>; - def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 VectorExtractions.LE_BYTE_3)>; - def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 VectorExtractions.LE_BYTE_2)>; - def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 VectorExtractions.LE_BYTE_1)>; - def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 VectorExtractions.LE_BYTE_0)>; - def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 VectorExtractions.BE_VARIABLE_BYTE)>; - - // v8i16 scalar <-> vector conversions (BE) - def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 VectorExtractions.LE_HALF_7)>; - def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 VectorExtractions.LE_HALF_6)>; - def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 VectorExtractions.LE_HALF_5)>; - def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 VectorExtractions.LE_HALF_4)>; - def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 VectorExtractions.LE_HALF_3)>; - def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 VectorExtractions.LE_HALF_2)>; - def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 VectorExtractions.LE_HALF_1)>; - def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 VectorExtractions.LE_HALF_0)>; - def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 VectorExtractions.BE_VARIABLE_HALF)>; - - // v4i32 scalar <-> vector conversions (BE) - def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 VectorExtractions.LE_WORD_3)>; - def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 VectorExtractions.LE_WORD_2)>; - def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 VectorExtractions.LE_WORD_1)>; - def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 VectorExtractions.LE_WORD_0)>; - def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), - (i32 VectorExtractions.BE_VARIABLE_WORD)>; -} // IsBigEndian, HasDirectMove, NoP9Altivec - -// v4f32 scalar <-> vector conversions (LE) -let Predicates = [IsLittleEndian, HasP8Vector] in { - def : Pat<(v4f32 (scalar_to_vector f32:$A)), - (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; - def : Pat<(f32 (vector_extract v4f32:$S, 0)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; - def : Pat<(f32 (vector_extract v4f32:$S, 1)), - (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; - def : Pat<(f32 (vector_extract v4f32:$S, 2)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; - def : Pat<(f32 (vector_extract v4f32:$S, 3)), - (f32 (XSCVSPDPN $S))>; - def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), - (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; -} // IsLittleEndian, HasP8Vector - -// Variable index 
vector_extract for v2f64 does not require P8Vector -let Predicates = [IsLittleEndian, HasVSX] in - def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), - (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; - -def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), - (STXVD2X $rS, xoaddr:$dst)>; -def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; -def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; -def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; - -// Variable index unsigned vector_extract on Power9 -let Predicates = [HasP9Altivec, IsLittleEndian] in { - def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), - (VEXTUBRX $Idx, $S)>; - - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), - (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), - (VEXTUHRX (LI8 0), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), - (VEXTUHRX (LI8 2), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), - (VEXTUHRX (LI8 4), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), - (VEXTUHRX (LI8 6), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), - (VEXTUHRX (LI8 8), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), - (VEXTUHRX (LI8 10), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), - (VEXTUHRX (LI8 12), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), - (VEXTUHRX (LI8 14), $S)>; - - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), - (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), - (VEXTUWRX (LI8 0), $S)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), - (VEXTUWRX (LI8 4), $S)>; - // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 VectorExtractions.LE_WORD_2), sub_32)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), - (VEXTUWRX (LI8 12), $S)>; - - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), - (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), - (EXTSW (VEXTUWRX (LI8 0), $S))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), - (EXTSW (VEXTUWRX (LI8 4), $S))>; - // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), - (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 VectorExtractions.LE_WORD_2), sub_32))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), - (EXTSW (VEXTUWRX (LI8 12), $S))>; - - def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>; - 
def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>; - - def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUHRX - (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>; - - def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUWRX - (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>; - def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>; - def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>; - // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX - def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 VectorExtractions.LE_WORD_2)>; - def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>; -} - -let Predicates = [HasP9Altivec, IsBigEndian] in { - def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), - (VEXTUBLX $Idx, $S)>; - - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), - (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), - (VEXTUHLX (LI8 0), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), - (VEXTUHLX (LI8 2), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), - (VEXTUHLX (LI8 4), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), - (VEXTUHLX (LI8 6), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), - (VEXTUHLX (LI8 8), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), - (VEXTUHLX (LI8 10), $S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), - (VEXTUHLX (LI8 12),
$S)>; - def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), - (VEXTUHLX (LI8 14), $S)>; - - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), - (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), - (VEXTUWLX (LI8 0), $S)>; - - // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 VectorExtractions.LE_WORD_2), sub_32)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), - (VEXTUWLX (LI8 8), $S)>; - def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), - (VEXTUWLX (LI8 12), $S)>; - - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), - (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), - (EXTSW (VEXTUWLX (LI8 0), $S))>; - // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), - (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 VectorExtractions.LE_WORD_2), sub_32))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), - (EXTSW (VEXTUWLX (LI8 8), $S))>; - def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), - (EXTSW (VEXTUWLX (LI8 12), $S))>; - - def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>; - def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>; - - def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUHLX - (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 
(EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>; - def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>; - - def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), - (i32 (EXTRACT_SUBREG (VEXTUWLX - (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>; - def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>; - // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX - def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 VectorExtractions.LE_WORD_2)>; - def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>; - def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>; -} - -let Predicates = [IsLittleEndian, HasDirectMove] in { - // v16i8 scalar <-> vector conversions (LE) - def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; - def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; - def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 MovesToVSR.LE_WORD_0)>; - def : Pat<(v2i64 (scalar_to_vector i64:$A)), - (v2i64 MovesToVSR.LE_DWORD_0)>; - // v2i64 scalar <-> vector conversions (LE) - def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 VectorExtractions.LE_DWORD_0)>; - def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 VectorExtractions.LE_DWORD_1)>; - def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), - (i64 VectorExtractions.LE_VARIABLE_DWORD)>; -} // IsLittleEndian, HasDirectMove - -let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in { - def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 VectorExtractions.LE_BYTE_0)>; - def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 VectorExtractions.LE_BYTE_1)>; - def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 VectorExtractions.LE_BYTE_2)>; - def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 VectorExtractions.LE_BYTE_3)>; - def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 VectorExtractions.LE_BYTE_4)>; - def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 VectorExtractions.LE_BYTE_5)>; - def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 VectorExtractions.LE_BYTE_6)>; - def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 VectorExtractions.LE_BYTE_7)>; - def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 VectorExtractions.LE_BYTE_8)>; - def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 VectorExtractions.LE_BYTE_9)>; - def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 VectorExtractions.LE_BYTE_10)>; - def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 VectorExtractions.LE_BYTE_11)>; - def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 VectorExtractions.LE_BYTE_12)>; - def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 VectorExtractions.LE_BYTE_13)>; - def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 VectorExtractions.LE_BYTE_14)>; - def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 VectorExtractions.LE_BYTE_15)>; - def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32
VectorExtractions.LE_VARIABLE_BYTE)>; - - // v8i16 scalar <-> vector conversions (LE) - def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 VectorExtractions.LE_HALF_0)>; - def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 VectorExtractions.LE_HALF_1)>; - def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 VectorExtractions.LE_HALF_2)>; - def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 VectorExtractions.LE_HALF_3)>; - def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 VectorExtractions.LE_HALF_4)>; - def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 VectorExtractions.LE_HALF_5)>; - def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 VectorExtractions.LE_HALF_6)>; - def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 VectorExtractions.LE_HALF_7)>; - def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 VectorExtractions.LE_VARIABLE_HALF)>; - - // v4i32 scalar <-> vector conversions (LE) - def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 VectorExtractions.LE_WORD_0)>; - def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 VectorExtractions.LE_WORD_1)>; - def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 VectorExtractions.LE_WORD_2)>; - def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 VectorExtractions.LE_WORD_3)>; - def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), - (i32 VectorExtractions.LE_VARIABLE_WORD)>; -} // IsLittleEndian, HasDirectMove, NoP9Altivec - -let Predicates = [HasDirectMove, HasVSX] in { -// bitconvert f32 -> i32 -// (convert to 32-bit fp single, shift right 1 word, move to GPR) -def : Pat<(i32 (bitconvert f32:$S)), - (i32 (MFVSRWZ (EXTRACT_SUBREG - (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3), - sub_64)))>; -// bitconvert i32 -> f32 -// (move to FPR, shift left 1 word, convert to 64-bit fp single) -def : Pat<(f32 (bitconvert i32:$A)), - (f32 (XSCVSPDPN - (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>; - -// bitconvert f64 -> i64 -// (move to GPR, nothing else needed) -def : Pat<(i64 (bitconvert f64:$S)), - (i64 (MFVSRD $S))>; - -// bitconvert i64 -> f64 -// (move to FPR, nothing else needed) -def : Pat<(f64 (bitconvert i64:$S)), - (f64 (MTVSRD $S))>; - -// Rounding to integer. -def : Pat<(i64 (lrint f64:$S)), - (i64 (MFVSRD (FCTID $S)))>; -def : Pat<(i64 (lrint f32:$S)), - (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; -def : Pat<(i64 (llrint f64:$S)), - (i64 (MFVSRD (FCTID $S)))>; -def : Pat<(i64 (llrint f32:$S)), - (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; -def : Pat<(i64 (lround f64:$S)), - (i64 (MFVSRD (FCTID (XSRDPI $S))))>; -def : Pat<(i64 (lround f32:$S)), - (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; -def : Pat<(i64 (llround f64:$S)), - (i64 (MFVSRD (FCTID (XSRDPI $S))))>; -def : Pat<(i64 (llround f32:$S)), - (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; -} + let BaseName = "XSNMSUBASP" in { + let isCommutable = 1 in + def XSNMSUBASP : XX3Form<60, 145, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + (fneg f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in + def XSNMSUBMSP : XX3Form<60, 153, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } -let Predicates = [HasVSX] in { -// Rounding for single precision. -def : Pat<(f32 (fround f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPI - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (fnearbyint f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPIC - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (ffloor f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPIM - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (fceil f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPIP - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (ftrunc f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPIZ - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f32 (frint f32:$S)), - (f32 (COPY_TO_REGCLASS (XSRDPIC - (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; -// Rounding for double precision. -def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>; -def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; -} + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, []>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, []>; -// Materialize a zero-vector of long long -def : Pat<(v2i64 immAllZerosV), - (v2i64 (XXLXORz))>; -} + let Predicates = [HasVSX, HasDirectMove] in { + // VSX direct move instructions + def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let isCodeGenOnly = 1, hasSideEffects = 1 in + def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; + def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let isCodeGenOnly = 1, hasSideEffects = 1 in + def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + []>; + def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let isCodeGenOnly = 1, hasSideEffects = 1 in + def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; + def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
+ let isCodeGenOnly = 1, hasSideEffects = 1 in + def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + []>; + def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let isCodeGenOnly = 1, hasSideEffects = 1 in + def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + []>; + } // HasDirectMove -def AlignValues { - dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); - dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); -} +} // HasVSX, HasP8Vector -// The following VSX instructions were introduced in Power ISA 3.0 -def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; -let AddedComplexity = 400, Predicates = [HasP9Vector] in { +let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in { +def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrws $XT, $rA", IIC_VecGeneral, []>; - // [PO VRT XO VRB XO /] - class X_VT5_XO5_VB5 opcode, bits<5> xo2, bits<10> xo, string opc, - list pattern> - : X_RD5_XO5_RS5; - - // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] - class X_VT5_XO5_VB5_Ro opcode, bits<5> xo2, bits<10> xo, string opc, - list pattern> - : X_VT5_XO5_VB5, isRecordForm; - - // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less), - // So we use different operand class for VRB - class X_VT5_XO5_VB5_TyVB opcode, bits<5> xo2, bits<10> xo, string opc, - RegisterOperand vbtype, list pattern> - : X_RD5_XO5_RS5; - - // [PO VRT XO VRB XO /] - class X_VT5_XO5_VB5_VSFR opcode, bits<5> xo2, bits<10> xo, string opc, - list pattern> - : X_RD5_XO5_RS5; +def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB), + "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; - // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] - class X_VT5_XO5_VB5_VSFR_Ro opcode, bits<5> xo2, bits<10> xo, string opc, - list pattern> - : X_VT5_XO5_VB5_VSFR, isRecordForm; +def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), + "mfvsrld $rA, $XT", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; - // [PO T XO B XO BX /] - class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, - list pattern> - : XX2_RD5_XO5_RS6; - - // [PO T XO B XO BX TX] - class XX2_XT6_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, - RegisterOperand vtype, list pattern> - : XX2_RD6_XO5_RS6; - - // [PO T A B XO AX BX TX], src and dest register use different operand class - class XX3_XT5_XA5_XB5 opcode, bits<8> xo, string opc, - RegisterOperand xty, RegisterOperand aty, RegisterOperand bty, - InstrItinClass itin, list pattern> - : XX3Form; - - // [PO VRT VRA VRB XO /] - class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, - list pattern> - : XForm_1; - - // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] - class X_VT5_VA5_VB5_Ro opcode, bits<10> xo, string opc, - list pattern> - : X_VT5_VA5_VB5, isRecordForm; - - // [PO VRT VRA VRB XO /] - class X_VT5_VA5_VB5_FMA opcode, bits<10> xo, string opc, - list pattern> - : XForm_1, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">; - - // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] - class X_VT5_VA5_VB5_FMA_Ro opcode, bits<10> xo, string opc, - list pattern> - : X_VT5_VA5_VB5_FMA, isRecordForm; +} // HasVSX, 
IsISA3_0, HasDirectMove - //===--------------------------------------------------------------------===// +let Predicates = [HasVSX, HasP9Vector] in { // Quad-Precision Scalar Move Instructions: - // Copy Sign def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", [(set f128:$vT, @@ -2769,40 +1430,28 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; - // Additional fnmsub patterns: -a*b + c == -(a*b - c) - def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>; - def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>; - - //===--------------------------------------------------------------------===// - // Quad/Double-Precision Compare Instructions: - - // [PO BF // VRA VRB XO /] - class X_BF3_VA5_VB5 opcode, bits<10> xo, string opc, - list pattern> - : XForm_17 { - let Pattern = pattern; - } - + // FIXME: Setting the hasSideEffects flag here to match current behaviour. // QP Compare Ordered/Unordered - def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; - def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; - - // DP/QP Compare Exponents - def XSCMPEXPDP : XX3Form_1<60, 59, - (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; - def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; - - // DP Compare ==, >=, >, != - // Use vsrc for XT, because the entire register of XT is set. - // XT.dword[1] = 0x0000_0000_0000_0000 - def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, - IIC_FPCompare, []>; - def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, - IIC_FPCompare, []>; - def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, - IIC_FPCompare, []>; + let hasSideEffects = 1 in { + def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; + def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; + + // DP/QP Compare Exponents + def XSCMPEXPDP : XX3Form_1<60, 59, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; + + // DP Compare ==, >=, >, != + // Use vsrc for XT, because the entire register of XT is set. + // XT.dword[1] = 0x0000_0000_0000_0000 + def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + } //===--------------------------------------------------------------------===// // Quad-Precision Floating-Point Conversion Instructions: @@ -2818,86 +1467,44 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (int_ppc_truncf128_round_to_odd f128:$vB))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) - def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; - def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; - def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; - def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; + let hasSideEffects = 1 in { + def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; + def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; + def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; + def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; + } // Convert (Un)Signed DWord -> QP. def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>; - def : Pat<(f128 (sint_to_fp i64:$src)), - (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; - def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))), - (f128 (XSCVSDQP $src))>; - def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))), - (f128 (XSCVSDQP (VEXTSW2Ds $src)))>; - def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>; - def : Pat<(f128 (uint_to_fp i64:$src)), - (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; - def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))), - (f128 (XSCVUDQP $src))>; - - // Convert (Un)Signed Word -> QP. - def : Pat<(f128 (sint_to_fp i32:$src)), - (f128 (XSCVSDQP (MTVSRWA $src)))>; - def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; - def : Pat<(f128 (uint_to_fp i32:$src)), - (f128 (XSCVUDQP (MTVSRWZ $src)))>; - def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))), - (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; - - //===--------------------------------------------------------------------===// - // Round to Floating-Point Integer Instructions // (Round &) Convert DP <-> HP // Note! xscvdphp's src and dest register both use the left 64 bits, so we use // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits, // but we still use vsfrc for it. - def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; - def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; + def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; + } // Vector HP -> SP + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>; def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, [(set v4f32:$XT, (int_ppc_vsx_xvcvsphp v4f32:$XB))]>; - // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a - // separate pattern so that it can convert the input register class from - // VRRC(v8i16) to VSRC. 
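For intuition about the conversion being matched here: each lane of xvcvhpsp widens an IEEE binary16 value to binary32. A self-contained scalar model covering the normal, subnormal, zero, and inf/NaN encodings (our code and naming, not LLVM's; the actual hardware does this per lane):

    #include <cstdint>
    #include <cstring>

    float halfToFloat(uint16_t H) {
      uint32_t Sign = (uint32_t)(H & 0x8000u) << 16;
      uint32_t Exp = (H >> 10) & 0x1Fu;
      uint32_t Mant = H & 0x3FFu;
      uint32_t Bits;
      if (Exp == 0x1Fu) {                    // inf/NaN: keep payload
        Bits = Sign | 0x7F800000u | (Mant << 13);
      } else if (Exp == 0) {
        if (Mant == 0) {                     // +/- zero
          Bits = Sign;
        } else {                             // subnormal: renormalize
          Exp = 127 - 15 + 1;
          while (!(Mant & 0x400u)) { Mant <<= 1; --Exp; }
          Mant &= 0x3FFu;
          Bits = Sign | (Exp << 23) | (Mant << 13);
        }
      } else {                               // normal: rebias exponent
        Bits = Sign | ((Exp - 15 + 127) << 23) | (Mant << 13);
      }
      float F;
      std::memcpy(&F, &Bits, sizeof(F));
      return F;
    }

The register-class bridging pattern follows.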
- def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), - (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; - - class Z23_VT5_R1_VB5_RMC2_EX1 opcode, bits<8> xo, bit ex, string opc, - list pattern> - : Z23Form_8 { - let RC = ex; - } - // Round to Quad-Precision Integer [with Inexact] def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; - // Use current rounding mode - def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; - // Round to nearest, ties away from zero - def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; - // Round towards Zero - def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>; - // Round towards +Inf - def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>; - // Round towards -Inf - def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>; - - // Use current rounding mode, [with Inexact] - def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>; - // Round Quad-Precision to Double-Extended Precision (fp80) + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>; //===--------------------------------------------------------------------===// @@ -2905,26 +1512,25 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Insert Exponent DP/QP // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU - def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), - "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; - // vB NOTE: only vB.dword[0] is used, that's why we don't use - // X_VT5_VA5_VB5 form - def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), - "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>; - - def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)), - (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), + "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>; + // vB NOTE: only vB.dword[0] is used, that's why we don't use + // X_VT5_VA5_VB5 form + def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB), + "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>; + } // Extract Exponent/Significand DP/QP - def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; - def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; - - def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; - def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>; + def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>; - def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)), - (i64 (MFVSRD (EXTRACT_SUBREG - (v2i64 (XSXEXPQP $vA)), sub_64)))>; + def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>; + def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>; + } // Vector Insert Word // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. @@ -2937,6 +1543,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; // Vector Extract Unsigned Word + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
+ let hasSideEffects = 1 in def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; @@ -2961,26 +1569,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { [(set v4i32: $XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))]>; - let AddedComplexity = 400, Predicates = [HasP9Vector] in { - // Extra patterns expanding to vector Extract Word/Insert Word - def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)), - (v4i32 (XXINSERTW $A, $B, imm:$IMM))>; - def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)), - (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>; - } // AddedComplexity = 400, HasP9Vector - - //===--------------------------------------------------------------------===// - // Test Data Class SP/DP/QP - def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, - (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), - "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; - def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, - (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), - "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; - def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, - (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), - "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; + def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; + def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, + (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), + "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; + } // Vector Test Data Class SP/DP def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, @@ -2994,52 +1595,52 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { [(set v2i64: $XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; - //===--------------------------------------------------------------------===// - // Maximum/Minimum Type-C/Type-J DP def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc, IIC_VecFP, [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>; - def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc, IIC_VecFP, [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>; - def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; - //===--------------------------------------------------------------------===// + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + } // Vector Byte-Reverse H/W/D/Q Word + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>; def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, [(set v4i32:$XT, (bswap v4i32:$XB))]>; def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, [(set v2i64:$XT, (bswap v2i64:$XB))]>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. 
+ let hasSideEffects = 1 in def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; - // Vector Reverse - def : Pat<(v8i16 (bswap v8i16 :$A)), - (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; - def : Pat<(v1i128 (bswap v1i128 :$A)), - (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; - // Vector Permute - def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, - IIC_VecPerm, []>; - def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, - IIC_VecPerm, []>; + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in { + def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + } // Vector Splat Immediate Byte + // FIXME: Setting the hasSideEffects flag here to match current behaviour. + let hasSideEffects = 1 in def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), "xxspltib $XT, $IMM8", IIC_VecPerm, []>; - //===--------------------------------------------------------------------===// - // Vector/Scalar Load/Store Instructions - // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { + let mayLoad = 1, mayStore = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>; @@ -3050,13 +1651,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), "lxssp $vD, $src", IIC_LdStLFD, []>; - // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different - // "out" and "in" dag - class X_XT6_RA5_RB5 opcode, bits<10> xo, string opc, - RegisterOperand vtype, list pattern> - : XX1Form_memOp; - // Load as Integer Byte/Halfword & Zero Indexed def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; @@ -3084,7 +1678,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. 
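+  // (ImmToIdxMap maps each D-Form opcode to its X-Form twin so that
+  // PPCRegisterInfo::eliminateFrameIndex can fall back to the indexed form
+  // when an offset does not fit the immediate field.)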
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { + let mayStore = 1, mayLoad = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>; @@ -3095,12 +1689,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), "stxssp $vS, $dst", IIC_LdStSTFD, []>; - // [PO S RA RB XO SX] - class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, - RegisterOperand vtype, list pattern> - : XX1Form_memOp; - // Store as Integer Byte/Halfword Indexed def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; @@ -3132,732 +1720,452 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { i64:$rB)]>; } // mayStore - let Predicates = [IsLittleEndian] in { - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; - } + def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), + "#DFLOADf32", + [(set f32:$XT, (load iaddrX4:$src))]>; + def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), + "#DFLOADf64", + [(set f64:$XT, (load iaddrX4:$src))]>; + def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), + "#DFSTOREf32", + [(store f32:$XT, iaddrX4:$dst)]>; + def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), + "#DFSTOREf64", + [(store f64:$XT, iaddrX4:$dst)]>; + + let mayStore = 1 in { + def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), + (ins spilltovsrrc:$XT, memrr:$dst), + "#SPILLTOVSR_STX", []>; + def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), + "#SPILLTOVSR_ST", []>; + } + let mayLoad = 1 in { + def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), + (ins memrr:$src), + "#SPILLTOVSR_LDX", []>; + def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), + "#SPILLTOVSR_LD", []>; + + } + } // HasP9Vector +} // hasSideEffects = 0 + +let PPC970_Single = 1, AddedComplexity = 400 in { + + def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), + (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), + "#SELECT_CC_VSRC", + []>; + def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst), + (ins crbitrc:$cond, vsrc:$T, vsrc:$F), + "#SELECT_VSRC", + [(set v2f64:$dst, + (select i1:$cond, v2f64:$T, v2f64:$F))]>; + def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst), + (ins crrc:$cond, f8rc:$T, f8rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSFRC", + []>; + def SELECT_VSFRC: 
PPCCustomInserterPseudo<(outs f8rc:$dst),
+                     (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+                     "#SELECT_VSFRC",
+                     [(set f64:$dst,
+                      (select i1:$cond, f64:$T, f64:$F))]>;
+  def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+                              (ins crrc:$cond, f4rc:$T, f4rc:$F,
+                               i32imm:$BROPC), "#SELECT_CC_VSSRC",
+                              []>;
+  def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+                     (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+                     "#SELECT_VSSRC",
+                     [(set f32:$dst,
+                      (select i1:$cond, f32:$T, f32:$F))]>;
+}
+}
+
+//----------------------------- DAG Definitions ------------------------------//
+def FpMinMax {
+  dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC);
+  dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC);
+}
+
+def ScalarLoads {
+  dag Li8 = (i32 (extloadi8 xoaddr:$src));
+  dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
+  dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
+  dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+  dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+  dag Li16 = (i32 (extloadi16 xoaddr:$src));
+  dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
+  dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+  dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
+  dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+  dag Li32 = (i32 (load xoaddr:$src));
+}
+
+def DWToSPExtractConv {
+  dag El0US1 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+  dag El1US1 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+  dag El0US2 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+  dag El1US2 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+  dag El0SS1 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+  dag El1SS1 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+  dag El0SS2 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+  dag El1SS2 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+  dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+  dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
+def WToDPExtractConv {
+  dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
+  dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
+  dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
+  dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
+  dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
+  dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
+  dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
+  dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
+  dag BV02S = (v2f64 (build_vector El0S, El2S));
+  dag BV13S = (v2f64 (build_vector El1S, El3S));
+  dag BV02U = (v2f64 (build_vector El0U, El2U));
+  dag BV13U = (v2f64 (build_vector El1U, El3U));
+}
+
+/* Direct moves of various widths from GPRs into VSRs. Each move lines
+   the value up into element 0 (both BE and LE). Namely, entities smaller than
+   a doubleword are shifted left and moved for BE. For LE, they're moved, then
+   swapped to go into the least significant element of the VSR.
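+   For example, BE_WORD_0 below rotates the word into the high half of the
+   doubleword (RLDICR ..., 32, 31) before the MTVSRD, while LE_WORD_0 does
+   the MTVSRD first and then swaps the two doublewords with an XXPERMDI.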
+*/
+def MovesToVSR {
+  dag BE_BYTE_0 =
+    (MTVSRD
+      (RLDICR
+        (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+  dag BE_HALF_0 =
+    (MTVSRD
+      (RLDICR
+        (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+  dag BE_WORD_0 =
+    (MTVSRD
+      (RLDICR
+        (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+  dag BE_DWORD_0 = (MTVSRD $A);
+
+  dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+  dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+                                        LE_MTVSRW, sub_64));
+  dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
+  dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+                                         BE_DWORD_0, sub_64));
+  dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
+}
+
+/* Patterns for extracting elements out of vectors. Integer elements are
+   extracted using direct move operations. Patterns for extracting elements
+   whose indices are not available at compile time are also provided with
+   various _VARIABLE_ patterns.
+   The numbering for the DAGs is for LE, but when used on BE, the correct
+   LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def VectorExtractions {
+  // Doubleword extraction
+  dag LE_DWORD_0 =
+    (MFVSRD
+      (EXTRACT_SUBREG
+        (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+                  (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+  dag LE_DWORD_1 = (MFVSRD
+                     (EXTRACT_SUBREG
+                       (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+  // Word extraction
+  dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
+  dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+  dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+                             (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+  dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
+
+  // Halfword extraction
+  dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+  dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+  dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+  dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+  dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+  dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+  dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+  dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+  // Byte extraction
+  dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+  dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+  dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+  dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+  dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+  dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+  dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+  dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+  dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+  dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+  dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+  dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+  dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+  dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+  dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL
LE_DWORD_1, 16, 56), sub_32)); + dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); + + /* Variable element number (BE and LE patterns must be specified separately) + This is a rather involved process. - let Predicates = [IsBigEndian] in { - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; - def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), - (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; - def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), - (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; - } + Conceptually, this is how the move is accomplished: + 1. Identify which doubleword contains the element + 2. Shift in the VMX register so that the correct doubleword is correctly + lined up for the MFVSRD + 3. Perform the move so that the element (along with some extra stuff) + is in the GPR + 4. Right shift within the GPR so that the element is right-justified - // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead - // of f64 - def : Pat<(v8i16 (PPCmtvsrz i32:$A)), - (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; - def : Pat<(v16i8 (PPCmtvsrz i32:$A)), - (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; - - // Patterns for which instructions from ISA 3.0 are a better match - let Predicates = [IsLittleEndian, HasP9Vector] in { - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), - (v4i32 (XXINSERTW v4i32:$A, 
AlignValues.I32_TO_BE_WORD1, 0))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; - - def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; - def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), - (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; - - def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; - def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), - (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; - } // IsLittleEndian, HasP9Vector - - let Predicates = [IsBigEndian, HasP9Vector] in { - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; - def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), - (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; - def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), - (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; - def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), - (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; - def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), - (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; - } // IsBigEndian, HasP9Vector - - // D-Form Load/Store - def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), - (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>; 
- - def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), - (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; - def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst), - (STXV $rS, memrix16:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst), - (STXV $rS, memrix16:$dst)>; - - - def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)), - (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>; - def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst), - (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; - def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; + Of course, the index is an element number which has a different meaning + on LE/BE so the patterns have to be specified separately. - let AddedComplexity = 400 in { - // LIWAX - This instruction is used for sign extending i32 -> i64. - // LIWZX - This instruction will be emitted for i32, f32, and when - // zero-extending i32 to i64 (zext i32 -> i64). - let Predicates = [IsLittleEndian] in { + Note: The final result will be the element right-justified with high + order bits being arbitrarily defined (namely, whatever was in the + vector register to the left of the value originally). + */ - def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>; + /* LE variable byte + Number 1. above: + - For elements 0-7, we shift left by 8 bytes since they're on the right + - For elements 8-15, we need not shift (shift left by zero bytes) + This is accomplished by inverting the bits of the index and AND-ing + with 0x8 (i.e. clearing all bits of the index and inverting bit 60). + */ + dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx))); - def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC)); - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; + // Number 3. 
above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), + sub_64)); - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (XXPERMDIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>; - } + /* Number 4. above: + - Truncate the element number to the range 0-7 (8-15 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 8 as we need to shift right by the number of bits, not bytes + - Shift right in the GPR by the calculated value + */ + dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), + sub_32); + dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), + sub_32); - let Predicates = [IsBigEndian] in { - def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + /* LE variable halfword + Number 1. above: + - For elements 0-3, we shift left by 8 since they're on the right + - For elements 4-7, we need not shift (shift left by zero bytes) + Similarly to the byte pattern, we invert the bits of the index, but we + AND with 0x4 (i.e. clear all bits of the index and invert bit 61). + Of course, the shift is still by 8 bytes, so we must multiply by 2. + */ + dag LE_VHALF_PERM_VEC = + (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62))); - def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), - (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC)); - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), + sub_64)); - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (XXSLDWIs - (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; - } + /* Number 4. above: + - Truncate the element number to the range 0-3 (4-7 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 16 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), + sub_32); + dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), + sub_32); - } + /* LE variable word + Number 1. 
above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61))); - // Build vectors from i8 loads - def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), - (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; - def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), - (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), - (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; - def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), - (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), - (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; - def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), - (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; - - // Build vectors from i16 loads - def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), - (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)), - (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; - def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), - (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), - (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; - def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), - (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; - - // Load/convert and convert/store patterns for f16. - def : Pat<(f64 (extloadf16 xoaddr:$src)), - (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; - def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; - def : Pat<(f32 (extloadf16 xoaddr:$src)), - (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; - def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), - (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; - - let Predicates = [IsBigEndian, HasP9Vector] in { - // Scalar stores of i8 - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 
(VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; - - // Scalar stores of i16 - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; - } // IsBigEndian, HasP9Vector - - let Predicates = [IsLittleEndian, HasP9Vector] in { - // Scalar stores of i8 - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), - (STXSIBXv 
(COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), - (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; - - // Scalar stores of i16 - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; - def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), - (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; - } // IsLittleEndian, HasP9Vector - - - // Vector sign extensions - def : Pat<(f64 (PPCVexts f64:$A, 1)), - (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; - def : Pat<(f64 (PPCVexts f64:$A, 2)), - (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC)); - def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src), - "#DFLOADf32", - [(set f32:$XT, (load iaddrX4:$src))]>; - def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src), - "#DFLOADf64", - [(set f64:$XT, (load iaddrX4:$src))]>; - def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst), - "#DFSTOREf32", - [(store f32:$XT, iaddrX4:$dst)]>; - def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst), - "#DFSTOREf64", - [(store f64:$XT, iaddrX4:$dst)]>; + // Number 3. 
above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); - def : Pat<(f64 (extloadf32 iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>; - def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), - (f32 (DFLOADf32 iaddrX4:$src))>; - - def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), - (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; - def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>; - - let AddedComplexity = 400 in { - // The following pseudoinstructions are used to ensure the utilization - // of all 64 VSX registers. - let Predicates = [IsLittleEndian, HasP9Vector] in { - def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; - def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; - - def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; - def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (XXPERMDIs - (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - iaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; - } // IsLittleEndian, HasP9Vector - - let Predicates = [IsBigEndian, HasP9Vector] in { - def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; - def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), - (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; - - def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; - def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), - (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), xaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, 
sub_64), xaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), - sub_64), iaddrX4:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), - (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; - } // IsBigEndian, HasP9Vector - } + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); - let Predicates = [IsBigEndian, HasP9Vector] in { - - // (Un)Signed DWord vector extract -> QP - def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), - (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; - def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), - (f128 (XSCVSDQP - (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; - def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), - (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; - def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), - (f128 (XSCVUDQP - (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; - - // (Un)Signed Word vector extract -> QP - def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))), - (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; - foreach Idx = [0,2,3] in { - def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))), - (f128 (XSCVSDQP (EXTRACT_SUBREG - (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>; - } - foreach Idx = 0-3 in { - def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))), - (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>; - } + /* LE variable doubleword + Number 1. above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60))); - // (Un)Signed HWord vector extract -> QP - foreach Idx = 0-7 in { - def : Pat<(f128 (sint_to_fp - (i32 (sext_inreg - (vector_extract v8i16:$src, Idx), i16)))), - (f128 (XSCVSDQP (EXTRACT_SUBREG - (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)), - sub_64)))>; - // The SDAG adds the `and` since an `i16` is being extracted as an `i32`. - def : Pat<(f128 (uint_to_fp - (and (i32 (vector_extract v8i16:$src, Idx)), 65535))), - (f128 (XSCVUDQP (EXTRACT_SUBREG - (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>; - } + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC)); - // (Un)Signed Byte vector extract -> QP - foreach Idx = 0-15 in { - def : Pat<(f128 (sint_to_fp - (i32 (sext_inreg (vector_extract v16i8:$src, Idx), - i8)))), - (f128 (XSCVSDQP (EXTRACT_SUBREG - (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>; - def : Pat<(f128 (uint_to_fp - (and (i32 (vector_extract v16i8:$src, Idx)), 255))), - (f128 (XSCVUDQP - (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>; - } + // Number 3. 
above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); - // Unsigned int in vsx register -> QP - def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))), - (f128 (XSCVUDQP - (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>; - } // IsBigEndian, HasP9Vector - - let Predicates = [IsLittleEndian, HasP9Vector] in { - - // (Un)Signed DWord vector extract -> QP - def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), - (f128 (XSCVSDQP - (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; - def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), - (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; - def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), - (f128 (XSCVUDQP - (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; - def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), - (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; - - // (Un)Signed Word vector extract -> QP - foreach Idx = [[0,3],[1,2],[3,0]] in { - def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), - (f128 (XSCVSDQP (EXTRACT_SUBREG - (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)), - sub_64)))>; - } - def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))), - (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (XOR8 (LI8 3), $Idx), 2, 61))); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); - foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in { - def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), - (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>; - } + /* LE variable double + Same as the LE doubleword except there is no move. + */ + dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + LE_VDWORD_PERM_VEC)); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); - // (Un)Signed HWord vector extract -> QP - // The Nested foreach lists identifies the vector element and corresponding - // register byte location. 
- foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in { - def : Pat<(f128 (sint_to_fp - (i32 (sext_inreg - (vector_extract v8i16:$src, !head(Idx)), i16)))), - (f128 (XSCVSDQP - (EXTRACT_SUBREG (VEXTSH2D - (VEXTRACTUH !head(!tail(Idx)), $src)), - sub_64)))>; - def : Pat<(f128 (uint_to_fp - (and (i32 (vector_extract v8i16:$src, !head(Idx))), - 65535))), - (f128 (XSCVUDQP (EXTRACT_SUBREG - (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>; - } + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8))); + dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC)); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); - // (Un)Signed Byte vector extract -> QP - foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7], - [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in { - def : Pat<(f128 (sint_to_fp - (i32 (sext_inreg - (vector_extract v16i8:$src, !head(Idx)), i8)))), - (f128 (XSCVSDQP - (EXTRACT_SUBREG - (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)), - sub_64)))>; - def : Pat<(f128 (uint_to_fp - (and (i32 (vector_extract v16i8:$src, !head(Idx))), - 255))), - (f128 (XSCVUDQP - (EXTRACT_SUBREG - (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>; - } + /* BE variable halfword + The algorithm here is the same as the LE variable halfword except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x4 and multiply by 2 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-3 + */ + dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDI8_rec $Idx, 4), 1, 62))); + dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC)); + dag BE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), + sub_64)); + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), + sub_32); + dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), + sub_32); - // Unsigned int in vsx register -> QP - def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))), - (f128 (XSCVUDQP - (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>; - } // IsLittleEndian, HasP9Vector - - // Convert (Un)Signed DWord in memory -> QP - def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>; - def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>; - def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))), - (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>; - def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))), - (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>; - - // Convert Unsigned HWord in memory -> QP - def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)), - (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>; - - // Convert Unsigned Byte in memory -> QP - def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), - (f128 (XSCVUDQP (LXSIBZX 
xoaddr:$src)))>; - - // Truncate & Convert QP -> (Un)Signed (D)Word. - def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; - def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; - def : Pat<(i32 (fp_to_sint f128:$src)), - (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; - def : Pat<(i32 (fp_to_uint f128:$src)), - (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; - - // Instructions for store(fptosi). - // The 8-byte version is repeated here due to availability of D-Form STXSD. - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8), - (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - xaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8), - (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), - iaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; - - // Instructions for store(fptoui). 
- def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8), - (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - xaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8), - (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), - iaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), - (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), - (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), - (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8), - (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8), - (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), - (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; - def : Pat<(PPCstore_scal_int_from_vsr - (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), - (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; - - // Round & Convert QP -> DP/SP - def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; - def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; - - // Convert SP -> QP - def : Pat<(f128 (fpextend f32:$src)), - (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; - - def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), - (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC), - (COPY_TO_REGCLASS $XB, VSSRC)), - VSSRC))>; - def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)), - (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC), - (COPY_TO_REGCLASS $XB, VSSRC)), - VSSRC))>; - -} // end HasP9Vector, AddedComplexity + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDI8_rec $Idx, 2), 2, 61))); + dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC)); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); -let AddedComplexity = 400 in { - let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in { - def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), - (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; - } - let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in { - def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)), - (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; - } -} + /* BE variable doubleword + Same as the LE doubleword except we shift in the VMX register for opposite + element indices. 
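+     For example (our illustration): $Idx = 1 gives (1 & 1) << 3 = 8, so the
+     LVSL-generated mask rotates BE doubleword 1 into doubleword 0, which is
+     the position MFVSRD reads.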
+ */ + dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDI8_rec $Idx, 1), 3, 60))); + dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC)); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); -let Predicates = [HasP9Vector], hasSideEffects = 0 in { - let mayStore = 1 in { - def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), - (ins spilltovsrrc:$XT, memrr:$dst), - "#SPILLTOVSR_STX", []>; - def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), - "#SPILLTOVSR_ST", []>; - } - let mayLoad = 1 in { - def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), - (ins memrr:$src), - "#SPILLTOVSR_LDX", []>; - def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), - "#SPILLTOVSR_LD", []>; + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61))); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. + */ + dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + BE_VDWORD_PERM_VEC)); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); +} - } +def AlignValues { + dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); + dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); } + // Integer extend helper dags 32 -> 64 def AnyExts { dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32); @@ -4004,511 +2312,2234 @@ def LoadFP { dag D = (f32 (load xoaddr:$D)); } -// FP merge dags (for f32 -> v4f32) -def MrgFP { - dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC); - dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC); - dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC); - dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC); - dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $C, VSRC), 0)); - dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $D, VSRC), 0)); - dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); - dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); - dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); - dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); -} +// FP merge dags (for f32 -> v4f32) +def MrgFP { + dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC); + dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC); + dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC); + dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC); + dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $C, VSRC), 0)); + dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $D, VSRC), 0)); + dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); + dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); + dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); + dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); +} + +// Word-element merge dags - conversions from f64 to i32 merged into vectors. +def MrgWords { + // For big endian, we merge low and hi doublewords (A, B). 
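+  // (XXPERMDI selector 0 pairs the high doublewords of its two inputs;
+  //  selector 3 pairs the low doublewords.)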
+ dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); + dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); + dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); + dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); + dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); + dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); + + // For little endian, we merge low and hi doublewords (B, A). + dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); + dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); + dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); + dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); + dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); + dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); + + // For big endian, we merge hi doublewords of (A, C) and (B, D), convert + // then merge. + dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), + (COPY_TO_REGCLASS f64:$C, VSRC), 0)); + dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), + (COPY_TO_REGCLASS f64:$D, VSRC), 0)); + dag CVACS = (v4i32 (XVCVDPSXWS AC)); + dag CVBDS = (v4i32 (XVCVDPSXWS BD)); + dag CVACU = (v4i32 (XVCVDPUXWS AC)); + dag CVBDU = (v4i32 (XVCVDPUXWS BD)); + + // For little endian, we merge hi doublewords of (D, B) and (C, A), convert + // then merge. + dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), + (COPY_TO_REGCLASS f64:$B, VSRC), 0)); + dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), + (COPY_TO_REGCLASS f64:$A, VSRC), 0)); + dag CVDBS = (v4i32 (XVCVDPSXWS DB)); + dag CVCAS = (v4i32 (XVCVDPSXWS CA)); + dag CVDBU = (v4i32 (XVCVDPUXWS DB)); + dag CVCAU = (v4i32 (XVCVDPUXWS CA)); +} + +//---------------------------- Anonymous Patterns ----------------------------// +// Predicate combinations are kept in roughly chronological order in terms of +// instruction availability in the architecture. For example, VSX came in with +// ISA 2.06 (Power7). There have since been additions in ISA 2.07 (Power8) and +// ISA 3.0 (Power9). However, the granularity of features on later subtargets +// is finer for various reasons. For example, we have Power8Vector, +// Power8Altivec, DirectMove that all came in with ISA 2.07. The situation is +// similar with ISA 3.0 with Power9Vector, Power9Altivec, IsISA3_0. Then there +// are orthogonal predicates such as endianness for which the order was +// arbitrarily chosen to be Big, Little. +// +// Predicate combinations available: +// [HasVSX] +// [HasVSX, IsBigEndian] +// [HasVSX, IsLittleEndian] +// [HasVSX, NoP9Vector] +// [HasVSX, HasOnlySwappingMemOps] +// [HasVSX, HasOnlySwappingMemOps, IsBigEndian] +// [HasVSX, HasP8Vector] +// [HasVSX, HasP8Vector, IsBigEndian] +// [HasVSX, HasP8Vector, IsLittleEndian] +// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] +// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] +// [HasVSX, HasDirectMove] +// [HasVSX, HasDirectMove, IsBigEndian] +// [HasVSX, HasDirectMove, IsLittleEndian] +// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] +// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] +// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] +// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] +// [HasVSX, HasP9Vector] +// [HasVSX, HasP9Vector, IsBigEndian] +// [HasVSX, HasP9Vector, IsLittleEndian] +// [HasVSX, HasP9Altivec] +// [HasVSX, HasP9Altivec, IsBigEndian] +// [HasVSX, HasP9Altivec, IsLittleEndian] +// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] +// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] + +let AddedComplexity = 400 in { +// Valid for any VSX subtarget, regardless of endianness. 
+let Predicates = [HasVSX] in { +def : Pat<(v4i32 (vnot_ppc v4i32:$A)), + (v4i32 (XXLNOR $A, $A))>; +def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A), + (and v4i32:$B, v4i32:$C))), + (v4i32 (XXSEL $A, $B, $C))>; + +// Additional fnmsub patterns: -a*b + c == -(a*b - c) +def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C), + (XSNMSUBADP $C, $A, $B)>; +def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C), + (XSNMSUBADP $C, $A, $B)>; + +def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; + +def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; + +def : Pat<(v2f64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2i64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v1i128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v1i128 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2i64 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>; + +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; + +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>; + +// Permutes. +def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; + +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. 
XXPERMDI XT,XA,XA,2), the latter one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), + (XXPERMDI $src, $src, 2)>; + +// Selects. +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), + (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), + (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +// Divides. +def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), + (XVDIVSP $A, $B)>; +def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), + (XVDIVDP $A, $B)>; + +// Reciprocal estimate +def : Pat<(int_ppc_vsx_xvresp v4f32:$A), + (XVRESP $A)>; +def : Pat<(int_ppc_vsx_xvredp v2f64:$A), + (XVREDP $A)>; + +// Recip.
square root estimate +def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), + (XVRSQRTESP $A)>; +def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), + (XVRSQRTEDP $A)>; + +// Vector selection +def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)), + (COPY_TO_REGCLASS + (XXSEL (COPY_TO_REGCLASS $vC, VSRC), + (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)), + (COPY_TO_REGCLASS + (XXSEL (COPY_TO_REGCLASS $vC, VSRC), + (COPY_TO_REGCLASS $vB, VSRC), + (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>; +def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC), + (XXSEL $vC, $vB, $vA)>; +def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC), + (XXSEL $vC, $vB, $vA)>; + +def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMAXSP $src1, $src2))>; +def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMINSP $src1, $src2))>; +def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMAXDP $src1, $src2))>; +def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMINDP $src1, $src2))>; + +// f32 Min. +def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Min)>; +def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Min)>; +def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; +def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; +// F32 Max. +def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Max)>; +def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Max)>; +def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; +def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; + +// f64 Min. +def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)), + (f64 (XSMINDP $A, $B))>; +def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMINDP $A, $B))>; +def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; +def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; +// f64 Max. +def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)), + (f64 (XSMAXDP $A, $B))>; +def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMAXDP $A, $B))>; +def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; +def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; + +def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + +// Rounding for single precision. 
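+// (There is no single-precision round-to-integral instruction, so each
+// pattern copies to VSFRC, rounds with the double-precision XSRDPI* form,
+// and copies back to VSSRC.)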
+def : Pat<(f32 (fround f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPI + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (fnearbyint f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIC + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ffloor f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIM + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (fceil f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIP + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ftrunc f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIZ + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (frint f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIC + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>; -// Word-element merge dags - conversions from f64 to i32 merged into vectors. -def MrgWords { - // For big endian, we merge low and hi doublewords (A, B). - dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); - dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); - dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); - dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); - dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); - dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); +// Rounding for double precision. +def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>; +def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; - // For little endian, we merge low and hi doublewords (B, A). - dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); - dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); - dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); - dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); - dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); - dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); +// Materialize a zero-vector of long long +def : Pat<(v2i64 immAllZerosV), + (v2i64 (XXLXORz))>; - // For big endian, we merge hi doublewords of (A, C) and (B, D), convert - // then merge. - dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), - (COPY_TO_REGCLASS f64:$C, VSRC), 0)); - dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), - (COPY_TO_REGCLASS f64:$D, VSRC), 0)); - dag CVACS = (v4i32 (XVCVDPSXWS AC)); - dag CVBDS = (v4i32 (XVCVDPSXWS BD)); - dag CVACU = (v4i32 (XVCVDPUXWS AC)); - dag CVBDU = (v4i32 (XVCVDPUXWS BD)); +// Build vectors of floating point converted to i32. +def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, + DblToInt.A, DblToInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; +def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, + DblToUInt.A, DblToUInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; +def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; +def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; +def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; +def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; +def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), + (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; +def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), + (v2f64 (LXVDSX xoaddr:$A))>; +def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), + (v2i64 (LXVDSX xoaddr:$A))>; + +// Build vectors of floating point converted to i64. 
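+// For orientation, a scalar C sketch of the splat-of-converted-float
+// patterns below (our illustration, not part of the patch):
+/*
+  #include <stdint.h>
+  void splat_flt_to_i64(float a, int64_t out[2]) {
+    int64_t v = (int64_t)a;  // XSCVDPSXDSs (XSCVDPUXDSs for the unsigned case)
+    out[0] = out[1] = v;     // XXPERMDIs with selector 0 splats doubleword 0
+  }
+*/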
+def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>; +def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; +def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), + (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; +def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), + (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; +} // HasVSX - // For little endian, we merge hi doublewords of (D, B) and (C, A), convert - // then merge. - dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), - (COPY_TO_REGCLASS f64:$B, VSRC), 0)); - dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), - (COPY_TO_REGCLASS f64:$A, VSRC), 0)); - dag CVDBS = (v4i32 (XVCVDPSXWS DB)); - dag CVCAS = (v4i32 (XVCVDPSXWS CA)); - dag CVDBU = (v4i32 (XVCVDPUXWS DB)); - dag CVCAU = (v4i32 (XVCVDPUXWS CA)); -} +// Any big endian VSX subtarget. +let Predicates = [HasVSX, IsBigEndian] in { +def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; -// Patterns for BUILD_VECTOR nodes. -let AddedComplexity = 400 in { +def : Pat<(f64 (extractelt v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; +def : Pat<(f64 (extractelt v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - let Predicates = [HasVSX] in { - // Build vectors of floating point converted to i32. - def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, - DblToInt.A, DblToInt.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; - def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, - DblToUInt.A, DblToUInt.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; - def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), - (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), - (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; - def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), - (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), - (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; - def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; - def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), - (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; - def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), - (v2f64 (LXVDSX xoaddr:$A))>; - def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), - (v2i64 (LXVDSX xoaddr:$A))>; - - // Build vectors of floating point converted to i64. 
- def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>; - def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), - (v2i64 (XXPERMDIs - (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; - def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), - (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; - def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), - (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; - } +def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + +def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), 0))>; +// Using VMRGEW to assemble the final vector would be a lower latency +// solution. However, we choose to go with the slightly higher latency +// XXPERMDI for 2 reasons: +// 1. This is likely to occur in unrolled loops where regpressure is high, +// so we want to use the latter as it has access to all 64 VSX registers. +// 2. Using Altivec instructions in this sequence would likely cause the +// allocation of Altivec registers even for the loads which in turn would +// force the use of LXSIWZX for the loads, adding a cycle of latency to +// each of the loads which would otherwise be able to use LFIWZX. +def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), + (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B), + (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>; +def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; +def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; + +// Convert 4 doubles to a vector of ints. 
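+// (XVCVDPSXWS leaves each converted result in words 0 and 2, so VMRGEW of
+// the converted (A,C) and (B,D) pairs interleaves them into <A, B, C, D>.)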
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; +def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; +def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; +def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 1))))), + (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 0))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), + (XVCVSPDP (XXMRGHW $A, $A)), 2))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XVCVSPDP $A))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), + (XVCVSPDP (XXMRGLW $A, $A)), 2))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$B, 0))))), + (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$B, 3))))), + (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3), + (XXPERMDI $A, $B, 3), 1)))>; +def : Pat; +def : Pat; +def : Pat; +def : Pat; +} // HasVSX, IsBigEndian + +// Any little endian VSX subtarget. +let Predicates = [HasVSX, IsLittleEndian] in { +def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), + (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; - let Predicates = [HasVSX, NoP9Vector] in { - // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). - def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; - def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), - (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; - def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), - (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; - } +def : Pat<(f64 (extractelt v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; +def : Pat<(f64 (extractelt v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; - let Predicates = [IsBigEndian, HasP8Vector] in { - def : Pat; - def : Pat; - def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - - // Elements in a register on a BE system are in order <0, 1, 2, 3>. 
- // The store instructions store the second word from the left. - // So to align element zero, we need to modulo-left-shift by 3 words. - // Similar logic applies for elements 2 and 3. - foreach Idx = [ [0,3], [2,1], [3,2] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), - (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), - (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - } - } +def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; - let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in { - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; - } - - // Big endian, available on all targets with VSX - let Predicates = [IsBigEndian, HasVSX] in { - def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), - (v2f64 (XXPERMDI - (COPY_TO_REGCLASS $A, VSRC), - (COPY_TO_REGCLASS $B, VSRC), 0))>; - // Using VMRGEW to assemble the final vector would be a lower latency - // solution. However, we choose to go with the slightly higher latency - // XXPERMDI for 2 reasons: - // 1. This is likely to occur in unrolled loops where regpressure is high, - // so we want to use the latter as it has access to all 64 VSX registers. - // 2. Using Altivec instructions in this sequence would likely cause the - // allocation of Altivec registers even for the loads which in turn would - // force the use of LXSIWZX for the loads, adding a cycle of latency to - // each of the loads which would otherwise be able to use LFIWZX. 
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), - (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B), - (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>; - def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), - (VMRGEW MrgFP.AC, MrgFP.BD)>; - def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, - DblToFlt.B0, DblToFlt.B1)), - (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; - - // Convert 4 doubles to a vector of ints. - def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, - DblToInt.C, DblToInt.D)), - (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; - def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, - DblToUInt.C, DblToUInt.D)), - (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; - def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, - ExtDbl.B0S, ExtDbl.B1S)), - (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; - def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, - ExtDbl.B0U, ExtDbl.B1U)), - (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$A, 1))))), - (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), - (f64 (fpextend (extractelt v4f32:$A, 0))))), - (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), - (XVCVSPDP (XXMRGHW $A, $A)), 2))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$A, 2))))), - (v2f64 (XVCVSPDP $A))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), - (f64 (fpextend (extractelt v4f32:$A, 3))))), - (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), - (f64 (fpextend (extractelt v4f32:$A, 3))))), - (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), - (f64 (fpextend (extractelt v4f32:$A, 2))))), - (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), - (XVCVSPDP (XXMRGLW $A, $A)), 2))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$B, 0))))), - (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), - (f64 (fpextend (extractelt v4f32:$B, 3))))), - (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3), - (XXPERMDI $A, $B, 3), 1)))>; - def : Pat; - def : Pat; - def : Pat; - def : Pat; - } +def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + +// Little endian, available on all targets with VSX +def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $A, VSRC), 0))>; +// Using VMRGEW to assemble the final vector would be a lower latency +// solution. However, we choose to go with the slightly higher latency +// XXPERMDI for 2 reasons: +// 1. This is likely to occur in unrolled loops where regpressure is high, +// so we want to use the latter as it has access to all 64 VSX registers. +// 2. Using Altivec instructions in this sequence would likely cause the +// allocation of Altivec registers even for the loads which in turn would +// force the use of LXSIWZX for the loads, adding a cycle of latency to +// each of the loads which would otherwise be able to use LFIWZX. 
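+// Note that relative to the big endian patterns above, the operand order is
+// mirrored to account for the reversed element numbering.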
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), + (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C), + (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>; +def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; +def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; + +// Convert 4 doubles to a vector of ints. +def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; +def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; +def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; +def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 1))))), + (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 0))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), + (XVCVSPDP (XXMRGLW $A, $A)), 2))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP $A))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), + (f64 (fpextend (extractelt v4f32:$A, 3))))), + (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$A, 2))))), + (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), + (XVCVSPDP (XXMRGHW $A, $A)), 2))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), + (f64 (fpextend (extractelt v4f32:$B, 0))))), + (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3), + (XXPERMDI $B, $A, 3), 1)))>; +def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), + (f64 (fpextend (extractelt v4f32:$B, 3))))), + (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>; +def : Pat; +def : Pat; +def : Pat; +def : Pat; +} // HasVSX, IsLittleEndian + +// Any pre-Power9 VSX subtarget. +let Predicates = [HasVSX, NoP9Vector] in { +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; + +// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
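+// Roughly the scalar semantics of the load-and-splat patterns below (a hedged
+// sketch for orientation; the helper name is ours, not part of the patch):
+/*
+  #include <stdint.h>
+  void splat_dbl_to_i32(const double *a, int32_t out[4]) {
+    int32_t v = (int32_t)*a;       // XFLOADf64 followed by XSCVDPSXWS
+    for (int i = 0; i < 4; ++i)    // XXSPLTW replicates the converted word
+      out[i] = v;
+  }
+*/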
+def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; +def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; +def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; +def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; +} // HasVSX, NoP9Vector + +// Any VSX subtarget that only has loads and stores that load in big endian +// order regardless of endianness. This is really pre-Power9 subtargets. +let Predicates = [HasVSX, HasOnlySwappingMemOps] in { + def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; - let Predicates = [IsLittleEndian, HasP8Vector] in { - def : Pat; - def : Pat; - def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src), - (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - - // Elements in a register on a LE system are in order <3, 2, 1, 0>. - // The store instructions store the second word from the left. - // So to align element 3, we need to modulo-left-shift by 3 words. - // Similar logic applies for elements 0 and 1. - foreach Idx = [ [0,2], [1,1], [3,3] ] in { - def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), - (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), - (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), - sub_64), xoaddr:$src)>; - } - } + // Stores. + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +} // HasVSX, HasOnlySwappingMemOps - let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in { - def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), - xoaddr:$src)>; - def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src), - (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; - } - - let Predicates = [IsLittleEndian, HasVSX] in { - // Little endian, available on all targets with VSX - def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), - (v2f64 (XXPERMDI - (COPY_TO_REGCLASS $B, VSRC), - (COPY_TO_REGCLASS $A, VSRC), 0))>; - // Using VMRGEW to assemble the final vector would be a lower latency - // solution. However, we choose to go with the slightly higher latency - // XXPERMDI for 2 reasons: - // 1. This is likely to occur in unrolled loops where regpressure is high, - // so we want to use the latter as it has access to all 64 VSX registers. - // 2. Using Altivec instructions in this sequence would likely cause the - // allocation of Altivec registers even for the loads which in turn would - // force the use of LXSIWZX for the loads, adding a cycle of latency to - // each of the loads which would otherwise be able to use LFIWZX. 
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), - (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C), - (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>; - def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), - (VMRGEW MrgFP.AC, MrgFP.BD)>; - def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, - DblToFlt.B0, DblToFlt.B1)), - (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; - - // Convert 4 doubles to a vector of ints. - def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, - DblToInt.C, DblToInt.D)), - (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; - def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, - DblToUInt.C, DblToUInt.D)), - (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; - def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, - ExtDbl.B0S, ExtDbl.B1S)), - (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; - def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, - ExtDbl.B0U, ExtDbl.B1U)), - (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$A, 1))))), - (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), - (f64 (fpextend (extractelt v4f32:$A, 0))))), - (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)), - (XVCVSPDP (XXMRGLW $A, $A)), 2))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$A, 2))))), - (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))), - (f64 (fpextend (extractelt v4f32:$A, 3))))), - (v2f64 (XVCVSPDP $A))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))), - (f64 (fpextend (extractelt v4f32:$A, 3))))), - (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), - (f64 (fpextend (extractelt v4f32:$A, 2))))), - (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)), - (XVCVSPDP (XXMRGHW $A, $A)), 2))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))), - (f64 (fpextend (extractelt v4f32:$B, 0))))), - (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3), - (XXPERMDI $B, $A, 3), 1)))>; - def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))), - (f64 (fpextend (extractelt v4f32:$B, 3))))), - (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>; - def : Pat; - def : Pat; - def : Pat; - def : Pat; - } +// Big endian VSX subtarget that only has loads and stores that always load +// in big endian order. Really big endian pre-Power9 subtargets. +let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in { + def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; +} // HasVSX, HasOnlySwappingMemOps, IsBigEndian + +// Any Power8 VSX subtarget. 
+let Predicates = [HasVSX, HasP8Vector] in { +def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), + (XXLEQV $A, $B)>; +def : Pat<(f64 (extloadf32 xoaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))), + (f32 (XFLOADf32 xoaddr:$src))>; +def : Pat<(f64 (fpextend f32:$src)), + (COPY_TO_REGCLASS $src, VSFRC)>; + +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +// Additional xsnmsubasp patterns: -a*b + c == -(a*b - c) +def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C), + (XSNMSUBASP $C, $A, $B)>; +def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C), + (XSNMSUBASP $C, $A, $B)>; + +// Instructions for converting float to i32 feeding a store. +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + +def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; +def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; +def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; +def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + +def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))), + (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; +def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))), + (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; +def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))), + (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; +def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), + (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; +} // HasVSX, HasP8Vector + +// Big endian Power8 VSX subtarget. +let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in { +def : Pat; +def : Pat; +def : Pat; +def : Pat; - let Predicates = [HasDirectMove] in { - // Endianness-neutral constant splat on P8 and newer targets. 
The reason - // for this pattern is that on targets with direct moves, we don't expand - // BUILD_VECTOR nodes for v4i32. - def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, - immSExt5NonZero:$A, immSExt5NonZero:$A)), - (v4i32 (VSPLTISW imm:$A))>; - } +// v4f32 scalar <-> vector conversions (BE) +def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; +def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN $S))>; +def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; +def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; +def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; +def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; + +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; + +// LIWAX - This instruction is used for sign extending i32 -> i64. +// LIWZX - This instruction will be emitted for i32, f32, and when +// zero-extending i32 to i64 (zext i32 -> i64). +def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; +def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; +def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + +def : Pat; +def : Pat; +def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; +def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src), + (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>; + +// Elements in a register on a BE system are in order <0, 1, 2, 3>. +// The store instructions store the second word from the left. +// So to align element zero, we need to modulo-left-shift by 3 words. +// Similar logic applies for elements 2 and 3. 
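+// In other words, BE element e needs a left rotation of (e + 3) % 4 words so
+// that it lands in word 1; e = 1 is the no-shift case handled directly above.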
+foreach Idx = [ [0,3], [2,1], [3,2] ] in { + def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; + def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src), + (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))), + sub_64), xoaddr:$src)>; +} +} // HasVSX, HasP8Vector, IsBigEndian + +// Little endian Power8 VSX subtarget. +let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in { +def : Pat; +def : Pat; +def : Pat; +def : Pat; - let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in { - // Big endian integer vectors using direct moves. - def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), - (v2i64 (XXPERMDI - (COPY_TO_REGCLASS (MTVSRD $A), VSRC), - (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (XXPERMDI - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC), - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; - } +// v4f32 scalar <-> vector conversions (LE) +def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; +def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; +def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; +def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; +def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN $S))>; +def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; + +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; +def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; +def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; + +// LIWAX - This instruction is used for sign extending i32 -> i64. +// LIWZX - This instruction will be emitted for i32, f32, and when +// zero-extending i32 to i64 (zext i32 -> i64). 
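+// On little endian the loaded value lands in doubleword 0 of the VSR, so the
+// patterns below swap doublewords (XXPERMDIs with selector 2) to place it
+// where the LE vector layout expects element 0.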
+def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+          (v2i64 (XXPERMDIs
+          (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+          (v2i64 (XXPERMDIs
+          (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+          (v4i32 (XXPERMDIs
+          (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+          (v4f32 (XXPERMDIs
+          (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+
+def : Pat<DWToSPExtractConv.BVU,
+          (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+                          (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+          (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+                          (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+          (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+          (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a LE system are in order <3, 2, 1, 0>.
+// The store instructions store the second word from the left.
+// So to align element 3, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 0 and 1.
+foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+  def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                    sub_64), xoaddr:$src)>;
+  def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                    sub_64), xoaddr:$src)>;
+}
+} // HasVSX, HasP8Vector, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                      xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                      xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                      xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                      xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+          (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian
+
+// Any VSX target with direct moves.
+let Predicates = [HasVSX, HasDirectMove] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+          (i32 (MFVSRWZ (EXTRACT_SUBREG
+                          (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
+                          sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+          (f32 (XSCVSPDPN
+                  (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
-  let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
-    // Little endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), - (v2i64 (XXPERMDI - (COPY_TO_REGCLASS (MTVSRD $B), VSRC), - (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (XXPERMDI - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC), - (COPY_TO_REGCLASS - (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; - } +// bitconvert f64 -> i64 +// (move to GPR, nothing else needed) +def : Pat<(i64 (bitconvert f64:$S)), + (i64 (MFVSRD $S))>; - let Predicates = [HasP8Vector] in { - def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))), - (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; - def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))), - (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; - def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))), - (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; - def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), - (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; - } +// bitconvert i64 -> f64 +// (move to FPR, nothing else needed) +def : Pat<(f64 (bitconvert i64:$S)), + (f64 (MTVSRD $S))>; - let Predicates = [HasP9Vector] in { - // Endianness-neutral patterns for const splats with ISA 3.0 instructions. - def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (MTVSRWS $A))>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), - (v4i32 (MTVSRWS $A))>; - def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, - immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; - def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), - (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; - def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), - (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; - def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), - (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; - def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), - (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddrX4:$A), - VSFRC)), 0))>; - def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), - (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (DFLOADf32 iaddrX4:$A), - VSFRC)), 0))>; - def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), - (v4f32 (LXVWSX xoaddr:$A))>; - def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), - (v4i32 (LXVWSX xoaddr:$A))>; - } +// Rounding to integer. 
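+// lrint/llrint use the current rounding mode (FCTID alone), while
+// lround/llround round ties away from zero and so round first with XSRDPI
+// before converting. For example, lround(0.5) is 1, but lrint(0.5) is 0
+// under the default round-to-nearest-even mode.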
+def : Pat<(i64 (lrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (lrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +def : Pat<(i64 (llrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (llrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +def : Pat<(i64 (lround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (lround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +def : Pat<(i64 (llround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (llround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; - let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { - def : Pat<(i64 (extractelt v2i64:$A, 1)), - (i64 (MFVSRLD $A))>; - // Better way to build integer vectors if we have MTVSRDD. Big endian. - def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), - (v2i64 (MTVSRDD $rB, $rA))>; - def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), - (MTVSRDD - (RLDIMI AnyExts.B, AnyExts.A, 32, 0), - (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>; - } +// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead +// of f64 +def : Pat<(v8i16 (PPCmtvsrz i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; +def : Pat<(v16i8 (PPCmtvsrz i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + +// Endianness-neutral constant splat on P8 and newer targets. The reason +// for this pattern is that on targets with direct moves, we don't expand +// BUILD_VECTOR nodes for v4i32. +def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, + immSExt5NonZero:$A, immSExt5NonZero:$A)), + (v4i32 (VSPLTISW imm:$A))>; +} // HasVSX, HasDirectMove + +// Big endian VSX subtarget with direct moves. +let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in { +// v16i8 scalar <-> vector conversions (BE) +def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; +def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; +def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; +def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + +// v2i64 scalar <-> vector conversions (BE) +def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_1)>; +def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_0)>; +def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; +} // HasVSX, HasDirectMove, IsBigEndian + +// Little endian VSX subtarget with direct moves. 
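+// Note: on little endian targets, vector element 0 sits in the
+// least-significant half of the register, which is why the patterns below
+// use the MovesToVSR.LE_* and VectorExtractions.LE_* helpers directly.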
+let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in { + // v16i8 scalar <-> vector conversions (LE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 MovesToVSR.LE_WORD_0)>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 MovesToVSR.LE_DWORD_0)>; + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; +} // HasVSX, HasDirectMove, IsLittleEndian + +// Big endian pre-P9 VSX subtarget with direct moves. +let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] in { +def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_15)>; +def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_14)>; +def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_13)>; +def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_12)>; +def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_11)>; +def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_10)>; +def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_9)>; +def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_8)>; +def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_7)>; +def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_6)>; +def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_5)>; +def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_4)>; +def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_3)>; +def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_2)>; +def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_1)>; +def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_0)>; +def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; + +// v8i16 scalar <-> vector conversions (BE) +def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_7)>; +def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_6)>; +def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_5)>; +def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_4)>; +def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_3)>; +def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_2)>; +def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_1)>; +def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_0)>; +def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_HALF)>; + +// v4i32 scalar <-> vector conversions (BE) +def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_3)>; +def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_2)>; +def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 
VectorExtractions.LE_WORD_1)>; +def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_0)>; +def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; +} // HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian + +// Little endian pre-P9 VSX subtarget with direct moves. +let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] in { +def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_0)>; +def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_1)>; +def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_2)>; +def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_3)>; +def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_4)>; +def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_5)>; +def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_6)>; +def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_7)>; +def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_8)>; +def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_9)>; +def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_10)>; +def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_11)>; +def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_12)>; +def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_13)>; +def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_14)>; +def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_15)>; +def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; + +// v8i16 scalar <-> vector conversions (LE) +def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_0)>; +def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_1)>; +def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_2)>; +def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_3)>; +def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_4)>; +def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_5)>; +def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_6)>; +def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_7)>; +def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_HALF)>; + +// v4i32 scalar <-> vector conversions (LE) +def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_0)>; +def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_1)>; +def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_2)>; +def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_3)>; +def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; +} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian + +// Big endian pre-Power9 VSX subtarget that has direct moves. +let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in { +// Big endian integer vectors using direct moves. 
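+// The RLDIMI nodes below pack a pair of any-extended 32-bit values into a
+// single 64-bit GPR, so a four-element v4i32 build_vector costs two direct
+// moves (MTVSRD) and one XXPERMDI merge rather than four separate moves.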
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+          (v2i64 (XXPERMDI
+                    (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+                    (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+          (XXPERMDI
+            (COPY_TO_REGCLASS
+              (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+            (COPY_TO_REGCLASS
+              (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+          (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
+// Little endian integer vectors using direct moves.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+          (v2i64 (XXPERMDI
+                    (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+                    (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+          (XXPERMDI
+            (COPY_TO_REGCLASS
+              (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+            (COPY_TO_REGCLASS
+              (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+          (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian
-  let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
-    def : Pat<(i64 (extractelt v2i64:$A, 0)),
-              (i64 (MFVSRLD $A))>;
-    // Better way to build integer vectors if we have MTVSRDD. Little endian.
-    def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
-              (v2i64 (MTVSRDD $rB, $rA))>;
-    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
-              (MTVSRDD
-                (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
-                (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
-  }
-  // P9 Altivec instructions that can be used to build vectors.
-  // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
-  // with complexities of existing build vector patterns in this file.
-  let Predicates = [HasP9Altivec, IsLittleEndian] in {
-    def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
-              (v2i64 (VEXTSW2D $A))>;
-    def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
-              (v2i64 (VEXTSH2D $A))>;
-    def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
-                      HWordToWord.LE_A2, HWordToWord.LE_A3)),
-              (v4i32 (VEXTSH2W $A))>;
-    def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
-                      ByteToWord.LE_A2, ByteToWord.LE_A3)),
-              (v4i32 (VEXTSB2W $A))>;
-    def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
-              (v2i64 (VEXTSB2D $A))>;
-  }
+// Any Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP9Vector] in {
+// Additional fnmsub patterns: -a*b + c == -(a*b - c)
+def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
+def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
+
+def : Pat<(f128 (sint_to_fp i64:$src)),
+          (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+          (f128 (XSCVSDQP $src))>;
+def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+          (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
+def : Pat<(f128 (uint_to_fp i64:$src)),
+          (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+          (f128 (XSCVUDQP $src))>;
+
+// Convert (Un)Signed Word -> QP.
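+// A signed word is first sign-extended into a VSR (MTVSRWA for a GPR value,
+// LIWAX for a load) and then converted with XSCVSDQP; the unsigned forms use
+// MTVSRWZ/LIWZX with XSCVUDQP, since the quad-precision conversions only
+// take a doubleword input.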
+def : Pat<(f128 (sint_to_fp i32:$src)), + (f128 (XSCVSDQP (MTVSRWA $src)))>; +def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))), + (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>; +def : Pat<(f128 (uint_to_fp i32:$src)), + (f128 (XSCVUDQP (MTVSRWZ $src)))>; +def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))), + (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>; + +// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a +// separate pattern so that it can convert the input register class from +// VRRC(v8i16) to VSRC. +def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), + (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; + +// Use current rounding mode +def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; +// Round to nearest, ties away from zero +def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; +// Round towards Zero +def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>; +// Round towards +Inf +def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>; +// Round towards -Inf +def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>; +// Use current rounding mode, [with Inexact] +def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>; + +def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)), + (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>; + +def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)), + (i64 (MFVSRD (EXTRACT_SUBREG + (v2i64 (XSXEXPQP $vA)), sub_64)))>; + +// Extra patterns expanding to vector Extract Word/Insert Word +def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)), + (v4i32 (XXINSERTW $A, $B, imm:$IMM))>; +def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)), + (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>; + +// Vector Reverse +def : Pat<(v8i16 (bswap v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; +def : Pat<(v1i128 (bswap v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + +// D-Form Load/Store +def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), + (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>; + +def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), + (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>; +def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst), + (STXV $rS, memrix16:$dst)>; +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst), + (STXV $rS, memrix16:$dst)>; + +def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x 
xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; +def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)), + (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>; +def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst), + (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; +def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; +def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + +// Build vectors from i8 loads +def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), + (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; +def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), + (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; +def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), + (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; +def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), + (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; +def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), + (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; +def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), + (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; + +// Build vectors from i16 loads +def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), + (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; +def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)), + (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; +def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), + (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; +def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), + (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; +def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), + (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + +// Load/convert and convert/store patterns for f16. 
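+// Half-precision values are loaded with LXSIHZX and widened with XSCVHPDP;
+// on the store side the value is narrowed with XSCVDPHP and stored with
+// STXSIHX, as in the patterns below.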
+def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; +def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; +def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; +def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + +// Vector sign extensions +def : Pat<(f64 (PPCVexts f64:$A, 1)), + (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; +def : Pat<(f64 (PPCVexts f64:$A, 2)), + (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; + +def : Pat<(f64 (extloadf32 iaddrX4:$src)), + (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>; +def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), + (f32 (DFLOADf32 iaddrX4:$src))>; + +def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; +def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)), + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>; + +// Convert (Un)Signed DWord in memory -> QP +def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))), + (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>; +def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))), + (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))), + (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>; +def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))), + (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>; + +// Convert Unsigned HWord in memory -> QP +def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)), + (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>; + +// Convert Unsigned Byte in memory -> QP +def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)), + (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; + +// Truncate & Convert QP -> (Un)Signed (D)Word. +def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; +def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; +def : Pat<(i32 (fp_to_sint f128:$src)), + (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; +def : Pat<(i32 (fp_to_uint f128:$src)), + (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; + +// Instructions for store(fptosi). +// The 8-byte version is repeated here due to availability of D-Form STXSD. 
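+// That is, an 8-byte store of an f128->i64 conversion can use either the
+// indexed STXSDX (X-Form) or the offset STXSD (D-Form), depending on which
+// addressing mode matches (xaddrX4 vs. iaddrX4); the narrower stores only
+// have the indexed forms STXSIWX/STXSIHX/STXSIBX.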
+def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), + xaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), + iaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + +// Instructions for store(fptoui). +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8), + (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), + xaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8), + (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), + iaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; +def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + +// Round & Convert QP -> DP/SP +def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; +def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; + +// Convert SP -> QP +def : Pat<(f128 (fpextend f32:$src)), + (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; + +def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + VSSRC))>; +def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + 
VSSRC))>; + +// Endianness-neutral patterns for const splats with ISA 3.0 instructions. +def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (MTVSRWS $A))>; +def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (v4i32 (MTVSRWS $A))>; +def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), + (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; +def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; +def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; +def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; +def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>; +def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (DFLOADf32 iaddrX4:$A), + VSFRC)), 0))>; +def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (DFLOADf32 iaddrX4:$A), + VSFRC)), 0))>; +def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), + (v4f32 (LXVWSX xoaddr:$A))>; +def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), + (v4i32 (LXVWSX xoaddr:$A))>; +} // HasVSX, HasP9Vector + +// Big endian Power9 subtarget. +let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in { +def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; +def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; +def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; +def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; +def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; +def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; +def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; +def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; +def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; +def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; +def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; +def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; +def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; +def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; +def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), + (v4f32 (XXINSERTW v4f32:$A, 
AlignValues.F32_TO_BE_WORD1, 8))>; +def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; + +// Scalar stores of i8 +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; + +// Scalar stores of i16 +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI 
$S, $S, 4)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; + +def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; + +def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>; +def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), iaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), iaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + +// (Un)Signed DWord vector extract -> QP +def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; +def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVSDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; +def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; +def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVUDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; + +// (Un)Signed Word vector extract -> QP +def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; +foreach Idx = [0,2,3] in { + def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG + (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>; +} +foreach Idx = 0-3 in { + def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))), + (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>; +} - let Predicates = [HasP9Altivec, IsBigEndian] in { - def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)), - (v2i64 (VEXTSW2D $A))>; - def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)), - (v2i64 (VEXTSH2D $A))>; - def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1, - HWordToWord.BE_A2, HWordToWord.BE_A3)), - (v4i32 (VEXTSH2W $A))>; - def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, - ByteToWord.BE_A2, ByteToWord.BE_A3)), - (v4i32 (VEXTSB2W $A))>; - def : Pat<(v2i64 (build_vector 
ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
-              (v2i64 (VEXTSB2D $A))>;
-  }
+// (Un)Signed HWord vector extract -> QP
+foreach Idx = 0-7 in {
+  def : Pat<(f128 (sint_to_fp
+                    (i32 (sext_inreg
+                      (vector_extract v8i16:$src, Idx), i16)))),
+            (f128 (XSCVSDQP (EXTRACT_SUBREG
+                              (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+                              sub_64)))>;
+  // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
+  def : Pat<(f128 (uint_to_fp
+                    (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
+            (f128 (XSCVUDQP (EXTRACT_SUBREG
+                              (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+}
-// Put this P9Altivec related definition here since it's possible to be
-// selected to VSX instruction xvnegsp, avoid possible undef.
-let Predicates = [HasP9Altivec] in {
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = 0-15 in {
+  def : Pat<(f128 (sint_to_fp
+                    (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+                                     i8)))),
+            (f128 (XSCVSDQP (EXTRACT_SUBREG
+                              (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+  def : Pat<(f128 (uint_to_fp
+                    (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
+            (f128 (XSCVUDQP
+                    (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+}
-  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
-            (v4i32 (VABSDUW $A, $B))>;
+// Unsigned int in vsx register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+          (f128 (XSCVUDQP
+                  (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
+} // HasVSX, HasP9Vector, IsBigEndian
+
+// Little endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+          (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+          (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+          (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+          (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+          (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+          (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+          (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+          (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+          (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+          (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+          (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+          (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+          (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+          (COPY_TO_REGCLASS (LXVH8X xoaddr:$src),
VRRC)>; +def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; + +def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; +def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; + +// Scalar stores of i8 +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>; + +// Scalar stores of i16 +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv 
(COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>; +def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>; + +def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; +def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; + +def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>; +def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), xaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src), + (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), + sub_64), iaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), + iaddrX4:$src)>; +def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; +def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src), + (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>; + +// (Un)Signed DWord vector extract -> QP +def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVSDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; +def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; +def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVUDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; +def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; + +// (Un)Signed Word vector extract -> QP +foreach Idx = [[0,3],[1,2],[3,0]] in { + def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), + (f128 (XSCVSDQP (EXTRACT_SUBREG + (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)), + sub_64)))>; +} +def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; - def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))), - (v8i16 (VABSDUH $A, $B))>; +foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in { + def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))), + (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>; +} - def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))), - (v16i8 (VABSDUB $A, $B))>; +// (Un)Signed HWord vector extract -> QP +// The Nested foreach lists 
identify the vector element and corresponding
+// register byte location.
+foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
+  def : Pat<(f128 (sint_to_fp
+                    (i32 (sext_inreg
+                      (vector_extract v8i16:$src, !head(Idx)), i16)))),
+            (f128 (XSCVSDQP
+                    (EXTRACT_SUBREG (VEXTSH2D
+                                      (VEXTRACTUH !head(!tail(Idx)), $src)),
+                                    sub_64)))>;
+  def : Pat<(f128 (uint_to_fp
+                    (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+                         65535))),
+            (f128 (XSCVUDQP (EXTRACT_SUBREG
+                              (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+}
-  // As PPCVABSD description, the last operand indicates whether do the
-  // sign bit flip.
-  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
-            (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
+               [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
+  def : Pat<(f128 (sint_to_fp
+                    (i32 (sext_inreg
+                      (vector_extract v16i8:$src, !head(Idx)), i8)))),
+            (f128 (XSCVSDQP
+                    (EXTRACT_SUBREG
+                      (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
+                      sub_64)))>;
+  def : Pat<(f128 (uint_to_fp
+                    (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+                         255))),
+            (f128 (XSCVUDQP
+                    (EXTRACT_SUBREG
+                      (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+}
+
+// Unsigned int in vsx register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+          (f128 (XSCVUDQP
+                  (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
+} // HasVSX, HasP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget that supports Power9 Altivec.
+let Predicates = [HasVSX, HasP9Altivec] in {
+// These PPCvabsd patterns are defined here rather than with the other
+// Altivec patterns because they may be selected to the VSX instruction
+// xvnegsp; guarding them with the VSX predicates avoids a possibly
+// undefined instruction.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+          (v4i32 (VABSDUW $A, $B))>;
+
+def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+          (v8i16 (VABSDUH $A, $B))>;
+
+def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+          (v16i8 (VABSDUB $A, $B))>;
+
+// As the PPCvabsd description says, the last operand indicates whether to
+// do the sign bit flip: xvnegsp flips the top bit of each word, mapping
+// signed values onto an unsigned-comparable range so VABSDUW can be used.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+          (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+} // HasVSX, HasP9Altivec
+
+// Big endian Power9 VSX subtargets with P9 Altivec support.
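+// (VEXTUBLX/VEXTUHLX/VEXTUWLX take the element's byte offset in a GPR, so
+// the fixed-index patterns below materialize it with LI8: halfword i lives
+// at byte 2*i and word i at byte 4*i on BE, while variable indices are
+// scaled with RLWINM8.)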
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in { +def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBLX $Idx, $S)>; + +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHLX (LI8 0), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHLX (LI8 2), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHLX (LI8 4), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHLX (LI8 6), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHLX (LI8 8), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHLX (LI8 10), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHLX (LI8 12), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHLX (LI8 14), $S)>; + +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWLX (LI8 0), $S)>; + +// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWLX (LI8 8), $S)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWLX (LI8 12), $S)>; + +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWLX (LI8 0), $S))>; +// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWLX (LI8 8), $S))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWLX (LI8 12), $S))>; + +def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), 
$S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+          (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+          (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+          (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+          (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX
+                                 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+          (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+          (i32 (EXTRACT_SUBREG (VEXTUWLX
+                                 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+          (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+          (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+          (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+          (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+// with the complexities of the existing build vector patterns in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+          (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+          (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+                               HWordToWord.BE_A2, HWordToWord.BE_A3)),
+          (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+                               ByteToWord.BE_A2, ByteToWord.BE_A3)),
+          (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+          (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsBigEndian
+
+// Little endian Power9 VSX subtargets with P9 Altivec support.
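+// The block below mirrors the big endian patterns above but selects the
+// right-indexed extracts (VEXTUBRX/VEXTUHRX/VEXTUWRX), since element 0 is
+// the rightmost element of the register in little endian order.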
+let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in { +def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBRX $Idx, $S)>; + +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHRX (LI8 0), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHRX (LI8 2), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHRX (LI8 4), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHRX (LI8 6), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHRX (LI8 8), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHRX (LI8 10), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHRX (LI8 12), $S)>; +def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHRX (LI8 14), $S)>; + +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWRX (LI8 0), $S)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWRX (LI8 4), $S)>; +// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32)>; +def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWRX (LI8 12), $S)>; + +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWRX (LI8 0), $S))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWRX (LI8 4), $S))>; +// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32))>; +def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWRX (LI8 12), $S))>; + +def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>; +def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), 
$S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+          (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+          (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+          (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+          (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX
+                                 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+          (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+          (i32 (EXTRACT_SUBREG (VEXTUWRX
+                                 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+          (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+          (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+          (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+          (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+// with the complexities of the existing build vector patterns in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+          (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+          (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+                               HWordToWord.LE_A2, HWordToWord.LE_A3)),
+          (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+                               ByteToWord.LE_A2, ByteToWord.LE_A3)),
+          (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+          (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsLittleEndian
+
+// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 1)),
+          (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Big endian.
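+// MTVSRDD writes its first GPR operand into doubleword 0 of the target VSR,
+// which is element 0 in big endian order, so the build_vector operand order
+// is kept as-is here and reversed in the little endian block further down.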
+def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)), + (v2i64 (MTVSRDD $rB, $rA))>; +def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (MTVSRDD + (RLDIMI AnyExts.B, AnyExts.A, 32, 0), + (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>; + +def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; +} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian + +// Little endian VSX subtarget that supports direct moves from ISA3.0. +let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in { +def : Pat<(i64 (extractelt v2i64:$A, 0)), + (i64 (MFVSRLD $A))>; +// Better way to build integer vectors if we have MTVSRDD. Little endian. +def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)), + (v2i64 (MTVSRDD $rB, $rA))>; +def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (MTVSRDD + (RLDIMI AnyExts.C, AnyExts.D, 32, 0), + (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>; + +def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; +} // HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian +} // AddedComplexity = 400 + +//---------------------------- Instruction aliases ---------------------------// +def : InstAlias<"xvmovdp $XT, $XB", + (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; +def : InstAlias<"xvmovsp $XT, $XB", + (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; + +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; +def : InstAlias<"xxmrghd $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; +def : InstAlias<"xxmrgld $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>; +def : InstAlias<"mfvrd $rA, $XT", + (MFVRD g8rc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprd $rA, $src", + (MFVSRD g8rc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrd $XT, $rA", + (MTVRD vrrc:$XT, g8rc:$rA), 0>; +def : InstAlias<"mtfprd $dst, $rA", + (MTVSRD f8rc:$dst, g8rc:$rA)>; +def : InstAlias<"mfvrwz $rA, $XT", + (MFVRWZ gprc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprwz $rA, $src", + (MFVSRWZ gprc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrwa $XT, $rA", + (MTVRWA vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwa $dst, $rA", + (MTVSRWA f8rc:$dst, gprc:$rA)>; +def : InstAlias<"mtvrwz $XT, $rA", + (MTVRWZ vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwz $dst, $rA", + (MTVSRWZ f8rc:$dst, gprc:$rA)>; diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index add4de24275f2..236f98f32e18e 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -86,14 +86,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; const MachineInstr *MI = MO.getParent(); - - if (MI->getOpcode() == PPC::BL8_NOTOC) - RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; - const MachineFunction *MF = MI->getMF(); const Module *M = MF->getFunction().getParent(); const PPCSubtarget *Subtarget = &(MF->getSubtarget()); const TargetMachine &TM = Printer.TM; + + unsigned MIOpcode = MI->getOpcode(); + assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != 
PPC::BL8_NOTOC) && + "BL8_NOTOC is only valid when using PC Relative Calls."); + if (Subtarget->isUsingPCRelativeCalls()) { + if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 || + MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 || + MIOpcode == PPC::BL8_NOTOC) { + RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; + } + } + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); // If -msecure-plt -fPIC, add 32768 to symbol. if (Subtarget->isSecurePlt() && TM.isPositionIndependent() && diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index ff2329ada82ff..1c7c73803c1e9 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -57,8 +57,6 @@ STATISTIC(NumRotatesCollapsed, "Number of pairs of rotate left, clear left/right collapsed"); STATISTIC(NumEXTSWAndSLDICombined, "Number of pairs of EXTSW and SLDI combined as EXTSWSLI"); -STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been " - "found when PC relative NOTOC is being used."); static cl::opt FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), @@ -101,11 +99,6 @@ struct PPCMIPeephole : public MachineFunctionPass { // Initialize class variables. void initialize(MachineFunction &MFParm); - // Perform peepholes that cannot be skipped. - // Some peephole simplifications are required for correctness and will not - // be skipped even if skipFunction(MF.getFunction()) returns true. - void unskipableSimplifyCode(void); - // Perform peepholes. bool simplifyCode(void); @@ -132,11 +125,11 @@ struct PPCMIPeephole : public MachineFunctionPass { // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { initialize(MF); - // FIXME: This introduces another complete traversal of the instructions - // in the function in the common case (function is not skipped). Although - // this is less than ideal for compile time, this code will go away once - // our PC-Rel implementation is complete. - unskipableSimplifyCode(); + // At this point, TOC pointer should not be used in a function that uses + // PC-Relative addressing. + assert((MF.getRegInfo().use_empty(PPC::X2) || + !MF.getSubtarget().isUsingPCRelativeCalls()) && + "TOC pointer used in a function using PC-Relative addressing!"); if (skipFunction(MF.getFunction())) return false; return simplifyCode(); @@ -272,41 +265,6 @@ void PPCMIPeephole::UpdateTOCSaves( TOCSaves[MI] = Keep; } -void PPCMIPeephole::unskipableSimplifyCode(void) { - // If this function has no uses of R2 there is nothing to do here. - if(MF->getRegInfo().use_empty(PPC::X2)) - return; - - // This is only for PCRelative calls. - if (!MF->getSubtarget().isUsingPCRelativeCalls()) { - return; - } - - // This function has R2 so we need to mark an implicit def for it. - PPCFunctionInfo *FuncInfo = MF->getInfo(); - FuncInfo->setUsesTOCBasePtr(); - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == PPC::BL8_NOTOC) { - // At this point the BL8_NOTOC instruction is not really safe because it - // assumes that the caller does not need the TOC. It will be safe - // later once the full PC relative implementation is complete but it is - // not now. - // Here we are looking for X2. Since this is Pre-RA the only uses of X2 - // would indicate the use of the TOC. We want to detect all uses of the - // TOC. Once the work is done we should not see any uses of the TOC. 
- // TODO: Once the implementation is complete this should be turned into - // an assert - Register Reg = MF->getSubtarget().getTOCPointerRegister(); - MachineOperand MO = MachineOperand::CreateReg(Reg, false, true); - MI.addOperand(*MF, MO); - MI.setDesc(TII->get(PPC::BL8_NOP)); - ++NumX2FoundForPCRel; - } - } - } -} - // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 85555b109a3fb..698e2d48b53a7 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -59,9 +59,10 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCost(Imm, Ty); + return BaseT::getIntImmCost(Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -89,9 +90,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty); + return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -119,13 +121,14 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; break; } - return PPCTTIImpl::getIntImmCost(Imm, Ty); + return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty); + return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -203,18 +206,19 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, return TTI::TCC_Free; } - return PPCTTIImpl::getIntImmCost(Imm, Ty); + return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } -unsigned PPCTTIImpl::getUserCost(const User *U, - ArrayRef Operands) { +unsigned +PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { if (U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); - return LT.first * BaseT::getUserCost(U, Operands); + return LT.first * BaseT::getUserCost(U, Operands, CostKind); } - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); } bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, @@ -271,7 +275,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, J != JE; ++J) { if (CallInst *CI = dyn_cast(J)) { // Inline ASM is okay, unless it clobbers the ctr register. 
- if (InlineAsm *IA = dyn_cast(CI->getCalledValue())) { + if (InlineAsm *IA = dyn_cast(CI->getCalledOperand())) { if (asmClobbersCTR(IA)) return true; continue; @@ -719,6 +723,7 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, } int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -728,7 +733,8 @@ int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info, Opd1PropInfo, Opd2PropInfo); return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); } @@ -748,16 +754,18 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, } int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); - int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src); + int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); return vectorCostAdjustment(Cost, Opcode, Dst, Src); } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) { - int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); } @@ -836,13 +844,15 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); - int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr); bool IsAltivecType = ST->hasAltivec() && @@ -912,11 +922,12 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(isa(VecTy) && @@ -927,7 +938,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, // Firstly, the cost of load/store operation. 
int Cost = - getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); + getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, + CostKind); // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant @@ -942,19 +954,21 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { if (ID == Intrinsic::bswap && ST->hasP9Vector()) return TLI->getTypeLegalizationCost(DL, RetTy).first; return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 36ed6ff2e89f0..74fbf2dec6591 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -45,14 +45,16 @@ class PPCTTIImpl : public BasicTTIImplBase { /// @{ using BaseT::getIntImmCost; - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); - unsigned getUserCost(const User *U, ArrayRef Operands); + unsigned getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -90,6 +92,7 @@ class PPCTTIImpl : public BasicTTIImplBase { int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -98,25 +101,34 @@ class PPCTTIImpl : public BasicTTIImplBase { const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind 
CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + unsigned getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I = nullptr); + unsigned VF, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr); /// @} diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 0f284710f79cc..e239870662bb0 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1177,6 +1177,8 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { return MatchOperand_ParseFail; auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier); + if (!SysReg) + SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier); // Accept a named Sys Reg if the required features are present. if (SysReg) { if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 87ad3956bcc5a..0927a3e2ed0fe 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -116,6 +116,7 @@ class RISCVTargetLowering : public TargetLowering { bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } + bool convertSelectOfConstantsToMath(EVT VT) const override { return true; } bool shouldInsertFencesForAtomic(const Instruction *I) const override { return isa(I) || isa(I); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index d32c5f37d630b..6c36f53cd563d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -306,11 +306,15 @@ def : PatFpr64Fpr64; def : Pat<(seto FPR64:$rs1, FPR64:$rs2), (AND (FEQ_D FPR64:$rs1, FPR64:$rs1), (FEQ_D FPR64:$rs2, FPR64:$rs2))>; +def : Pat<(seto FPR64:$rs1, FPR64:$rs1), + (FEQ_D $rs1, $rs1)>; def : Pat<(setuo FPR64:$rs1, FPR64:$rs2), (SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1), (FEQ_D FPR64:$rs2, FPR64:$rs2)), 1)>; +def : Pat<(setuo FPR64:$rs1, FPR64:$rs1), + (SLTIU (FEQ_D $rs1, $rs1), 1)>; def Select_FPR64_Using_CC_GPR : SelectCC_rrirr; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 190007fe6f1fb..ce5c3abb6a066 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -366,11 +366,15 @@ def : PatFpr32Fpr32; def : Pat<(seto FPR32:$rs1, FPR32:$rs2), (AND (FEQ_S FPR32:$rs1, FPR32:$rs1), (FEQ_S FPR32:$rs2, FPR32:$rs2))>; +def : Pat<(seto FPR32:$rs1, FPR32:$rs1), + (FEQ_S $rs1, $rs1)>; def : Pat<(setuo FPR32:$rs1, FPR32:$rs2), (SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1), (FEQ_S FPR32:$rs2, FPR32:$rs2)), 1)>; +def : Pat<(setuo FPR32:$rs1, FPR32:$rs1), + (SLTIU (FEQ_S $rs1, $rs1), 1)>; def Select_FPR32_Using_CC_GPR : SelectCC_rrirr; @@ -394,16 +398,6 @@ def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>; def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] -let Predicates = [HasStdExtF, IsRV32] in { -// FP->[u]int. 
Round-to-zero must be used -def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; - -// [u]int->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>; -} // Predicates = [HasStdExtF, IsRV32] - let Predicates = [HasStdExtF, IsRV64] in { def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>; diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index a46a32c4e7f25..853dca0295c97 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -20,6 +20,8 @@ include "llvm/TableGen/SearchableTable.td" class SysReg op> { string Name = name; bits<12> Encoding = op; + // A maximum of one alias is supported right now. + string AltName = name; // FIXME: add these additional fields when needed. // Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3. // Privilege Mode: User = 0, System = 1 or Machine = 3. @@ -36,7 +38,7 @@ class SysReg op> { def SysRegsList : GenericTable { let FilterClass = "SysReg"; // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. - let Fields = [ "Name", "Encoding", "FeaturesRequired", "isRV32Only" ]; + let Fields = [ "Name", "Encoding", "AltName", "FeaturesRequired", "isRV32Only" ]; let PrimaryKey = [ "Encoding" ]; let PrimaryKeyName = "lookupSysRegByEncoding"; @@ -47,6 +49,11 @@ def lookupSysRegByName : SearchIndex { let Key = [ "Name" ]; } +def lookupSysRegByAltName : SearchIndex { + let Table = SysRegsList; + let Key = [ "AltName" ]; +} + // The following CSR encodings match those given in Tables 2.2, // 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual // Volume II: Privileged Architecture. @@ -346,4 +353,8 @@ def : SysReg<"tdata3", 0x7A3>; //===----------------------------------------------- def : SysReg<"dcsr", 0x7B0>; def : SysReg<"dpc", 0x7B1>; -def : SysReg<"dscratch", 0x7B2>; +// "dscratch" is an alternative name for "dscratch0" which appeared in earlier +// drafts of the RISC-V debug spec +let AltName = "dscratch" in +def : SysReg<"dscratch0", 0x7B2>; +def : SysReg<"dscratch1", 0x7B3>; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 90fcd679c5234..bd78f801c59ab 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; #define DEBUG_TYPE "riscvtti" -int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); @@ -30,7 +31,7 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); @@ -78,7 +79,7 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt & } // Otherwise, use the full materialisation cost. - return getIntImmCost(Imm, Ty); + return getIntImmCost(Imm, Ty, CostKind); } // By default, prevent hoisting. 
@@ -86,7 +87,8 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt & } int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { // Prevent hoisting in unknown cases. return TTI::TCC_Free; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index d219ba81bb56b..3927007077604 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -41,12 +41,13 @@ class RISCVTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - int getIntImmCost(const APInt &Imm, Type *Ty); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); }; } // end namespace llvm -#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H \ No newline at end of file +#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h index d36c528bba1e7..2aa1b23d24f00 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h @@ -157,6 +157,7 @@ namespace RISCVSysReg { struct SysReg { const char *Name; unsigned Encoding; + const char *AltName; // FIXME: add these additional fields when needed. // Privilege Access: Read, Write, Read-Only. // unsigned ReadWrite; diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 3bf54edab3b1b..a3110248e8e05 100644 --- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -405,10 +405,7 @@ class SystemZAsmParser : public MCTargetAsmParser { bool parseRegister(Register &Reg, bool RestoreOnFailure = false); - bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs); - OperandMatchResultTy parseRegister(OperandVector &Operands, - RegisterGroup Group, const unsigned *Regs, RegisterKind Kind); OperandMatchResultTy parseAnyRegister(OperandVector &Operands); @@ -421,7 +418,7 @@ class SystemZAsmParser : public MCTargetAsmParser { bool ParseDirectiveInsn(SMLoc L); OperandMatchResultTy parseAddress(OperandVector &Operands, - MemoryKind MemKind, const unsigned *Regs, + MemoryKind MemKind, RegisterKind RegKind); OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, @@ -459,78 +456,78 @@ class SystemZAsmParser : public MCTargetAsmParser { // Used by the TableGen code to parse particular operand types. 
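+  // Each of these helpers now passes only the RegisterKind; the expected
+  // register group and the mapping to LLVM register numbers are derived
+  // from that kind inside parseRegister.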
OperandMatchResultTy parseGR32(OperandVector &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); + return parseRegister(Operands, GR32Reg); } OperandMatchResultTy parseGRH32(OperandVector &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); + return parseRegister(Operands, GRH32Reg); } OperandMatchResultTy parseGRX32(OperandVector &Operands) { llvm_unreachable("GRX32 should only be used for pseudo instructions"); } OperandMatchResultTy parseGR64(OperandVector &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); + return parseRegister(Operands, GR64Reg); } OperandMatchResultTy parseGR128(OperandVector &Operands) { - return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); + return parseRegister(Operands, GR128Reg); } OperandMatchResultTy parseADDR32(OperandVector &Operands) { // For the AsmParser, we will accept %r0 for ADDR32 as well. - return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); + return parseRegister(Operands, GR32Reg); } OperandMatchResultTy parseADDR64(OperandVector &Operands) { // For the AsmParser, we will accept %r0 for ADDR64 as well. - return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); + return parseRegister(Operands, GR64Reg); } OperandMatchResultTy parseADDR128(OperandVector &Operands) { llvm_unreachable("Shouldn't be used as an operand"); } OperandMatchResultTy parseFP32(OperandVector &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); + return parseRegister(Operands, FP32Reg); } OperandMatchResultTy parseFP64(OperandVector &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); + return parseRegister(Operands, FP64Reg); } OperandMatchResultTy parseFP128(OperandVector &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); + return parseRegister(Operands, FP128Reg); } OperandMatchResultTy parseVR32(OperandVector &Operands) { - return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); + return parseRegister(Operands, VR32Reg); } OperandMatchResultTy parseVR64(OperandVector &Operands) { - return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); + return parseRegister(Operands, VR64Reg); } OperandMatchResultTy parseVF128(OperandVector &Operands) { llvm_unreachable("Shouldn't be used as an operand"); } OperandMatchResultTy parseVR128(OperandVector &Operands) { - return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); + return parseRegister(Operands, VR128Reg); } OperandMatchResultTy parseAR32(OperandVector &Operands) { - return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg); + return parseRegister(Operands, AR32Reg); } OperandMatchResultTy parseCR64(OperandVector &Operands) { - return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg); + return parseRegister(Operands, CR64Reg); } OperandMatchResultTy parseAnyReg(OperandVector &Operands) { return parseAnyRegister(Operands); } OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { - return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, GR32Reg); + return parseAddress(Operands, BDMem, GR32Reg); } OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { - return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, GR64Reg); + return parseAddress(Operands, BDMem, GR64Reg); } OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) { - return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, GR64Reg); + return 
parseAddress(Operands, BDXMem, GR64Reg); } OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { - return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, GR64Reg); + return parseAddress(Operands, BDLMem, GR64Reg); } OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) { - return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, GR64Reg); + return parseAddress(Operands, BDRMem, GR64Reg); } OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { - return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, GR64Reg); + return parseAddress(Operands, BDVMem, GR64Reg); } OperandMatchResultTy parsePCRel12(OperandVector &Operands) { return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false); @@ -748,37 +745,113 @@ bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) { return false; } -// Parse a register of group Group. If Regs is nonnull, use it to map -// the raw register number to LLVM numbering, with zero entries -// indicating an invalid register. Allow FP Group if expecting -// RegV Group, since the f-prefix yields the FP group even while used -// with vector instructions. -bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, - const unsigned *Regs) { - if (parseRegister(Reg)) - return true; - if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV)) - return Error(Reg.StartLoc, "invalid operand for instruction"); - if (Regs && Regs[Reg.Num] == 0) - return Error(Reg.StartLoc, "invalid register pair"); - if (Regs) - Reg.Num = Regs[Reg.Num]; - return false; -} - -// Parse a register and add it to Operands. The other arguments are as above. +// Parse a register of kind Kind and add it to Operands. OperandMatchResultTy -SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group, - const unsigned *Regs, RegisterKind Kind) { - if (Parser.getTok().isNot(AsmToken::Percent)) +SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) { + SMLoc StartLoc, EndLoc; + unsigned RegNum; + + // Handle register names of the form %. + if (Parser.getTok().is(AsmToken::Percent)) { + Register Reg; + if (parseRegister(Reg)) + return MatchOperand_ParseFail; + + // Verify that a register prefix appropriate for Kind was used. + bool PrefixMatch; + switch (Kind) { + case GR32Reg: + case GRH32Reg: + case GR64Reg: + case GR128Reg: + PrefixMatch = Reg.Group == RegGR; + break; + case FP32Reg: + case FP64Reg: + case FP128Reg: + PrefixMatch = Reg.Group == RegFP; + break; + case VR32Reg: + case VR64Reg: + case VR128Reg: + // It is OK to use the %f prefix with vector instructions that + // expect some VR..Reg kind, so accept the RegFP group as well. + PrefixMatch = Reg.Group == RegV || Reg.Group == RegFP; + break; + case AR32Reg: + PrefixMatch = Reg.Group == RegAR; + break; + case CR64Reg: + PrefixMatch = Reg.Group == RegCR; + break; + } + if (!PrefixMatch) { + Error(Reg.StartLoc, "invalid operand for instruction"); + return MatchOperand_ParseFail; + } + + RegNum = Reg.Num; + StartLoc = Reg.StartLoc; + EndLoc = Reg.EndLoc; + } + // Also allow specifying just a plain register number as integer. 
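+  // For example, an operand written as "15" instead of "%r15". The value is
+  // range checked below (0-31 for the vector register kinds, 0-15 for all
+  // other register classes) and then mapped to an LLVM register number just
+  // like a named register.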
+ else if (Parser.getTok().is(AsmToken::Integer)) { + const MCExpr *Register; + StartLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(Register)) + return MatchOperand_ParseFail; + + auto *CE = dyn_cast(Register); + if (!CE) + return MatchOperand_ParseFail; + + int64_t MaxRegNum; + switch (Kind) { + case VR32Reg: + case VR64Reg: + case VR128Reg: + MaxRegNum = 31; + break; + default: + MaxRegNum = 15; + break; + } + int64_t Value = CE->getValue(); + if (Value < 0 || Value > MaxRegNum) { + Error(StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + RegNum = (unsigned) Value; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + } + // Otherwise we didn't match a register operand. + else return MatchOperand_NoMatch; - Register Reg; - if (parseRegister(Reg, Group, Regs)) + // Determine the LLVM register number according to Kind. + const unsigned *Regs; + switch (Kind) { + case GR32Reg: Regs = SystemZMC::GR32Regs; break; + case GRH32Reg: Regs = SystemZMC::GRH32Regs; break; + case GR64Reg: Regs = SystemZMC::GR64Regs; break; + case GR128Reg: Regs = SystemZMC::GR128Regs; break; + case FP32Reg: Regs = SystemZMC::FP32Regs; break; + case FP64Reg: Regs = SystemZMC::FP64Regs; break; + case FP128Reg: Regs = SystemZMC::FP128Regs; break; + case VR32Reg: Regs = SystemZMC::VR32Regs; break; + case VR64Reg: Regs = SystemZMC::VR64Regs; break; + case VR128Reg: Regs = SystemZMC::VR128Regs; break; + case AR32Reg: Regs = SystemZMC::AR32Regs; break; + case CR64Reg: Regs = SystemZMC::CR64Regs; break; + } + if (Regs[RegNum] == 0) { + Error(StartLoc, "invalid register pair"); return MatchOperand_ParseFail; + } - Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num, - Reg.StartLoc, Reg.EndLoc)); + Operands.push_back(SystemZOperand::createReg(Kind, Regs[RegNum], + StartLoc, EndLoc)); return MatchOperand_Success; } @@ -903,7 +976,7 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) { // are as above. OperandMatchResultTy SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, - const unsigned *Regs, RegisterKind RegKind) { + RegisterKind RegKind) { SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base = 0, Index = 0, LengthReg = 0; Register Reg1, Reg2; @@ -913,6 +986,13 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length)) return MatchOperand_ParseFail; + const unsigned *Regs; + switch (RegKind) { + case GR32Reg: Regs = SystemZMC::GR32Regs; break; + case GR64Reg: Regs = SystemZMC::GR64Regs; break; + default: llvm_unreachable("invalid RegKind"); + } + switch (MemKind) { case BDMem: // If we have Reg1, it must be an address register. 
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 32a28c5f4e674..9ec7b07fc3f81 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -30,7 +30,8 @@ using namespace llvm; // //===----------------------------------------------------------------------===// -int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } TargetTransformInfo::PopcntSupportKind @@ -258,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } if (isa(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); - NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, + TTI::TCK_RecipThroughput); } } @@ -365,7 +369,8 @@ static unsigned getNumVectorRegs(Type *Ty) { } int SystemZTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -459,7 +464,8 @@ int SystemZTTIImpl::getArithmeticInstrCost( return DivInstrCost; } else if (ST->hasVector()) { - unsigned VF = cast(Ty)->getNumElements(); + auto *VTy = cast(Ty); + unsigned VF = VTy->getNumElements(); unsigned NumVectors = getNumVectorRegs(Ty); // These vector operations are custom handled, but are still supported @@ -472,7 +478,7 @@ int SystemZTTIImpl::getArithmeticInstrCost( if (DivRemConstPow2) return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1)); if (DivRemConst) - return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args); + return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args); if ((SignedDivRem || UnsignedDivRem) && VF > 4) // Temporary hack: disable high vectorization factors with integer // division/remainder, which will get scalarized and handled with @@ -494,8 +500,8 @@ int SystemZTTIImpl::getArithmeticInstrCost( // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
unsigned ScalarCost = - getArithmeticInstrCost(Opcode, Ty->getScalarType()); - unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args); + getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); + unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args); // FIXME: VF 2 for these FP operations are currently just as // expensive as for VF 4. if (VF == 2) @@ -512,7 +518,7 @@ int SystemZTTIImpl::getArithmeticInstrCost( // There is no native support for FRem. if (Opcode == Instruction::FRem) { - unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args); + unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args); // FIXME: VF 2 for float is currently just as expensive as for VF 4. if (VF == 2 && ScalarBits == 32) Cost *= 2; @@ -521,7 +527,7 @@ int SystemZTTIImpl::getArithmeticInstrCost( } // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } @@ -684,6 +690,7 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, } int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { unsigned DstScalarBits = Dst->getScalarSizeInBits(); unsigned SrcScalarBits = Src->getScalarSizeInBits(); @@ -718,8 +725,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } } else if (ST->hasVector()) { - assert (Dst->isVectorTy()); - unsigned VF = cast(Src)->getNumElements(); + auto *SrcVecTy = cast(Src); + auto *DstVecTy = cast(Dst); + unsigned VF = SrcVecTy->getNumElements(); unsigned NumDstVectors = getNumVectorRegs(Dst); unsigned NumSrcVectors = getNumVectorRegs(Src); @@ -764,7 +772,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // inserting and extracting the values. Base implementation does not // realize float->int gets scalarized. unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); + Src->getScalarType(), CostKind); unsigned TotCost = VF * ScalarCost; bool NeedsInserts = true, NeedsExtracts = true; // FP128 registers do not get inserted or extracted. @@ -775,8 +783,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI)) NeedsExtracts = false; - TotCost += getScalarizationOverhead(Src, false, NeedsExtracts); - TotCost += getScalarizationOverhead(Dst, NeedsInserts, false); + TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts); + TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false); // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4. if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32) @@ -787,7 +795,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (Opcode == Instruction::FPTrunc) { if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements. - return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false); + return VF /*ldxbr/lexbr*/ + + getScalarizationOverhead(DstVecTy, true, false); else // double -> float return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/); } @@ -800,11 +809,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return VF * 2; } // -> fp128. VF * lxdb/lxeb + extraction of elements. 
- return VF + getScalarizationOverhead(Src, false, true); + return VF + getScalarizationOverhead(SrcVecTy, false, true); } } - return BaseT::getCastInstrCost(Opcode, Dst, Src, I); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); } // Scalar i8 / i16 operations will typically be made after first extending @@ -820,7 +829,9 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { } int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (!ValTy->isVectorTy()) { switch (Opcode) { case Instruction::ICmp: { @@ -895,7 +906,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind); } int SystemZTTIImpl:: @@ -1009,6 +1020,7 @@ static bool isBswapIntrinsicCall(const Value *V) { int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); @@ -1077,11 +1089,12 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(isa(VecTy) && "Expect a vector type for interleaved memory op"); @@ -1142,21 +1155,23 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) { int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { int Cost = getVectorIntrinsicInstrCost(ID, RetTy); if (Cost != -1) return Cost; - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { int Cost = getVectorIntrinsicInstrCost(ID, RetTy); if (Cost != -1) return Cost; return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index b4dc53bdc70a6..5bb86cf5b9311 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -38,11 +38,12 @@ class SystemZTTIImpl : public BasicTTIImplBase { unsigned getInliningThresholdMultiplier() { return 3; } - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); @@ 
-75,6 +76,7 @@ class SystemZTTIImpl : public BasicTTIImplBase {
 
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -88,28 +90,35 @@ class SystemZTTIImpl : public BasicTTIImplBase {
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
                                          const Instruction *I);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor,
                                  ArrayRef Indices,
                                  unsigned Alignment,
                                  unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                             ArrayRef Args, FastMathFlags FMF,
-                            unsigned VF = 1, const Instruction *I = nullptr);
+                            unsigned VF = 1,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+                            const Instruction *I = nullptr);
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                             ArrayRef Tys, FastMathFlags FMF,
                             unsigned ScalarizationCostPassed = UINT_MAX,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                             const Instruction *I = nullptr);
   /// @}
 };
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 19392fa1b2463..86e3aa3d3fa13 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -88,9 +88,12 @@ static void emitSIC(MCStreamer &OutStreamer, MCOperand &RD,
 static void emitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2,
                      const MCSubtargetInfo &STI) {
   MCInst BSICInst;
-  BSICInst.setOpcode(VE::BSIC);
+  BSICInst.setOpcode(VE::BSICrii);
   BSICInst.addOperand(R1);
   BSICInst.addOperand(R2);
+  MCOperand czero = MCOperand::createImm(0);
+  BSICInst.addOperand(czero);
+  BSICInst.addOperand(czero);
   OutStreamer.emitInstruction(BSICInst, STI);
 }
 
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 3d27a319ba28a..a3ead990bccf8 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,8 +27,8 @@ enum NodeType : unsigned {
   Hi,
   Lo, // Hi/Lo operations, typically on a global address.
 
-  GETFUNPLT,   // load function address through %plt insturction
-  GETTLSADDR,  // load address for TLS access
+  GETFUNPLT,  // load function address through %plt instruction
+  GETTLSADDR, // load address for TLS access
 
   CALL,     // A call instruction.
   RET_FLAG, // Return with a flag operand.
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index cec8fb5ad5366..02a63f4aa3656 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -68,25 +68,38 @@ static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) { llvm_unreachable("Invalid cond code"); } -// Treat br.l [BCR AT] as unconditional branch +// Treat br.l [BRCF AT] as unconditional branch static bool isUncondBranchOpcode(int Opc) { - return Opc == VE::BCRLa || Opc == VE::BCRWa || - Opc == VE::BCRDa || Opc == VE::BCRSa; + return Opc == VE::BRCFLa || Opc == VE::BRCFWa || + Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt || + Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t || + Opc == VE::BRCFDa || Opc == VE::BRCFSa || + Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt || + Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t; } static bool isCondBranchOpcode(int Opc) { - return Opc == VE::BCRLrr || Opc == VE::BCRLir || - Opc == VE::BCRLrm0 || Opc == VE::BCRLrm1 || - Opc == VE::BCRLim0 || Opc == VE::BCRLim1 || - Opc == VE::BCRWrr || Opc == VE::BCRWir || - Opc == VE::BCRWrm0 || Opc == VE::BCRWrm1 || - Opc == VE::BCRWim0 || Opc == VE::BCRWim1 || - Opc == VE::BCRDrr || Opc == VE::BCRDir || - Opc == VE::BCRDrm0 || Opc == VE::BCRDrm1 || - Opc == VE::BCRDim0 || Opc == VE::BCRDim1 || - Opc == VE::BCRSrr || Opc == VE::BCRSir || - Opc == VE::BCRSrm0 || Opc == VE::BCRSrm1 || - Opc == VE::BCRSim0 || Opc == VE::BCRSim1; + return Opc == VE::BRCFLrr || Opc == VE::BRCFLir || + Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt || + Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t || + Opc == VE::BRCFWrr || Opc == VE::BRCFWir || + Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt || + Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t || + Opc == VE::BRCFDrr || Opc == VE::BRCFDir || + Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt || + Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t || + Opc == VE::BRCFSrr || Opc == VE::BRCFSir || + Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt || + Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t; +} + +static bool isIndirectBranchOpcode(int Opc) { + return Opc == VE::BCFLari || Opc == VE::BCFLari_nt || Opc == VE::BCFLari_t; } static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, @@ -165,14 +178,14 @@ bool VEInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return false; } - // TODO ...likewise if it ends with an indirect branch followed by an unconditional + // ...likewise if it ends with an indirect branch followed by an unconditional // branch. - // if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - // I = LastInst; - // if (AllowModify) - // I->eraseFromParent(); - // return true; - // } + if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } // Otherwise, can't handle this.
return true; @@ -190,13 +203,13 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) { // Unconditional branch assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(VE::BCRLa)) + BuildMI(&MBB, DL, get(VE::BRCFLa_t)) .addMBB(TBB); return 1; } // Conditional branch - // (BCRir CC sy sz addr) + // (BRCFir CC sy sz addr) assert(Cond[0].isImm() && Cond[2].isReg() && "not implemented"); unsigned opc[2]; @@ -206,19 +219,19 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, unsigned Reg = Cond[2].getReg(); if (IsIntegerCC(Cond[0].getImm())) { if (TRI->getRegSizeInBits(Reg, MRI) == 32) { - opc[0] = VE::BCRWir; - opc[1] = VE::BCRWrr; + opc[0] = VE::BRCFWir; + opc[1] = VE::BRCFWrr; } else { - opc[0] = VE::BCRLir; - opc[1] = VE::BCRLrr; + opc[0] = VE::BRCFLir; + opc[1] = VE::BRCFLrr; } } else { if (TRI->getRegSizeInBits(Reg, MRI) == 32) { - opc[0] = VE::BCRSir; - opc[1] = VE::BCRSrr; + opc[0] = VE::BRCFSir; + opc[1] = VE::BRCFSrr; } else { - opc[0] = VE::BCRDir; - opc[1] = VE::BCRDrr; + opc[0] = VE::BRCFDir; + opc[1] = VE::BRCFDrr; } } if (Cond[1].isImm()) { @@ -238,7 +251,7 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, if (!FBB) return 1; - BuildMI(&MBB, DL, get(VE::BCRLa)) + BuildMI(&MBB, DL, get(VE::BRCFLa_t)) .addMBB(FBB); return 2; } @@ -488,7 +501,7 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const { // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(syscallMBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII.get(VE::BCRLrr)) + BuildMI(BB, dl, TII.get(VE::BRCFLrr_t)) .addImm(VECC::CC_IGE) .addReg(VE::SX11) // %sp .addReg(VE::SX8) // %sl diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 1411c2a8deb06..14c067f646277 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -70,6 +70,47 @@ def mimm : Operand, PatLeaf<(imm), [{ let PrintMethod = "printMImmOperand"; } +// simm7fp - Generic fp immediate value. +def LO7FP : SDNodeXFormgetValueAPF().bitcastToAPInt(); + uint64_t val = imm.getSExtValue(); + if (imm.getBitWidth() == 32) + val <<= 32; // The immediate value of a float is placed at the higher bits on VE. + return CurDAG->getTargetConstant(SignExtend32(val, 7), SDLoc(N), MVT::i32); +}]>; +def simm7fp : Operand, PatLeaf<(fpimm), [{ + const APInt& imm = N->getValueAPF().bitcastToAPInt(); + uint64_t val = imm.getSExtValue(); + if (imm.getBitWidth() == 32) + val <<= 32; // The immediate value of a float is placed at the higher bits on VE. + return isInt<7>(val); + }], LO7FP> { + let DecoderMethod = "DecodeSIMM7"; +} + +// mimmfp - Special fp immediate value made of a sequential bit stream of 0s or 1s. +def MIMMFP : SDNodeXFormgetValueAPF().bitcastToAPInt(); + uint64_t Val = Imm.getSExtValue(); + bool M0Flag = isMask_64(Val); + if (Imm.getBitWidth() == 32) + Val <<= 32; // The immediate value of a float is placed at the higher bits on VE. + if (M0Flag) { + // bit 6 : If `(m)0`, 1. Otherwise, 0.
+ Val = countLeadingZeros(Val) | 0x40; + } else + Val = countLeadingOnes(Val); + return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); +}]>; +def mimmfp : Operand, PatLeaf<(fpimm), [{ + const APInt& Imm = N->getValueAPF().bitcastToAPInt(); + uint64_t Val = Imm.getSExtValue(); + return isMask_64(Val) || + ((Val & (1UL << 63)) && isShiftedMask_64(Val)); }], MIMMFP> { + let PrintMethod = "printMImmOperand"; +} + def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000) @@ -144,6 +185,24 @@ def icond2cc : SDNodeXFormgetTargetConstant(cc, SDLoc(N), MVT::i32); }]>; +def icond2ccSwap : SDNodeXFormget()) { + default: llvm_unreachable("Unknown integer condition code!"); + case ISD::SETEQ: cc = VECC::CC_IEQ; break; + case ISD::SETNE: cc = VECC::CC_INE; break; + case ISD::SETLT: cc = VECC::CC_IG; break; + case ISD::SETGT: cc = VECC::CC_IL; break; + case ISD::SETLE: cc = VECC::CC_IGE; break; + case ISD::SETGE: cc = VECC::CC_ILE; break; + case ISD::SETULT: cc = VECC::CC_IG; break; + case ISD::SETULE: cc = VECC::CC_IGE; break; + case ISD::SETUGT: cc = VECC::CC_IL; break; + case ISD::SETUGE: cc = VECC::CC_ILE; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + def fcond2cc : SDNodeXFormget()) { @@ -174,6 +233,36 @@ def fcond2cc : SDNodeXFormgetTargetConstant(cc, SDLoc(N), MVT::i32); }]>; +def fcond2ccSwap : SDNodeXFormget()) { + default: llvm_unreachable("Unknown float condition code!"); + case ISD::SETFALSE: cc = VECC::CC_AF; break; + case ISD::SETEQ: + case ISD::SETOEQ: cc = VECC::CC_EQ; break; + case ISD::SETNE: + case ISD::SETONE: cc = VECC::CC_NE; break; + case ISD::SETLT: + case ISD::SETOLT: cc = VECC::CC_G; break; + case ISD::SETGT: + case ISD::SETOGT: cc = VECC::CC_L; break; + case ISD::SETLE: + case ISD::SETOLE: cc = VECC::CC_GE; break; + case ISD::SETGE: + case ISD::SETOGE: cc = VECC::CC_LE; break; + case ISD::SETO: cc = VECC::CC_NUM; break; + case ISD::SETUO: cc = VECC::CC_NAN; break; + case ISD::SETUEQ: cc = VECC::CC_EQNAN; break; + case ISD::SETUNE: cc = VECC::CC_NENAN; break; + case ISD::SETULT: cc = VECC::CC_GNAN; break; + case ISD::SETUGT: cc = VECC::CC_LNAN; break; + case ISD::SETULE: cc = VECC::CC_GENAN; break; + case ISD::SETUGE: cc = VECC::CC_LENAN; break; + case ISD::SETTRUE: cc = VECC::CC_AT; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + // Addressing modes. def ADDRri : ComplexPattern; @@ -211,6 +300,7 @@ def MEMASri : Operand { // ASX format: // MEMrri, MEMrii, MEMzri, MEMzii // AS format: +// MEMriASX, MEMziASX : simple AS format // will be added later.
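The LO7FP and MIMMFP transforms above normalize a float immediate by bitcasting it to its integer representation and, when the payload is only 32 bits wide, shifting it into the upper half, because VE keeps float immediates in the high 32 bits of the 64-bit immediate field. A standalone C++ sketch of the simm7fp acceptance test under those assumptions; the helper name is made up, and this is not code from the patch:

#include <cstdint>

// Mirrors the simm7fp PatLeaf predicate: move a 32-bit float payload into
// the upper half, then require the result to fit in a signed 7-bit
// immediate (the check LLVM spells isInt<7>(Val)).
static bool fitsSImm7FP(uint64_t Bits, unsigned BitWidth) {
  uint64_t Val = Bits;
  if (BitWidth == 32)
    Val <<= 32; // VE reads float immediates from the higher bits.
  // Sign-extend the low 7 bits; the value fits iff that reproduces Val.
  return (uint64_t)((int64_t)(Val << 57) >> 57) == Val;
}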
def ADDRrri : ComplexPattern; @@ -255,6 +345,26 @@ def MEMzii : Operand { let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i32imm); let ParserMatchClass = VEMEMziiAsmOperand; } +// AS assembly instruction format: +def VEMEMriAsmOperand : AsmOperandClass { + let Name = "MEMri"; + let ParserMethod = "parseMEMAsOperand"; +} +def VEMEMziAsmOperand : AsmOperandClass { + let Name = "MEMzi"; + let ParserMethod = "parseMEMAsOperand"; +} +// AS generic assembly instruction format: +def MEMriASX : Operand { + let PrintMethod = "printMemASOperandASX"; + let MIOperandInfo = (ops ptr_rc, i32imm); + let ParserMatchClass = VEMEMriAsmOperand; +} +def MEMziASX : Operand { + let PrintMethod = "printMemASOperandASX"; + let MIOperandInfo = (ops i32imm /* = 0 */, i32imm); + let ParserMatchClass = VEMEMziAsmOperand; +} // Branch targets have OtherVT type. def brtarget32 : Operand { @@ -357,94 +467,79 @@ def RD_RA : RD_VAL<12>; // Round to Nearest (ties to Away) // VE Multiclasses for common instruction formats //===----------------------------------------------------------------------===// -multiclass RRmrropc, - RegisterClass RCo, ValueType Tyo, - RegisterClass RCi, ValueType Tyi, - SDPatternOperator OpNode=null_frag> { +// Multiclass for generic RR type instructions +let hasSideEffects = 0 in +multiclass RRbmopc, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> { def rr : RR - { let cy = 1; let cz = 1; let hasSideEffects = 0; } -} - -multiclass RRmriopc, - RegisterClass RCo, ValueType Tyo, - RegisterClass RCi, ValueType Tyi, Operand immOp, - SDPatternOperator OpNode=null_frag> { + [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>; // VE calculates (OpNode $sy, $sz), but LLVM requires the immediate // to be on the RHS, so we use the following definition. + let cy = 0 in def ri : RR - { let cy = 0; let cz = 1; let hasSideEffects = 0; } -} - -multiclass RRmiropc, - RegisterClass RCo, ValueType Tyo, - RegisterClass RCi, ValueType Tyi, Operand immOp, - SDPatternOperator OpNode=null_frag> { - def ri : RR; + let cz = 0 in + def rm : RR - { let cy = 0; let cz = 1; let hasSideEffects = 0; } + [(set Tyo:$sx, (OpNode Tyi:$sy, (Tyi mOp:$sz)))]>; + let cy = 0, cz = 0 in + def im : RR; } -multiclass RRNDmrmopc, - RegisterClass RCo, ValueType Tyo, - RegisterClass RCi, ValueType Tyi, Operand mOp, - SDPatternOperator OpNode=null_frag> { - let cy = 1, cz = 0, hasSideEffects = 0 in +// Multiclass for non-commutative RR type instructions +let hasSideEffects = 0 in +multiclass RRNCbmopc, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> { + def rr : RR; + let cy = 0 in + def ir : RR; + let cz = 0 in def rm : RR; -} - -multiclass RRNDmimopc, - RegisterClass RCo, ValueType Tyo, - RegisterClass RCi, ValueType Tyi, - Operand immOp, Operand mOp, - SDPatternOperator OpNode=null_frag> { - let cy = 0, cz = 0, hasSideEffects = 0 in + let cy = 0, cz = 0 in def im : RR; } -// Used by add, mul, div, and similar commutative instructions -// The order of operands are "$sx, $sy, $sz" - +// Generic RR multiclass with 2 arguments. +// e.g. ADDUL, ADDSWSX, ADDSWZX, etc.
multiclass RRmopc, RegisterClass RC, ValueType Ty, SDPatternOperator OpNode = null_frag, Operand immOp = simm7, Operand mOp = mimm> : - RRmrr, - RRmri, - RRNDmrm, - RRNDmim; - -// Used by sub, and similar not commutative instructions -// The order of operands are "$sx, $sy, $sz" + RRbm; +// Generic RR multiclass for non-commutative instructions with 2 arguments. +// e.g. SUBUL, SUBUW, SUBSWSX, etc. multiclass RRNCmopc, - RegisterClass RC, ValueType Ty, - SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> : - RRmrr, - RRmir, - RRNDmrm, - RRNDmim; - -// Used by fadd, fsub, and similar floating point instructions -// The order of operands are "$sx, $sy, $sz" + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7, Operand mOp = mimm> : + RRNCbm; +// Generic RR multiclass for floating point instructions with 2 arguments. +// e.g. FADDD, FADDS, FSUBD, etc. multiclass RRFmopc, - RegisterClass RC, ValueType Ty, - SDPatternOperator OpNode = null_frag, - Operand immOp = simm7, Operand mOp = mimm> : - RRmrr, - RRmir, - RRNDmrm, - RRNDmim; + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag, + Operand immOp = simm7fp, Operand mOp = mimmfp> : + RRNCbm; // Generic RR multiclass for shift instructions with 2 arguments. // e.g. SLL, SRL, SLAWSX, etc. @@ -554,67 +649,62 @@ multiclass CVTm opc, RegisterClass RCo, ValueType Tyo, !strconcat(opcStr, " $sx, $sy")>; } -// Branch multiclass -let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in -multiclass BCRm opc, - RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { - def rr : CF< - opc, (outs), - (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, $sz, $imm32")> { - let cy = 1; - let cz = 1; - let hasSideEffects = 0; - } - def ir : CF< - opc, (outs), - (ins CCOp:$cf, immOp:$sy, RC:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, $sz, $imm32")> { - let cy = 0; - let cz = 1; - let hasSideEffects = 0; - } - def rm0 : CF< - opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { - let cy = 1; - let cz = 0; - let sz{6} = 1; - let hasSideEffects = 0; - } - def rm1 : CF< - opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { - let cy = 1; - let cz = 0; - let hasSideEffects = 0; - } - def im0 : CF< - opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { - let cy = 0; - let cz = 0; - let sz{6} = 1; - let hasSideEffects = 0; - } - def im1 : CF< - opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { - let cy = 0; - let cz = 0; - let hasSideEffects = 0; - } - def a : CF< - opc, (outs), (ins brtarget32:$imm32), - !strconcat(opcStrAt, " $imm32"), []> { - let cy = 0; - let sy = 0; - let cz = 0; - let sz = 0; - let cf = 15; /* AT */ - let isBarrier = 1; - let hasSideEffects = 0; - } +// Multiclass for branch instructions +// e.g. BCFL, BCFW, BCFD, etc.
+let isBranch = 1, isTerminator = 1, isIndirectBranch = 1, hasSideEffects = 0 in +multiclass BCbpfm opc, dag cond, + Operand ADDR> { + let bpf = 0 /* NONE */ in + def "" : CF; + let bpf = 2 /* NOT TAKEN */ in + def _nt : CF; + let bpf = 3 /* TAKEN */ in + def _t : CF; +} +multiclass BCtgm opc, dag cond> { + defm ri : BCbpfm; + let cz = 0 in defm zi : BCbpfm; +} +multiclass BCm opc, + RegisterClass RC, Operand immOp> { + let DecoderMethod = "DecodeBranchCondition" in + defm r : BCtgm; + let DecoderMethod = "DecodeBranchCondition", cy = 0 in + defm i : BCtgm; + let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0, + cf = 15 /* AT */, isBarrier = 1 in + defm a : BCtgm; + let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0, + cf = 0 /* AF */ in + defm na : BCtgm; +} + +// Multiclass for relative branch instructions +// e.g. BRCFL, BRCFW, BRCFD, etc. +let isBranch = 1, isTerminator = 1, hasSideEffects = 0 in +multiclass BCRbpfm opc, dag cond> { + let bpf = 0 /* NONE */ in + def "" : CF; + let bpf = 2 /* NOT TAKEN */ in + def _nt : CF; + let bpf = 3 /* TAKEN */ in + def _t : CF; +} +multiclass BCRm opc, + RegisterClass RC, Operand immOp> { + defm rr : BCRbpfm; + let cy = 0 in + defm ir : BCRbpfm; + let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in + defm a : BCRbpfm; + let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in + defm na : BCRbpfm; } //===----------------------------------------------------------------------===// @@ -993,59 +1083,50 @@ defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>; // Section 8.7.17 - CVQ (Convert to Single-format) -// Control-flow - -// Jump instruction -let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cy = 1, cz = 1, - isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in -def BC : CF< - 0x19, (outs), (ins CCOp:$cf, I64:$sy, brtarget32:$imm32), - "b.${cf}.l $sy, $imm32">; - -// Jump always instruction is treated as a special case of jump in order -// to make finding unconditional jump easy. -let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, - cz = 1, - isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, - hasDelaySlot = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { -def BAri : CF< - 0x19, (outs), (ins MEMri:$addr), - "b.l $addr", - [(brind ADDRri:$addr)]>; -} +//----------------------------------------------------------------------------- +// Section 8.8 - Branch instructions +//----------------------------------------------------------------------------- + +// Section 8.8.1 - BC (Branch on Condition) +defm BCFL : BCm<"b${cond}.l", "b.l", "baf.l", 0x19, I64, simm7>; + +// Indirect branch aliases +def : Pat<(brind I64:$reg), (BCFLari_t $reg, 0)>; +def : Pat<(brind tblockaddress:$imm), (BCFLazi_t 0, $imm)>; + +// Return instruction is a special case of jump. +let Uses = [SX10], bpf = 3 /* TAKEN */, cf = 15 /* AT */, cy = 0, sy = 0, + sz = 10 /* SX10 */, imm32 = 0, isReturn = 1, isTerminator = 1, + isBarrier = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def RET : CF<0x19, (outs), (ins), "b.l.t (, %s10)", [(retflag)]>; -// Jump never instruction is also a special case of jump. -let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 0 /* AF */, cy = 1, sy = 0, - cz = 1, - isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in -def BN : CF< - 0x19, (outs), (ins brtarget32:$imm32), - "b.af.l $imm32">; - -// Return instruction is also a special case of jump.
-let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, - cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10], - isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def RET : CF< - 0x19, (outs), (ins), - "b.l (,%lr)", - [(retflag)]>; - -// Branch and Save IC - -let cx = 0, cy = 0, cy = 0, cz = 1, hasSideEffects = 0 /* , Uses = [IC] */ in -def BSIC : RM<0x08, (outs), (ins I64:$sx, I64:$sz), "bsic $sx, (, ${sz})">; - -// Branch instruction -let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in -defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7, uimm6>; -let cx = 1, cx2 = 0, bpf = 0 /* NONE */ in -defm BCRW : BCRm<"br${cf}.w", "br.w", 0x18, I32, i32, simm7, uimm6>; -let cx = 0, cx2 = 1, bpf = 0 /* NONE */ in -defm BCRD : BCRm<"br${cf}.d", "br.d", 0x18, I64, f64, simm7, uimm6>; -let cx = 1, cx2 = 1, bpf = 0 /* NONE */ in -defm BCRS : BCRm<"br${cf}.s", "br.s", 0x18, F32, f32, simm7, uimm6>; +// Section 8.8.2 - BCS (Branch on Condition Single) +defm BCFW : BCm<"b${cond}.w", "b.w", "baf.w", 0x1B, I32, simm7>; + +// Section 8.8.3 - BCF (Branch on Condition Floating Point) +defm BCFD : BCm<"b${cond}.d", "b.d", "baf.d", 0x1C, I64, simm7fp>; +let cx = 1 in +defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>; + +// Section 8.8.4 - BCR (Branch on Condition Relative) +let cx = 0, cx2 = 0 in +defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>; +let cx = 1, cx2 = 0 in +defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>; +let cx = 0, cx2 = 1 in +defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>; +let cx = 1, cx2 = 1 in +defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>; + +// Section 8.8.5 - BSIC (Branch and Save IC) +let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in +defm BSIC : RMm<"bsic", 0x08, I64>; + +// Call instruction is a special case of BSIC. 
+let Defs = [SX10], sx = 10 /* SX10 */, cy = 0, sy = 0, imm32 = 0, + isCall = 1, isCodeGenOnly = 1, hasSideEffects = 0 in +def CALLr : RM<0x08, (outs), (ins I64:$sz, variable_ops), + "bsic %s10, (, $sz)", [(call i64:$sz)]>; let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in { let sy = 3 in @@ -1068,23 +1149,6 @@ def SIC : RR<0x28, (outs I32:$sx), (ins), "sic $sx">; // Instructions for CodeGenOnly //===----------------------------------------------------------------------===// -let isCodeGenOnly = 1 in { - -// Call instruction -let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in { -let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in -def CALL : RM< - 0x08, (outs), (ins calltarget:$imm32, variable_ops), - "bsic %lr, $imm32">; -// use sz to represent a register -let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in -def CALLr : RM< - 0x08, (outs), (ins I64:$sz, variable_ops), - "bsic %lr, (,$sz)">; -} - -} - //===----------------------------------------------------------------------===// // Pattern Matchings //===----------------------------------------------------------------------===// @@ -1257,28 +1321,38 @@ def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)), (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0, (texternalsym:$in1))>; -// Calls -def : Pat<(call tglobaladdr:$dst), - (CALL tglobaladdr:$dst)>; -def : Pat<(call i64:$dst), - (CALLr i64:$dst)>; - // Branches -def : Pat<(br bb:$addr), (BCRLa bb:$addr)>; +def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>; // brcc -def : Pat<(brcc CCSIOp:$cond, i32:$l, i32:$r, bb:$addr), - (BCRWrr (icond2cc $cond), $l, $r, bb:$addr)>; -def : Pat<(brcc CCUIOp:$cond, i32:$l, i32:$r, bb:$addr), - (BCRWir (icond2cc $cond), 0, (CMPUWrr $r, $l), bb:$addr)>; -def : Pat<(brcc CCSIOp:$cond, i64:$l, i64:$r, bb:$addr), - (BCRLrr (icond2cc $cond), $l, $r, bb:$addr)>; -def : Pat<(brcc CCUIOp:$cond, i64:$l, i64:$r, bb:$addr), - (BCRLir (icond2cc $cond), 0, (CMPULrr $r, $l), bb:$addr)>; -def : Pat<(brcc cond:$cond, f32:$l, f32:$r, bb:$addr), - (BCRSrr (fcond2cc $cond), $l, $r, bb:$addr)>; -def : Pat<(brcc cond:$cond, f64:$l, f64:$r, bb:$addr), - (BCRDrr (fcond2cc $cond), $l, $r, bb:$addr)>; +// integer brcc +multiclass BRCCIm { + def : Pat<(brcc CCSIOp:$cond, ty:$l, simm7:$r, bb:$addr), + (BrOpNode2 (icond2ccSwap $cond), (LO7 $r), $l, bb:$addr)>; + def : Pat<(brcc CCSIOp:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode1 (icond2cc $cond), $l, $r, bb:$addr)>; + def : Pat<(brcc CCUIOp:$cond, ty:$l, simm7:$r, bb:$addr), + (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode2 (LO7 $r), $l), + bb:$addr)>; + def : Pat<(brcc CCUIOp:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode1 $r, $l), bb:$addr)>; +} +defm : BRCCIm; +defm : BRCCIm; + +// floating point brcc +multiclass BRCCFm { + def : Pat<(brcc cond:$cond, ty:$l, simm7fp:$r, bb:$addr), + (BrOpNode2 (fcond2ccSwap $cond), (LO7FP $r), $l, bb:$addr)>; + def : Pat<(brcc cond:$cond, ty:$l, ty:$r, bb:$addr), + (BrOpNode1 (fcond2cc $cond), $l, $r, bb:$addr)>; +} +defm : BRCCFm; +defm : BRCCFm; //===----------------------------------------------------------------------===// // Pseudo Instructions diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 989c3da109114..bc1c336f52ba3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -277,11 +277,19 @@ void 
WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { #endif } - // All previously inserted BLOCK/TRY markers should be after the BLOCK - // because they are all nested blocks. + // If there is a previously placed BLOCK/TRY marker and its corresponding + // END marker is before the current BLOCK's END marker, that should be + // placed after this BLOCK. Otherwise it should be placed before this BLOCK + // marker. if (MI.getOpcode() == WebAssembly::BLOCK || - MI.getOpcode() == WebAssembly::TRY) - AfterSet.insert(&MI); + MI.getOpcode() == WebAssembly::TRY) { + if (BeginToEnd[&MI]->getParent()->getNumber() <= MBB.getNumber()) + AfterSet.insert(&MI); +#ifndef NDEBUG + else + BeforeSet.insert(&MI); +#endif + } #ifndef NDEBUG // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK. @@ -866,6 +874,10 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // In new CFG, DenseMap BrDestToExnReg; + // Destinations for branches that will be newly added, for which new + // BLOCK/END_BLOCK markers are necessary. + SmallVector BrDests; + // Gather possibly throwing calls (i.e., previously invokes) whose current // unwind destination is not the same as the original CFG. for (auto &MBB : reverse(MF)) { @@ -1075,6 +1087,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { ? DebugLoc() : EHPadLayoutPred->rbegin()->getDebugLoc(); BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont); + BrDests.push_back(Cont); } } @@ -1178,8 +1191,16 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // Fix predecessor-successor relationship. NestedCont->transferSuccessors(MBB); - if (EHPad) + if (EHPad) { NestedCont->removeSuccessor(EHPad); + // If EHPad does not have any predecessors left after removing the + // NestedCont predecessor, remove its successor too, because this EHPad + // is not reachable from the entry BB anyway. We can't remove the EHPad BB + // itself because it can contain 'catch' or 'end', which are necessary + // for keeping the try-catch-end structure. + if (EHPad->pred_empty()) + EHPad->removeSuccessor(BrDest); + } MBB->addSuccessor(NestedEHPad); MBB->addSuccessor(NestedCont); NestedEHPad->addSuccessor(BrDest); @@ -1211,10 +1232,14 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // Recompute the dominator tree. getAnalysis().runOnMachineFunction(MF); - // Place block markers for newly added branches. - SmallVector BrDests; - for (auto &P : BrDestToTryRanges) - BrDests.push_back(P.first); + // Place block markers for newly added branches, if necessary. + + // If we've created an appendix BB and a branch to it, place a block/end_block + // marker for that. For some new branches, those branch destination BBs start + // with a hoisted end_try marker, so we don't need a new marker there. + if (AppendixBB) + BrDests.push_back(AppendixBB); + llvm::sort(BrDests, [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { auto ANum = A->getNumber(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp index a511b320b56b1..c75de7aa207f3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp @@ -46,14 +46,14 @@ bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) { void WebAssemblyExceptionInfo::recalculate( MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF) { // Postorder traversal of the dominator tree.
- SmallVector Exceptions; + SmallVector, 8> Exceptions; for (auto DomNode : post_order(&MDT)) { MachineBasicBlock *EHPad = DomNode->getBlock(); if (!EHPad->isEHPad()) continue; - auto *WE = new WebAssemblyException(EHPad); - discoverAndMapException(WE, MDT, MDF); - Exceptions.push_back(WE); + auto WE = std::make_unique(EHPad); + discoverAndMapException(WE.get(), MDT, MDF); + Exceptions.push_back(std::move(WE)); } // Add BBs to exceptions @@ -64,17 +64,21 @@ void WebAssemblyExceptionInfo::recalculate( WE->addBlock(MBB); } + SmallVector ExceptionPointers; + ExceptionPointers.reserve(Exceptions.size()); + // Add subexceptions to exceptions - for (auto *WE : Exceptions) { + for (auto &WE : Exceptions) { + ExceptionPointers.push_back(WE.get()); if (WE->getParentException()) - WE->getParentException()->getSubExceptions().push_back(WE); + WE->getParentException()->getSubExceptions().push_back(std::move(WE)); else - addTopLevelException(WE); + addTopLevelException(std::move(WE)); } // For convenience, Blocks and SubExceptions are inserted in postorder. // Reverse the lists. - for (auto *WE : Exceptions) { + for (auto *WE : ExceptionPointers) { WE->reverseBlock(); std::reverse(WE->getSubExceptions().begin(), WE->getSubExceptions().end()); } @@ -82,7 +86,6 @@ void WebAssemblyExceptionInfo::recalculate( void WebAssemblyExceptionInfo::releaseMemory() { BBMap.clear(); - DeleteContainerPointers(TopLevelExceptions); TopLevelExceptions.clear(); } @@ -181,6 +184,6 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE) { } void WebAssemblyExceptionInfo::print(raw_ostream &OS, const Module *) const { - for (auto *WE : TopLevelExceptions) + for (auto &WE : TopLevelExceptions) WE->print(OS); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h index 9a90d7df7d47d..50151ec8da5aa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h @@ -43,13 +43,12 @@ class WebAssemblyException { MachineBasicBlock *EHPad = nullptr; WebAssemblyException *ParentException = nullptr; - std::vector SubExceptions; + std::vector> SubExceptions; std::vector Blocks; SmallPtrSet BlockSet; public: WebAssemblyException(MachineBasicBlock *EHPad) : EHPad(EHPad) {} - ~WebAssemblyException() { DeleteContainerPointers(SubExceptions); } WebAssemblyException(const WebAssemblyException &) = delete; const WebAssemblyException &operator=(const WebAssemblyException &) = delete; @@ -83,14 +82,16 @@ class WebAssemblyException { unsigned getNumBlocks() const { return Blocks.size(); } std::vector &getBlocksVector() { return Blocks; } - const std::vector &getSubExceptions() const { + const std::vector> &getSubExceptions() const { return SubExceptions; } - std::vector &getSubExceptions() { + std::vector> &getSubExceptions() { return SubExceptions; } - void addSubException(WebAssemblyException *E) { SubExceptions.push_back(E); } - using iterator = typename std::vector::const_iterator; + void addSubException(std::unique_ptr E) { + SubExceptions.push_back(std::move(E)); + } + using iterator = typename decltype(SubExceptions)::const_iterator; iterator begin() const { return SubExceptions.begin(); } iterator end() const { return SubExceptions.end(); } @@ -117,7 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE); class WebAssemblyExceptionInfo final : public MachineFunctionPass { // Mapping of basic blocks to the innermost exception they occur in DenseMap BBMap; - 
std::vector TopLevelExceptions; + std::vector> TopLevelExceptions; void discoverAndMapException(WebAssemblyException *WE, const MachineDominatorTree &MDT, @@ -156,9 +157,9 @@ class WebAssemblyExceptionInfo final : public MachineFunctionPass { BBMap[MBB] = WE; } - void addTopLevelException(WebAssemblyException *WE) { + void addTopLevelException(std::unique_ptr WE) { assert(!WE->getParentException() && "Not a top level exception!"); - TopLevelExceptions.push_back(WE); + TopLevelExceptions.push_back(std::move(WE)); } void print(raw_ostream &OS, const Module *M = nullptr) const override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index f7e98804bf6e3..063f20b280889 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -761,7 +761,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { return false; bool IsDirect = Func != nullptr; - if (!IsDirect && isa(Call->getCalledValue())) + if (!IsDirect && isa(Call->getCalledOperand())) return false; FunctionType *FuncTy = Call->getFunctionType(); @@ -847,7 +847,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { unsigned CalleeReg = 0; if (!IsDirect) { - CalleeReg = getRegForValue(Call->getCalledValue()); + CalleeReg = getRegForValue(Call->getCalledOperand()); if (!CalleeReg) return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 464a54a011fcc..7abb6fa8905cc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -76,7 +76,7 @@ static void findUses(Value *V, Function &F, if (!CB) // Skip uses that aren't immediately called continue; - Value *Callee = CB->getCalledValue(); + Value *Callee = CB->getCalledOperand(); if (Callee != V) // Skip calls where the function isn't the callee continue; @@ -307,7 +307,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { if (CallMain) { Main->setName("__original_main"); auto *MainWrapper = - cast(CallMain->getCalledValue()->stripPointerCasts()); + cast(CallMain->getCalledOperand()->stripPointerCasts()); delete CallMain; if (Main->isDeclaration()) { // The wrapper is not needed in this case as we don't need to export diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 79c4800b699d4..303b33e128d53 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -183,11 +183,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setCondCodeAction(static_cast(CC), MVT::v2i64, Custom); // 64x2 conversions are not in the spec - if (!Subtarget->hasUnimplementedSIMD128()) - for (auto Op : - {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}) - for (auto T : {MVT::v2i64, MVT::v2f64}) - setOperationAction(Op, T, Expand); + for (auto Op : + {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(Op, T, Expand); } // As a special case, these operators use the type to mean the type to diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index a9cb9177f5a20..b06a8913b0e27 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -67,10 +67,10 @@ multiclass SIMDLoadSplat simdop> { vec#".load_splat\t$off$p2align", simdop>; } -defm "" : SIMDLoadSplat<"v8x16", 194>; -defm "" : SIMDLoadSplat<"v16x8", 195>; -defm "" : SIMDLoadSplat<"v32x4", 196>; -defm "" : SIMDLoadSplat<"v64x2", 197>; +defm "" : SIMDLoadSplat<"v8x16", 7>; +defm "" : SIMDLoadSplat<"v16x8", 8>; +defm "" : SIMDLoadSplat<"v32x4", 9>; +defm "" : SIMDLoadSplat<"v64x2", 10>; def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, @@ -116,9 +116,9 @@ multiclass SIMDLoadExtend simdop> { } } -defm "" : SIMDLoadExtend; -defm "" : SIMDLoadExtend; -defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; let Predicates = [HasUnimplementedSIMD128] in foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in @@ -144,7 +144,7 @@ defm STORE_V128 : SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), (outs), (ins P2Align:$p2align, offset32_op:$off), [], "v128.store\t${off}(${addr})$p2align, $vec", - "v128.store\t$off$p2align", 1>; + "v128.store\t$off$p2align", 11>; foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { // Def load and store patterns from WebAssemblyInstrMemory.td for vector types @@ -166,7 +166,7 @@ multiclass ConstVec { defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, [(set V128:$dst, (vec_t pat))], "v128.const\t$dst, "#args, - "v128.const\t"#args, 2>; + "v128.const\t"#args, 12>; } defm "" : ConstVec; + 13>; // Shuffles after custom lowering def wasm_shuffle_t : SDTypeProfile<1, 18, []>; @@ -278,7 +278,7 @@ defm SWIZZLE : SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), [(set (v16i8 V128:$dst), (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], - "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>; def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), (SWIZZLE V128:$src, V128:$mask)>; @@ -305,12 +305,12 @@ multiclass Splat; } -defm "" : Splat; -defm "" : Splat; -defm "" : Splat; -defm "" : Splat; -defm "" : Splat; -defm "" : Splat; +defm "" : Splat; +defm "" : Splat; +defm "" : Splat; +defm "" : Splat; +defm "" : Splat; +defm "" : Splat; // scalar_to_vector leaves high lanes undefined, so can be a splat class ScalarSplatPat; } -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; @@ -387,12 +387,12 @@ multiclass ReplaceLane; } -defm "" : ReplaceLane; -defm "" : ReplaceLane; -defm "" : ReplaceLane; -defm "" : ReplaceLane; -defm "" : ReplaceLane; -defm "" : ReplaceLane; +defm "" : ReplaceLane; +defm "" : ReplaceLane; +defm "" : ReplaceLane; +defm "" : ReplaceLane; +defm "" : ReplaceLane; +defm "" : ReplaceLane; // Lower undef lane indices to zero def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), @@ -438,35 +438,35 @@ multiclass SIMDConditionFP baseInst> { // Equality: eq let isCommutable = 1 in { -defm EQ : SIMDConditionInt<"eq", SETEQ, 24>; -defm EQ : 
SIMDConditionFP<"eq", SETOEQ, 64>; +defm EQ : SIMDConditionInt<"eq", SETEQ, 35>; +defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>; } // isCommutable = 1 // Non-equality: ne let isCommutable = 1 in { -defm NE : SIMDConditionInt<"ne", SETNE, 25>; -defm NE : SIMDConditionFP<"ne", SETUNE, 65>; +defm NE : SIMDConditionInt<"ne", SETNE, 36>; +defm NE : SIMDConditionFP<"ne", SETUNE, 66>; } // isCommutable = 1 // Less than: lt_s / lt_u / lt -defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>; -defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>; -defm LT : SIMDConditionFP<"lt", SETOLT, 66>; +defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>; +defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>; +defm LT : SIMDConditionFP<"lt", SETOLT, 67>; // Greater than: gt_s / gt_u / gt -defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>; -defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>; -defm GT : SIMDConditionFP<"gt", SETOGT, 67>; +defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>; +defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>; +defm GT : SIMDConditionFP<"gt", SETOGT, 68>; // Less than or equal: le_s / le_u / le -defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>; -defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>; -defm LE : SIMDConditionFP<"le", SETOLE, 68>; +defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>; +defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>; +defm LE : SIMDConditionFP<"le", SETOLE, 69>; // Greater than or equal: ge_s / ge_u / ge -defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>; -defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>; -defm GE : SIMDConditionFP<"ge", SETOGE, 69>; +defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>; +defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>; +defm GE : SIMDConditionFP<"ge", SETOGE, 70>; // Lower float comparisons that don't care about NaN to standard WebAssembly // float comparisons. 
These instructions are generated with nnan and in the @@ -515,19 +515,19 @@ multiclass SIMDUnary; +defm NOT: SIMDUnary; -// Bitwise logic: v128.and / v128.or / v128.xor +// Bitwise logic: v128.and / v128.andnot / v128.or / v128.xor let isCommutable = 1 in { -defm AND : SIMDBitwise; -defm OR : SIMDBitwise; -defm XOR : SIMDBitwise; +defm AND : SIMDBitwise; +defm OR : SIMDBitwise; +defm XOR : SIMDBitwise; } // isCommutable = 1 // Bitwise logic: v128.andnot def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; let Predicates = [HasUnimplementedSIMD128] in -defm ANDNOT : SIMDBitwise; +defm ANDNOT : SIMDBitwise; // Bitwise select: v128.bitselect foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in @@ -538,7 +538,7 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) )) )], - "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>; + "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; // Bitselect is equivalent to (c & v1) | (~c & v2) foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in @@ -553,9 +553,9 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in multiclass SIMDUnaryInt baseInst> { defm "" : SIMDUnary; - defm "" : SIMDUnary; - defm "" : SIMDUnary; - defm "" : SIMDUnary; + defm "" : SIMDUnary; + defm "" : SIMDUnary; + defm "" : SIMDUnary; } multiclass SIMDReduceVec baseInst> { defm "" : SIMDReduceVec; - defm "" : SIMDReduceVec; - defm "" : SIMDReduceVec; - defm "" : SIMDReduceVec; + defm "" : SIMDReduceVec; + defm "" : SIMDReduceVec; + defm "" : SIMDReduceVec; } // Integer vector negation def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; // Integer absolute value: abs -defm ABS : SIMDUnary; -defm ABS : SIMDUnary; -defm ABS : SIMDUnary; +defm ABS : SIMDUnaryInt; // Integer negation: neg -defm NEG : SIMDUnaryInt; +defm NEG : SIMDUnaryInt; // Any lane true: any_true -defm ANYTRUE : SIMDReduce; +defm ANYTRUE : SIMDReduce; // All lanes true: all_true -defm ALLTRUE : SIMDReduce; +defm ALLTRUE : SIMDReduce; // Reductions already return 0 or 1, so and 1, setne 0, and seteq 1 // can be folded out @@ -619,9 +617,9 @@ multiclass SIMDBitmask simdop> { vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>; } -defm BITMASK : SIMDBitmask; -defm BITMASK : SIMDBitmask; -defm BITMASK : SIMDBitmask; +defm BITMASK : SIMDBitmask; +defm BITMASK : SIMDBitmask; +defm BITMASK : SIMDBitmask; //===----------------------------------------------------------------------===// // Bit shifts @@ -639,19 +637,19 @@ multiclass SIMDShift baseInst> { defm "" : SIMDShift; defm "" : SIMDShift; + !add(baseInst, 32)>; defm "" : SIMDShift; + !add(baseInst, 64)>; defm "" : SIMDShift; + name, !add(baseInst, 96)>; } // Left shift by scalar: shl -defm SHL : SIMDShiftInt; +defm SHL : SIMDShiftInt; // Right shift by scalar: shr_s / shr_u -defm SHR_S : SIMDShiftInt; -defm SHR_U : SIMDShiftInt; +defm SHR_S : SIMDShiftInt; +defm SHR_U : SIMDShiftInt; // Truncate i64 shift operands to i32s, except if they are already i32s foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in { @@ -683,49 +681,49 @@ def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)), multiclass SIMDBinaryIntSmall baseInst> { defm "" : SIMDBinary; - defm "" : SIMDBinary; + defm "" : SIMDBinary; } multiclass SIMDBinaryIntNoI64x2 baseInst> { defm "" : SIMDBinaryIntSmall; - defm "" : SIMDBinary; + defm "" : SIMDBinary; } multiclass SIMDBinaryInt baseInst> { defm "" : SIMDBinaryIntNoI64x2; - defm "" : SIMDBinary; + defm "" : 
SIMDBinary; } // Integer addition: add / add_saturate_s / add_saturate_u let isCommutable = 1 in { -defm ADD : SIMDBinaryInt; -defm ADD_SAT_S : SIMDBinaryIntSmall; -defm ADD_SAT_U : SIMDBinaryIntSmall; +defm ADD : SIMDBinaryInt; +defm ADD_SAT_S : SIMDBinaryIntSmall; +defm ADD_SAT_U : SIMDBinaryIntSmall; } // isCommutable = 1 // Integer subtraction: sub / sub_saturate_s / sub_saturate_u -defm SUB : SIMDBinaryInt; +defm SUB : SIMDBinaryInt; defm SUB_SAT_S : - SIMDBinaryIntSmall; + SIMDBinaryIntSmall; defm SUB_SAT_U : - SIMDBinaryIntSmall; + SIMDBinaryIntSmall; // Integer multiplication: mul let isCommutable = 1 in -defm MUL : SIMDBinaryIntNoI64x2; +defm MUL : SIMDBinaryIntNoI64x2; // Integer min_s / min_u / max_s / max_u let isCommutable = 1 in { -defm MIN_S : SIMDBinaryIntNoI64x2; -defm MIN_U : SIMDBinaryIntNoI64x2; -defm MAX_S : SIMDBinaryIntNoI64x2; -defm MAX_U : SIMDBinaryIntNoI64x2; +defm MIN_S : SIMDBinaryIntNoI64x2; +defm MIN_U : SIMDBinaryIntNoI64x2; +defm MAX_S : SIMDBinaryIntNoI64x2; +defm MAX_U : SIMDBinaryIntNoI64x2; } // isCommutable = 1 // Integer unsigned rounding average: avgr_u let isCommutable = 1 in { -defm AVGR_U : SIMDBinary; -defm AVGR_U : SIMDBinary; +defm AVGR_U : SIMDBinary; +defm AVGR_U : SIMDBinary; } def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), @@ -747,7 +745,7 @@ let isCommutable = 1 in defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))], "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s", - 219>; + 180>; //===----------------------------------------------------------------------===// // Floating-point unary arithmetic @@ -755,17 +753,17 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), multiclass SIMDUnaryFP baseInst> { defm "" : SIMDUnary; - defm "" : SIMDUnary; + defm "" : SIMDUnary; } // Absolute value: abs -defm ABS : SIMDUnaryFP; +defm ABS : SIMDUnaryFP; // Negation: neg -defm NEG : SIMDUnaryFP; +defm NEG : SIMDUnaryFP; // Square root: sqrt -defm SQRT : SIMDUnaryFP; +defm SQRT : SIMDUnaryFP; //===----------------------------------------------------------------------===// // Floating-point binary arithmetic @@ -773,28 +771,28 @@ defm SQRT : SIMDUnaryFP; multiclass SIMDBinaryFP baseInst> { defm "" : SIMDBinary; - defm "" : SIMDBinary; + defm "" : SIMDBinary; } // Addition: add let isCommutable = 1 in -defm ADD : SIMDBinaryFP; +defm ADD : SIMDBinaryFP; // Subtraction: sub -defm SUB : SIMDBinaryFP; +defm SUB : SIMDBinaryFP; // Multiplication: mul let isCommutable = 1 in -defm MUL : SIMDBinaryFP; +defm MUL : SIMDBinaryFP; // Division: div -defm DIV : SIMDBinaryFP; +defm DIV : SIMDBinaryFP; // NaN-propagating minimum: min -defm MIN : SIMDBinaryFP; +defm MIN : SIMDBinaryFP; // NaN-propagating maximum: max -defm MAX : SIMDBinaryFP; +defm MAX : SIMDBinaryFP; //===----------------------------------------------------------------------===// // Conversions @@ -808,23 +806,13 @@ multiclass SIMDConvert; } -// Integer to floating point: convert -defm "" : SIMDConvert; -defm "" : SIMDConvert; - -let Predicates = [HasUnimplementedSIMD128] in { -defm "" : SIMDConvert; -defm "" : SIMDConvert; -} - // Floating point to integer with saturation: trunc_sat -defm "" : SIMDConvert; -defm "" : SIMDConvert; +defm "" : SIMDConvert; +defm "" : SIMDConvert; -let Predicates = [HasUnimplementedSIMD128] in { -defm "" : SIMDConvert; -defm "" : SIMDConvert; -} +// Integer to floating point: convert +defm "" : SIMDConvert; +defm "" : SIMDConvert; // Widening operations 
multiclass SIMDWiden; } -defm "" : SIMDWiden; -defm "" : SIMDWiden; +defm "" : SIMDWiden; +defm "" : SIMDWiden; // Narrowing operations multiclass SIMDNarrow; } -defm "" : SIMDNarrow; -defm "" : SIMDNarrow; +defm "" : SIMDNarrow; +defm "" : SIMDNarrow; // Lower llvm.wasm.trunc.saturate.* to saturating instructions def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>; -def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))), - (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>; -def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))), - (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>; // Bitcasts are nops // Matching bitcast t1 to t1 causes strange errors, so avoid repeating types @@ -902,5 +886,5 @@ multiclass SIMDQFM baseInst> { vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; } -defm "" : SIMDQFM; -defm "" : SIMDQFM; +defm "" : SIMDQFM; +defm "" : SIMDQFM; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index be4fe0b312267..96a3e9a9d2fea 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -208,6 +208,7 @@ ///===----------------------------------------------------------------------===// #include "WebAssembly.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -258,11 +259,11 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { bool runSjLjOnFunction(Function &F); Function *getFindMatchingCatch(Module &M, unsigned NumClauses); - template Value *wrapInvoke(CallOrInvoke *CI); + Value *wrapInvoke(CallBase *CI); void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult, BasicBlock *&EndBB); - template Function *getInvokeWrapper(CallOrInvoke *CI); + Function *getInvokeWrapper(CallBase *CI); bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } bool canLongjmp(Module &M, const Value *Callee) const; @@ -337,7 +338,7 @@ static std::string getSignature(FunctionType *FTy) { if (FTy->isVarArg()) OS << "_..."; Sig = OS.str(); - Sig.erase(remove_if(Sig, isspace), Sig.end()); + Sig.erase(remove_if(Sig, isSpace), Sig.end()); // When s2wasm parses .s file, a comma means the end of an argument. So a // mangled function name can contain any character but a comma. std::replace(Sig.begin(), Sig.end(), ',', '.'); @@ -388,15 +389,14 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, // %__THREW__.val = __THREW__; __THREW__ = 0; // Returns %__THREW__.val, which indicates whether an exception is thrown (or // whether longjmp occurred), for future use. -template -Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { +Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { LLVMContext &C = CI->getModule()->getContext(); // If we are calling a function that is noreturn, we must remove that // attribute. The code we insert here does expect it to return, after we // catch the exception. 
if (CI->doesNotReturn()) { - if (auto *F = dyn_cast(CI->getCalledValue())) + if (auto *F = CI->getCalledFunction()) F->removeFnAttr(Attribute::NoReturn); CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); } @@ -412,7 +412,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { SmallVector Args; // Put the pointer to the callee as first argument, so it can be called // within the invoke wrapper later - Args.push_back(CI->getCalledValue()); + Args.push_back(CI->getCalledOperand()); Args.append(CI->arg_begin(), CI->arg_end()); CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); NewCall->takeName(CI); @@ -460,18 +460,10 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { } // Get matching invoke wrapper based on callee signature -template -Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { +Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { Module *M = CI->getModule(); SmallVector ArgTys; - Value *Callee = CI->getCalledValue(); - FunctionType *CalleeFTy; - if (auto *F = dyn_cast(Callee)) - CalleeFTy = F->getFunctionType(); - else { - auto *CalleeTy = cast(Callee->getType())->getElementType(); - CalleeFTy = cast(CalleeTy); - } + FunctionType *CalleeFTy = CI->getFunctionType(); std::string Sig = getSignature(CalleeFTy); if (InvokeWrappers.find(Sig) != InvokeWrappers.end()) @@ -764,7 +756,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { LandingPads.insert(II->getLandingPadInst()); IRB.SetInsertPoint(II); - bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue()); + bool NeedInvoke = AllowExceptions && canThrow(II->getCalledOperand()); if (NeedInvoke) { // Wrap invoke with invoke wrapper and generate preamble/postamble Value *Threw = wrapInvoke(II); @@ -779,7 +771,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { // call+branch SmallVector Args(II->arg_begin(), II->arg_end()); CallInst *NewCall = - IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args); + IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setDebugLoc(II->getDebugLoc()); @@ -1005,7 +997,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { if (!CI) continue; - const Value *Callee = CI->getCalledValue(); + const Value *Callee = CI->getCalledOperand(); if (!canLongjmp(M, Callee)) continue; if (isEmAsmCall(M, Callee)) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index ac8ad927d334d..9f767d391122a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -44,13 +44,14 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const { } unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { unsigned Cost = BasicTTIImplBase::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); if (auto *VTy = dyn_cast(Ty)) { switch 
(Opcode) { @@ -64,7 +65,7 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( Opd2Info != TTI::OK_UniformConstantValue) Cost = VTy->getNumElements() * (TargetTransformInfo::TCC_Basic + - getArithmeticInstrCost(Opcode, VTy->getElementType()) + + getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) + TargetTransformInfo::TCC_Basic); break; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 2731dda10becc..79588a9f56698 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -57,6 +57,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 20b8b855430d9..b056cab7fa764 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -400,6 +400,9 @@ class X86MCInstrAnalysis : public MCInstrAnalysis { findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, uint64_t GotSectionVA, const Triple &TargetTriple) const override; + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override; Optional evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr, uint64_t Size) const override; @@ -518,6 +521,15 @@ std::vector> X86MCInstrAnalysis::findPltEntries( } } +bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { + if (Inst.getNumOperands() == 0 || + Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) + return false; + Target = Addr + Size + Inst.getOperand(0).getImm(); + return true; +} + Optional X86MCInstrAnalysis::evaluateMemoryOperandAddress( const MCInst &Inst, uint64_t Addr, uint64_t Size) const { const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 52dec3959c144..cf2f674aad1aa 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -156,6 +156,7 @@ void initializeX86CondBrFoldingPassPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86ExpandPseudoPass(PassRegistry &); +void initializeX86FixupSetCCPassPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &); void initializeX86OptimizeLEAPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index c7990ba5d55aa..921c7793a6b29 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1260,6 +1260,7 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureNOPL, Feature64Bit, FeatureSlow3OpsLEA, + FeatureSlowDivide64, FeatureSlowIncDec, FeatureMacroFusion, FeatureInsertVZEROUPPER diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index a9d94344af379..aa03217d155d5 100644 --- 
a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -407,7 +407,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI, static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { Register Reg = MO.getReg(); - bool EmitPercent = true; + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; if (!X86::GR8RegClass.contains(Reg) && !X86::GR16RegClass.contains(Reg) && @@ -446,6 +446,42 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, return false; } +static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO, + char Mode, raw_ostream &O) { + unsigned Reg = MO.getReg(); + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; + + unsigned Index; + if (X86::VR128XRegClass.contains(Reg)) + Index = Reg - X86::XMM0; + else if (X86::VR256XRegClass.contains(Reg)) + Index = Reg - X86::YMM0; + else if (X86::VR512RegClass.contains(Reg)) + Index = Reg - X86::ZMM0; + else + return true; + + switch (Mode) { + default: // Unknown mode. + return true; + case 'x': // Print V4SFmode register + Reg = X86::XMM0 + Index; + break; + case 't': // Print V8SFmode register + Reg = X86::YMM0 + Index; + break; + case 'g': // Print V16SFmode register + Reg = X86::ZMM0 + Index; + break; + } + + if (EmitPercent) + O << '%'; + + O << X86ATTInstPrinter::getRegisterName(Reg); + return false; +} + /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -520,6 +556,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, PrintOperand(MI, OpNo, O); return false; + case 'x': // Print V4SFmode register + case 't': // Print V8SFmode register + case 'g': // Print V16SFmode register + if (MO.isReg()) + return printAsmVRegister(*this, MO, ExtraCode[0], O); + PrintOperand(MI, OpNo, O); + return false; + case 'P': // This is the operand of a call, treat specially. PrintPCRelImm(MI, OpNo, O); return false; diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 438b9fd8eebb2..488ee51f1d89b 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -283,7 +283,7 @@ class InstrReplaceWithCopy : public InstrConverterBase { // A converter is identified by <destination domain, source opcode> typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; -typedef DenseMap<InstrConverterBaseKeyTy, InstrConverterBase *> +typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>> InstrConverterBaseMap; /// A closure is a set of virtual register representing all of the edges in @@ -471,8 +471,8 @@ void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { // instruction. for (int i = 0; i != NumDomains; ++i) { if (C.isLegal((RegDomain)i)) { - InstrConverterBase *IC = Converters.lookup({i, MI->getOpcode()}); - if (!IC || !IC->isLegal(MI, TII)) + auto I = Converters.find({i, MI->getOpcode()}); + if (I == Converters.end() || !I->second->isLegal(MI, TII)) C.setIllegal((RegDomain)i); } } @@ -484,8 +484,8 @@ double X86DomainReassignment::calculateCost(const Closure &C, double Cost = 0.0; for (auto *MI : C.instructions()) - Cost += - Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI); + Cost += Converters.find({DstDomain, MI->getOpcode()}) + ->second->getExtraCost(MI, MRI); return Cost; } @@ -501,8 +501,8 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { // appropriate converter.
SmallVector<MachineInstr *, 8> ToErase; for (auto *MI : C.instructions()) - if (Converters.lookup({Domain, MI->getOpcode()}) - ->convertInstr(MI, TII, MRI)) + if (Converters.find({Domain, MI->getOpcode()}) + ->second->convertInstr(MI, TII, MRI)) ToErase.push_back(MI); // Iterate all registers in the closure, replace them with registers in the @@ -606,19 +606,21 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) { void X86DomainReassignment::initConverters() { Converters[{MaskDomain, TargetOpcode::PHI}] = - new InstrIgnore(TargetOpcode::PHI); + std::make_unique<InstrIgnore>(TargetOpcode::PHI); Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = - new InstrIgnore(TargetOpcode::IMPLICIT_DEF); + std::make_unique<InstrIgnore>(TargetOpcode::IMPLICIT_DEF); Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = - new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2); + std::make_unique<InstrReplaceWithCopy>(TargetOpcode::INSERT_SUBREG, 2); Converters[{MaskDomain, TargetOpcode::COPY}] = - new InstrCOPYReplacer(TargetOpcode::COPY, MaskDomain, TargetOpcode::COPY); + std::make_unique<InstrCOPYReplacer>(TargetOpcode::COPY, MaskDomain, + TargetOpcode::COPY); auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { - Converters[{MaskDomain, From}] = new InstrReplacerDstCOPY(From, To); + Converters[{MaskDomain, From}] = + std::make_unique<InstrReplacerDstCOPY>(From, To); }; createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm); @@ -638,7 +640,7 @@ void X86DomainReassignment::initConverters() { } auto createReplacer = [&](unsigned From, unsigned To) { - Converters[{MaskDomain, From}] = new InstrReplacer(From, To); + Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To); }; createReplacer(X86::MOV16rm, X86::KMOVWkm); @@ -779,8 +781,6 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { } } - DeleteContainerSeconds(Converters); - LLVM_DEBUG( dbgs() << "***** Machine Function after Domain Reassignment *****\n"); LLVM_DEBUG(MF.print(dbgs())); diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp index 924f429fc1388..09668d7c5468c 100644 --- a/llvm/lib/Target/X86/X86FixupSetCC.cpp +++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp @@ -36,6 +36,8 @@ STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); namespace { class X86FixupSetCCPass : public MachineFunctionPass { public: + static char ID; + X86FixupSetCCPass() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "X86 Fixup SetCC"; } @@ -47,12 +49,12 @@ class X86FixupSetCCPass : public MachineFunctionPass { const X86InstrInfo *TII = nullptr; enum { SearchBound = 16 }; - - static char ID; }; +} // end anonymous namespace char X86FixupSetCCPass::ID = 0; -} + +INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false) FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 4a323d8c90c0a..b538d0b407df2 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -486,7 +486,7 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, void X86FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL) const { + const DebugLoc &DL, bool IsPrologue) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -501,10 +501,15 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t
Offset = MFI.getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + if (IsPrologue) { + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + } else { + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createRestore(nullptr, DwarfReg)); + } } } @@ -1675,7 +1680,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, } // Emit DWARF info specifying the offsets of the callee-saved registers. - emitCalleeSavedFrameMoves(MBB, MBBI, DL); + emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); } // X86 Interrupt handling function cannot assume anything about the direction @@ -1825,6 +1830,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } uint64_t SEHStackAllocAmt = NumBytes; + // AfterPop is the position to insert .cfi_restore. + MachineBasicBlock::iterator AfterPop = MBBI; if (HasFP) { // Pop EBP. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), @@ -1835,6 +1842,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa( nullptr, DwarfStackPtr, -SlotSize)); + if (!MBB.succ_empty() && !MBB.isReturnBlock()) { + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, AfterPop, DL, + MCCFIInstruction::createRestore(nullptr, DwarfFramePtr)); + --MBBI; + --AfterPop; + } --MBBI; } } @@ -1934,6 +1948,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } + // Emit DWARF info specifying the restores of the callee-saved registers. + // If this epilogue block contains the return, or is any other block with + // no successors, there is no need to generate .cfi_restore directives. + if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) { + emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false); + } + if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { // Add the return addr area delta back since we are not tail calling. int Offset = -1 * X86FI->getTCReturnAddrDelta(); diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 700c964f3238a..9326dc9e959ac 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -60,7 +60,7 @@ class X86FrameLowering : public TargetFrameLowering { void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL) const; + const DebugLoc &DL, bool IsPrologue) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function.
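A note on the X86FrameLowering hunks above: the same helper now emits two different CFI directives depending on which end of the function calls it, and the epilogue only bothers when control keeps flowing past the pops. The sketch below is a plain, standalone C++ model of those two decisions; BlockInfo and the helper names are made-up stand-ins (the real code queries MBB.succ_empty() and MBB.isReturnBlock()), not part of the patch.

    #include <cstdio>

    struct BlockInfo {
      bool HasSuccessors;  // models !MBB.succ_empty()
      bool IsReturnBlock;  // models MBB.isReturnBlock()
    };

    // Mirrors emitCalleeSavedFrameMoves' new IsPrologue parameter:
    // prologues record where a callee-saved register was spilled,
    // epilogues cancel that rule once the register is popped.
    const char *cfiDirectiveFor(bool IsPrologue) {
      return IsPrologue ? ".cfi_offset" : ".cfi_restore";
    }

    // Mirrors the emitEpilogue guard: restores only matter when later
    // code in the same function still unwinds through this frame.
    bool needsCfiRestores(bool NeedsDwarfCFI, const BlockInfo &MBB) {
      return NeedsDwarfCFI && MBB.HasSuccessors && !MBB.IsReturnBlock;
    }

    int main() {
      BlockInfo FallThrough{true, false}, Return{false, true};
      std::printf("%s\n", cfiDirectiveFor(true));             // .cfi_offset
      std::printf("%d %d\n", needsCfiRestores(true, FallThrough),
                  needsCfiRestores(true, Return));            // 1 0
    }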
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b46131305bf16..206abcb8c6656 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1466,6 +1466,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); + if (HasBWI) + setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); } for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { @@ -1509,6 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); + if (HasBWI) + setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE // to 512-bit rather than use the AVX2 instructions so that we can use @@ -1521,18 +1525,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal); + setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal); + setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + + if (HasBWI) { + // Extends from v64i1 masks to 512-bit vectors. 
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); + } for (auto VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); @@ -1662,18 +1674,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } - if (!Subtarget.hasBWI()) { + if (HasBWI) { + for (auto VT : { MVT::v64i8, MVT::v32i16 }) { + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + } + } else { setOperationAction(ISD::STORE, MVT::v32i16, Custom); setOperationAction(ISD::STORE, MVT::v64i8, Custom); } if (Subtarget.hasVBMI2()) { - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { setOperationAction(ISD::FSHL, VT, Custom); setOperationAction(ISD::FSHR, VT, Custom); } } - }// has AVX-512 + }// useAVX512Regs // This block controls legalization for operations that don't have // pre-AVX512 equivalents. Without VLX we use 512-bit operations for @@ -1793,35 +1810,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); - } - - // This block controls legalization for v32i16 and v64i8. 512-bits can be - // disabled based on prefer-vector-width and required-vector-width function - // attributes. - if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) { - // Extends from v64i1 masks to 512-bit vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); - - setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { - setOperationAction(ISD::MLOAD, VT, Legal); - setOperationAction(ISD::MSTORE, VT, Legal); - } - - for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { - setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); - } - if (Subtarget.hasVBMI2()) { - setOperationAction(ISD::FSHL, MVT::v32i16, Custom); - setOperationAction(ISD::FSHR, MVT::v32i16, Custom); - } - } - - if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); @@ -5740,13 +5729,21 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { // TODO - Handle more general insert_subvector chains. 
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && - Idx == (VT.getVectorNumElements() / 2) && - Src.getOpcode() == ISD::INSERT_SUBVECTOR && - Src.getOperand(1).getValueType() == SubVT && - isNullConstant(Src.getOperand(2))) { - Ops.push_back(Src.getOperand(1)); - Ops.push_back(Sub); - return true; + Idx == (VT.getVectorNumElements() / 2)) { + // insert_subvector(insert_subvector(undef, x, lo), y, hi) + if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && + isNullConstant(Src.getOperand(2))) { + Ops.push_back(Src.getOperand(1)); + Ops.push_back(Sub); + return true; + } + // insert_subvector(x, extract_subvector(x, lo), hi) + if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) { + Ops.append(2, Sub); + return true; + } } } @@ -5755,13 +5752,14 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { static std::pair splitVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) { - MVT VT = Op.getSimpleValueType(); + EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); unsigned SizeInBits = VT.getSizeInBits(); + assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 && + "Can't split odd sized vector"); SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2); SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2); - return std::make_pair(Lo, Hi); } @@ -6101,8 +6099,8 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT, // Match (xor X, -1) -> X. // Match extract_subvector(xor X, -1) -> extract_subvector(X). // Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). -static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { - V = peekThroughBitcasts(V); +static SDValue IsNOT(SDValue V, SelectionDAG &DAG, bool OneUse = false) { + V = OneUse ? peekThroughOneUseBitcasts(V) : peekThroughBitcasts(V); if (V.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) return V.getOperand(0); @@ -15142,37 +15140,13 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; MVT ScalarVT = VT.getVectorElementType(); - MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); + MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements); - // Rather than splitting build-vectors, just build two narrower build - // vectors. This helps shuffling with splats and zeros. + // Use splitVector/extractSubVector so that split build-vectors just build two + // narrower build vectors. This helps shuffling with splats and zeros. 
auto SplitVector = [&](SDValue V) { - V = peekThroughBitcasts(V); - - MVT OrigVT = V.getSimpleValueType(); - int OrigNumElements = OrigVT.getVectorNumElements(); - int OrigSplitNumElements = OrigNumElements / 2; - MVT OrigScalarVT = OrigVT.getVectorElementType(); - MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); - SDValue LoV, HiV; - - auto *BV = dyn_cast(V); - if (!BV) { - LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, - DAG.getIntPtrConstant(0, DL)); - HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, - DAG.getIntPtrConstant(OrigSplitNumElements, DL)); - } else { - - SmallVector LoOps, HiOps; - for (int i = 0; i < OrigSplitNumElements; ++i) { - LoOps.push_back(BV->getOperand(i)); - HiOps.push_back(BV->getOperand(i + OrigSplitNumElements)); - } - LoV = DAG.getBuildVector(OrigSplitVT, DL, LoOps); - HiV = DAG.getBuildVector(OrigSplitVT, DL, HiOps); - } + std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL); return std::make_pair(DAG.getBitcast(SplitVT, LoV), DAG.getBitcast(SplitVT, HiV)); }; @@ -20294,10 +20268,9 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, return DAG.getBitcast(DstVT, Res); } - // Extract lower/upper subvectors. - unsigned NumSubElts = NumElems / 2; - SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2); - SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2); + // Split lower/upper subvectors. + SDValue Lo, Hi; + std::tie(Lo, Hi) = splitVector(In, DAG, DL); unsigned SubSizeInBits = SrcSizeInBits / 2; InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits()); @@ -20337,7 +20310,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, // Recursively pack lower/upper subvectors, concat result and pack again. assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater"); - EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumSubElts); + EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2); Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget); Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget); @@ -20442,7 +20415,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (!TLI.isTypeLegal(InVT)) { if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && VT.is128BitVector()) { - assert(Subtarget.hasVLX() && "Unexpected subtarget!"); + assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) && + "Unexpected subtarget!"); // The default behavior is to truncate one step, concatenate, and then // truncate the remainder. We'd rather produce two 64-bit results and // concatenate those. @@ -23252,14 +23226,10 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) { if (!Store->isSimple()) return SDValue(); - EVT StoreVT = StoredVal.getValueType(); - unsigned NumElems = StoreVT.getVectorNumElements(); - unsigned HalfSize = StoredVal.getValueSizeInBits() / 2; - unsigned HalfAlign = (128 == HalfSize ? 16 : 32); - SDLoc DL(Store); - SDValue Value0 = extractSubVector(StoredVal, 0, DAG, DL, HalfSize); - SDValue Value1 = extractSubVector(StoredVal, NumElems / 2, DAG, DL, HalfSize); + SDValue Value0, Value1; + std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL); + unsigned HalfAlign = (StoredVal.getValueType().is256BitVector() ? 
16 : 32); SDValue Ptr0 = Store->getBasePtr(); SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfAlign, DL); unsigned Alignment = Store->getAlignment(); @@ -30609,7 +30579,7 @@ bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const { } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (!VT1.isScalarInteger() || !VT2.isScalarInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); @@ -35462,6 +35432,31 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) return R; + // Canonicalize UNARYSHUFFLE(XOR(X,-1)) -> XOR(UNARYSHUFFLE(X),-1) to + // help expose the 'NOT' pattern further up the DAG. + // TODO: This might be beneficial for any binop with a 'splattable' operand. + switch (Opcode) { + case X86ISD::MOVDDUP: + case X86ISD::PSHUFD: { + SDValue Src = N.getOperand(0); + if (Src.hasOneUse() && Src.getValueType() == VT) { + if (SDValue Not = IsNOT(Src, DAG, /*OneUse*/ true)) { + Not = DAG.getBitcast(VT, Not); + Not = Opcode == X86ISD::MOVDDUP + ? DAG.getNode(Opcode, DL, VT, Not) + : DAG.getNode(Opcode, DL, VT, Not, N.getOperand(1)); + EVT IntVT = Not.getValueType().changeTypeToInteger(); + SDValue AllOnes = DAG.getConstant(-1, DL, IntVT); + Not = DAG.getBitcast(IntVT, Not); + Not = DAG.getNode(ISD::XOR, DL, IntVT, Not, AllOnes); + return DAG.getBitcast(VT, Not); + } + } + break; + } + } + + // Handle specific target shuffles. switch (Opcode) { case X86ISD::MOVDDUP: { SDValue Src = N.getOperand(0); @@ -35667,6 +35662,64 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); } + case X86ISD::VZEXT_MOVL: { + SDValue N0 = N.getOperand(0); + + // If this is a vzmovl of a full vector load, replace it with a vzload, + // unless the load is volatile. + if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) { + auto *LN = cast<LoadSDNode>(N0); + if (LN->isSimple()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; + SDValue VZLoad = DAG.getMemIntrinsicNode( + X86ISD::VZEXT_LOAD, DL, Tys, Ops, VT.getVectorElementType(), + LN->getPointerInfo(), LN->getAlign(), + LN->getMemOperand()->getFlags()); + DCI.CombineTo(N.getNode(), VZLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(LN); + return N; + } + } + + // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast + // and can just use a VZEXT_LOAD. + // FIXME: Is there some way to do this with SimplifyDemandedVectorElts? + if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) { + auto *LN = cast<MemIntrinsicSDNode>(N0); + if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; + SDValue VZLoad = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, + LN->getMemoryVT(), LN->getMemOperand()); + DCI.CombineTo(N.getNode(), VZLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(LN); + return N; + } + } + + // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into + // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X))))))) + // if the upper bits of the i64 are zero.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getValueType() == MVT::i64) { + SDValue In = N0.getOperand(0); + APInt Mask = APInt::getHighBitsSet(64, 32); + if (DAG.MaskedValueIsZero(In, Mask)) { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In); + MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); + SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc); + SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec); + return DAG.getBitcast(VT, Movl); + } + } + + return SDValue(); + } case X86ISD::BLENDI: { SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); @@ -36410,62 +36463,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, Movl, N->getOperand(0).getOperand(2)); } - // If this a vzmovl of a full vector load, replace it with a vzload, unless - // the load is volatile. - if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && - ISD::isNormalLoad(N->getOperand(0).getNode())) { - LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0)); - if (LN->isSimple()) { - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = DAG.getMemIntrinsicNode( - X86ISD::VZEXT_LOAD, dl, Tys, Ops, VT.getVectorElementType(), - LN->getPointerInfo(), LN->getAlign(), - LN->getMemOperand()->getFlags()); - DCI.CombineTo(N, VZLoad); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); - DCI.recursivelyDeleteUnusedNodes(LN); - return SDValue(N, 0); - } - } - - // If this a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast and - // can just use a VZEXT_LOAD. - // FIXME: Is there some way to do this with SimplifyDemandedVectorElts? - if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && - N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) { - auto *LN = cast<MemIntrinsicSDNode>(N->getOperand(0)); - if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) { - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; - SDValue VZLoad = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, - LN->getMemoryVT(), LN->getMemOperand()); - DCI.CombineTo(N, VZLoad); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); - DCI.recursivelyDeleteUnusedNodes(LN); - return SDValue(N, 0); - } - } - - // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into - // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X))))))) - // if the upper bits of the i64 are zero.
- if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && - N->getOperand(0)->getOpcode() == ISD::SCALAR_TO_VECTOR && - N->getOperand(0).getOperand(0).hasOneUse() && - N->getOperand(0).getOperand(0).getValueType() == MVT::i64) { - SDValue In = N->getOperand(0).getOperand(0); - APInt Mask = APInt::getHighBitsSet(64, 32); - if (DAG.MaskedValueIsZero(In, Mask)) { - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, In); - MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); - SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Trunc); - SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, VecVT, SclVec); - return DAG.getBitcast(VT, Movl); - } - } - return SDValue(); } @@ -36964,6 +36961,18 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( unsigned BitWidth = OriginalDemandedBits.getBitWidth(); unsigned Opc = Op.getOpcode(); switch(Opc) { + case X86ISD::VTRUNC: { + KnownBits KnownOp; + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + + // Simplify the input, using demanded bit information. + APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits()); + APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements()); + if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1)) + return true; + break; + } case X86ISD::PMULDQ: case X86ISD::PMULUDQ: { // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. @@ -37999,12 +38008,9 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, // First, reduce the source down to 128-bit, applying BinOp to lo/hi. while (SrcVT.getSizeInBits() > 128) { - unsigned NumElts = SrcVT.getVectorNumElements(); - unsigned NumSubElts = NumElts / 2; - SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts); - unsigned SubSizeInBits = SrcVT.getSizeInBits(); - SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits); - SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits); + SDValue Lo, Hi; + std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL); + SrcVT = Lo.getValueType(); MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi); } assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) || @@ -38585,12 +38591,10 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, // vXi8 reduction - sum lo/hi halves then use PSADBW. if (VT == MVT::i8) { while (Rdx.getValueSizeInBits() > 128) { - unsigned HalfSize = VecVT.getSizeInBits() / 2; - unsigned HalfElts = VecVT.getVectorNumElements() / 2; - SDValue Lo = extractSubVector(Rdx, 0, DAG, DL, HalfSize); - SDValue Hi = extractSubVector(Rdx, HalfElts, DAG, DL, HalfSize); - Rdx = DAG.getNode(ISD::ADD, DL, Lo.getValueType(), Lo, Hi); - VecVT = Rdx.getValueType(); + SDValue Lo, Hi; + std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL); + VecVT = Lo.getValueType(); + Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi); } assert(VecVT == MVT::v16i8 && "v16i8 reduction expected"); @@ -41256,10 +41260,27 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - if (SDValue Not = IsNOT(N0, DAG)) { + auto GetNot = [&VT, &DAG](SDValue V) { + // Basic X = NOT(Y) detection. + if (SDValue Not = IsNOT(V, DAG)) + return Not; + // Fold BROADCAST(NOT(Y)) -> BROADCAST(Y). 
+ if (V.getOpcode() == X86ISD::VBROADCAST) { + SDValue Src = V.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (!SrcVT.isVector()) + return SDValue(); + if (SDValue Not = IsNOT(Src, DAG)) + return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT, + DAG.getBitcast(SrcVT, Not)); + } + return SDValue(); + }; + + if (SDValue Not = GetNot(N0)) { X = Not; Y = N1; - } else if (SDValue Not = IsNOT(N1, DAG)) { + } else if (SDValue Not = GetNot(N1)) { X = Not; Y = N0; } else @@ -42052,114 +42073,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, return Ret; } -static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - assert(N->getOpcode() == ISD::OR && "Expected ISD::OR node"); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - - if (!TLI.isOperationLegalOrCustom(ISD::FSHL, VT) || - !TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) - return SDValue(); - - // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - bool OptForSize = DAG.shouldOptForSize(); - unsigned Bits = VT.getScalarSizeInBits(); - - // SHLD/SHRD instructions have lower register pressure, but on some - // platforms they have higher latency than the equivalent - // series of shifts/or that would otherwise be generated. - // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions - // have higher latencies and we are not optimizing for size. - if (!OptForSize && Subtarget.isSHLDSlow()) - return SDValue(); - - if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) - std::swap(N0, N1); - if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) - return SDValue(); - if (!N0.hasOneUse() || !N1.hasOneUse()) - return SDValue(); - - EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - - SDValue ShAmt0 = N0.getOperand(1); - if (ShAmt0.getValueType() != ShiftVT) - return SDValue(); - SDValue ShAmt1 = N1.getOperand(1); - if (ShAmt1.getValueType() != ShiftVT) - return SDValue(); - - // Peek through any modulo shift masks. - SDValue ShMsk0; - if (ShAmt0.getOpcode() == ISD::AND && - isa(ShAmt0.getOperand(1)) && - ShAmt0.getConstantOperandAPInt(1) == (Bits - 1)) { - ShMsk0 = ShAmt0; - ShAmt0 = ShAmt0.getOperand(0); - } - SDValue ShMsk1; - if (ShAmt1.getOpcode() == ISD::AND && - isa(ShAmt1.getOperand(1)) && - ShAmt1.getConstantOperandAPInt(1) == (Bits - 1)) { - ShMsk1 = ShAmt1; - ShAmt1 = ShAmt1.getOperand(0); - } - - if (ShAmt0.getOpcode() == ISD::TRUNCATE) - ShAmt0 = ShAmt0.getOperand(0); - if (ShAmt1.getOpcode() == ISD::TRUNCATE) - ShAmt1 = ShAmt1.getOperand(0); - - SDLoc DL(N); - unsigned Opc = ISD::FSHL; - SDValue Op0 = N0.getOperand(0); - SDValue Op1 = N1.getOperand(0); - if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) { - Opc = ISD::FSHR; - std::swap(Op0, Op1); - std::swap(ShAmt0, ShAmt1); - std::swap(ShMsk0, ShMsk1); - } - - auto GetFunnelShift = [&DAG, &DL, VT, Opc, &ShiftVT](SDValue Op0, SDValue Op1, - SDValue Amt) { - if (Opc == ISD::FSHR) - std::swap(Op0, Op1); - return DAG.getNode(Opc, DL, VT, Op0, Op1, - DAG.getNode(ISD::TRUNCATE, DL, ShiftVT, Amt)); - }; - - // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C ) - // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C ) - if (ShAmt1.getOpcode() == ISD::XOR) { - SDValue Mask = ShAmt1.getOperand(1); - if (auto *MaskC = dyn_cast(Mask)) { - unsigned InnerShift = (ISD::FSHL == Opc ? 
ISD::SRL : ISD::SHL); - SDValue ShAmt1Op0 = ShAmt1.getOperand(0); - if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE) - ShAmt1Op0 = ShAmt1Op0.getOperand(0); - if (MaskC->getSExtValue() == (Bits - 1) && - (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) { - if (Op1.getOpcode() == InnerShift && - isa(Op1.getOperand(1)) && - Op1.getConstantOperandAPInt(1).isOneValue()) { - return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0); - } - // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ). - if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD && - Op1.getOperand(0) == Op1.getOperand(1)) { - return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0); - } - } - } - } - - return SDValue(); -} - static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -42215,9 +42128,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget)) return R; - if (SDValue R = combineOrShiftToFunnelShift(N, DAG, Subtarget)) - return R; - // Attempt to recursively combine an OR of shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); @@ -42565,18 +42475,9 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, // A lambda checking the given SDValue is a constant vector and each element // is in the range [Min, Max]. auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) { - BuildVectorSDNode *BV = dyn_cast(V); - if (!BV || !BV->isConstant()) - return false; - for (SDValue Op : V->ops()) { - ConstantSDNode *C = dyn_cast(Op); - if (!C) - return false; - const APInt &Val = C->getAPIntValue(); - if (Val.ult(Min) || Val.ugt(Max)) - return false; - } - return true; + return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) { + return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max)); + }); }; // Check if each element of the vector is right-shifted by one. @@ -43448,17 +43349,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // of one truncation. // i.e. if one of the inputs will constant fold or the input is repeated. switch (SrcOpcode) { - case ISD::AND: - case ISD::XOR: - case ISD::OR: { - SDValue Op0 = Src.getOperand(0); - SDValue Op1 = Src.getOperand(1); - if (TLI.isOperationLegalOrPromote(SrcOpcode, VT) && - (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) - return TruncateArithmetic(Op0, Op1); - break; - } - case ISD::MUL: // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. @@ -43467,6 +43357,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, !TLI.isOperationLegal(SrcOpcode, SrcVT)) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; + case ISD::AND: + case ISD::XOR: + case ISD::OR: case ISD::ADD: { SDValue Op0 = Src.getOperand(0); SDValue Op1 = Src.getOperand(1); @@ -43623,6 +43516,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, // Use PACKSS if the input has sign-bits that extend all the way to the // packed/truncated value. e.g. Comparison result, sext_in_reg, etc. unsigned NumSignBits = DAG.ComputeNumSignBits(In); + + // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with + // a sign splat. ComputeNumSignBits struggles to see through BITCASTs later + // on and combines/simplifications can't then use it. 
+ if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits()) + return SDValue(); + if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits)) return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); @@ -43862,7 +43762,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, return combineVectorTruncation(N, DAG, Subtarget); } -static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) { +static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); SDLoc DL(N); @@ -43872,6 +43773,11 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) { if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits())); + if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) + return SDValue(N, 0); + return SDValue(); } @@ -46163,7 +46069,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { } if (CC == X86::COND_A) { - SDValue EFLAGS = Y->getOperand(1); + SDValue EFLAGS = Y.getOperand(1); // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // @@ -46176,13 +46082,44 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, DAG.getConstant(0, DL, VT), NewEFLAGS); } } + if (CC == X86::COND_AE) { + // X + SETAE --> sbb X, -1 + // X - SETAE --> adc X, -1 + return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), Y.getOperand(1)); + } + + if (CC == X86::COND_BE) { + // X + SETBE --> sbb X, -1 + // X - SETBE --> adc X, -1 + SDValue EFLAGS = Y.getOperand(1); + // Try to convert COND_BE into COND_AE in an attempt to facilitate + // materializing "setae reg". + // + // Do not flip "e <= c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa(EFLAGS.getOperand(1))) { + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(IsSub ? 
X86ISD::ADC : X86ISD::SBB, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(-1, DL, VT), NewEFLAGS); + } + } + if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); @@ -46729,6 +46666,15 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) && Op0.getOperand(0).getValueType() == VT.getScalarType()) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0)); + + // concat_vectors(extract_subvector(broadcast(x)), + // extract_subvector(broadcast(x))) -> broadcast(x) + if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Op0.getOperand(0).getValueType() == VT) { + if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST || + Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) + return Op0.getOperand(0); + } } // Repeated opcode. @@ -47570,7 +47516,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); - case X86ISD::VTRUNC: return combineVTRUNC(N, DAG); + case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI); case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); @@ -47867,7 +47813,7 @@ static bool clobbersFlagRegisters(const SmallVector &AsmPieces) { } bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { - InlineAsm *IA = cast(CI->getCalledValue()); + InlineAsm *IA = cast(CI->getCalledOperand()); const std::string &AsmStr = IA->getAsmString(); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index ac7175c63553b..3660975900e33 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2863,17 +2863,6 @@ unsigned X86::getSwappedVCMPImm(unsigned Imm) { return Imm; } -bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { - if (!MI.isTerminator()) return false; - - // Conditional branch is a special case. - if (MI.isBranch() && !MI.isBarrier()) - return true; - if (!MI.isPredicable()) - return true; - return !isPredicated(MI); -} - bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { case X86::TCRETURNdi: @@ -4843,11 +4832,31 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // Return true for any instruction the copies the high bits of the first source // operand into the unused high bits of the destination operand. +// Also returns true for instructions that have two inputs where one may +// be undef and we want it to use the same register as the other input. static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum, bool ForLoadFold = false) { // Set the OpNum parameter to the first source operand. OpNum = 1; switch (Opcode) { + case X86::PACKSSWBrr: + case X86::PACKUSWBrr: + case X86::PACKSSDWrr: + case X86::PACKUSDWrr: + case X86::VPACKSSWBrr: + case X86::VPACKUSWBrr: + case X86::VPACKSSDWrr: + case X86::VPACKUSDWrr: + case X86::VPACKSSWBZ128rr: + case X86::VPACKUSWBZ128rr: + case X86::VPACKSSDWZ128rr: + case X86::VPACKUSDWZ128rr: + // These instructions are sometimes used with an undef second source to + // truncate 128-bit vectors to 64-bit with undefined high bits. Return + // true here so BreakFalseDeps will assign this source to the same register + // as the first source to avoid a false dependency. 
+ OpNum = 2; + return true; case X86::VCVTSI2SSrr: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSrr_Int: diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 0780783a9b1fa..fe79073ae3702 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -305,7 +305,6 @@ class X86InstrInfo final : public X86GenInstrInfo { const X86InstrFMA3Group &FMA3Group) const; // Branch analysis. - bool isUnpredicatedTerminator(const MachineInstr &MI) const override; bool isUnconditionalTailCall(const MachineInstr &MI) const override; bool canMakeTailCallConditional(SmallVectorImpl &Cond, const MachineInstr &TailCall) const override; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index b2551b64eb0db..93d727bc3f82a 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -73,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeEvexToVexInstPassPass(PR); initializeFixupLEAPassPass(PR); initializeFPSPass(PR); + initializeX86FixupSetCCPassPass(PR); initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); initializeX86ExpandPseudoPass(PR); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index ca2bf9a05fa02..98f6988266057 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -170,6 +170,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { } int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -256,20 +257,25 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // The OperandValue properties may not be the same as that of the previous // operation; conservatively assume OP_None. int Cost = - 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info, + 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, + Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info, + Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); if (ISD == ISD::SREM) { // For SREM: (X % C) is the equivalent of (X - (X/C)*C) - Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info); + Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info, + Op2Info); + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info, + Op2Info); } return Cost; @@ -277,12 +283,14 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, // Vector unsigned division/remainder will be simplified to shifts/masks. 
if (ISD == ISD::UDIV) - return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); else // UREM - return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::And, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -596,7 +604,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) // On AVX512, a packed v32i16 shift left by a constant build_vector // is lowered into a vector multiply (vpmullw). - return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -608,7 +617,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) // On AVX2, a packed v16i16 shift left by a constant build_vector // is lowered into a vector multiply (vpmullw). - return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -916,13 +926,13 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || ISD == ISD::UREM)) { int ScalarCost = getArithmeticInstrCost( - Opcode, Ty->getScalarType(), Op1Info, Op2Info, + Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost; } // Fallback to the default implementation. 
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); } int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, @@ -1069,9 +1079,9 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb - {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b - {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b - {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b + {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} // vpermt2b }; if (ST->hasVBMI()) @@ -1083,22 +1093,18 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb - {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw - {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw + {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw + {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2 - {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16 - {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc - {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc + {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1 - {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc }; if (ST->hasBWI()) @@ -1357,6 +1363,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, } int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1394,7 +1401,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, - { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 1 }, + { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, }; static const TypeConversionCostTblEntry 
AVX512DQConversionTbl[] = { @@ -1419,14 +1438,31 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, - { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // zmm vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // zmm vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // zmm vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, // vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // zmm vpmovqd { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 },// 2*vpmovqd+concat+vpmovdb - { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 9 }, // FIXME + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, // extend to v16i32 + { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 }, // Sign extend is zmm vpternlogd+vptruncdb. // Zero extend is zmm broadcast load+vptruncdw. 
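For context on how the conversion tables above (and the ones that follow) are consumed: X86TTIImpl::getCastInstrCost checks the most specific subtarget table first and, within a table, takes the first entry whose (ISD opcode, destination type, source type) triple matches, so entry order matters. The real helper is ConvertCostTableLookup in llvm/include/llvm/CodeGen/CostTable.h; the sketch below is a simplified, self-contained stand-in (ConvEntry and convLookup are made-up names, and plain ints replace MVTs).

    #include <cstddef>

    // Simplified stand-in for llvm::TypeConversionCostTblEntry.
    struct ConvEntry { int ISD; int Dst; int Src; int Cost; };

    // First match wins, like ConvertCostTableLookup's linear scan. A null
    // result means "not in this table": the caller then tries the next,
    // more generic table, and finally the BasicTTIImplBase default.
    const ConvEntry *convLookup(const ConvEntry *Tbl, size_t N,
                                int ISD, int Dst, int Src) {
      for (size_t I = 0; I != N; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
          return &Tbl[I];
      return nullptr;
    }

This is also why the patch can retune a cost by editing a single row: whichever table matches first is authoritative, and duplicated rows would silently shadow later ones.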
@@ -1500,12 +1536,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 }, + { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, - { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 }, - { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 }, { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, - { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 }, - { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 }, + { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 }, }; static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] { @@ -1530,6 +1571,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, + + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb }; static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = { @@ -1555,6 +1607,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, }; static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = { + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, // split+2*v8i8 + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16 + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // vpsllq+vptestmq + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // vpmovqd + // sign extend is vpcmpeq+maskedmove+vpmovdw+vpacksswb // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw+vpackuswb { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 }, @@ -1605,6 +1672,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, + + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, { ISD::FP_TO_UINT, 
MVT::i64, MVT::f64, 1 }, @@ -1637,12 +1708,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, @@ -1670,15 +1742,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, @@ -1717,8 +1795,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 }, + + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, // This node is expanded into scalarized operations but BasicTTI is overly // optimistic estimating its cost. It computes 3 per element (one // vector-extract, one scalar conversion and one vector-insert). The @@ -1758,7 +1843,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + // These truncates end up widening elements. 
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, // PMOVZXBQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, // PMOVZXWQ
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, // PMOVZXBD
+
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 1 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 1 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
@@ -1769,6 +1860,13 @@
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
+
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1794,16 +1892,26 @@
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
@@ -1830,11 +1938,19 @@
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
+ // These truncates are really widening elements.
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, // PSHUFD
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // PUNPCKLWD+DQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // PUNPCKLBW+WD+PSHUFD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, // PUNPCKLWD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // PUNPCKLBW+WD
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, // PUNPCKLBW
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB
- { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // PAND+PACKUSWB
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // PAND+PACKUSWB
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
- { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+2*PACKUSWB
{ ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
@@ -1861,7 +1977,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind);
MVT SimpleSrcTy = SrcTy.getSimpleVT();
MVT SimpleDstTy = DstTy.getSimpleVT();
@@ -1922,10 +2038,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return Entry->Cost;
}
- return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
}
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -2109,7 +2226,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
}
unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
@@ -2117,6 +2234,7 @@ unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
@@ -2577,12 +2695,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
}
return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
- ScalarizationCostPassed, I);
+ ScalarizationCostPassed, CostKind, I);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF, const Instruction *I) {
+ unsigned VF,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ROTL, MVT::v8i64, 1 },
{ ISD::ROTL, MVT::v4i64, 1 },
@@ -2672,7 +2792,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * Entry->Cost;
}
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, I);
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -2768,13 +2888,66 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
-unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) {
- return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
+unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) {
+ unsigned Cost = 0;
+
+ // For insertions, an ISD::BUILD_VECTOR style vector initialization can be
+ // much cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
+ if (Insert) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ MVT MScalarTy = LT.second.getScalarType();
+
+ if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
+ (MScalarTy.isInteger() && ST->hasSSE41()) ||
+ (MScalarTy == MVT::f32 && ST->hasSSE41())) {
+ // For types we can insert directly, insertion into 128-bit subvectors is
+ // cheap, followed by a cheap chain of concatenations.
+ if (LT.second.getSizeInBits() <= 128) { + Cost += + BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false); + } else { + unsigned NumSubVecs = LT.second.getSizeInBits() / 128; + Cost += (PowerOf2Ceil(NumSubVecs) - 1) * LT.first; + Cost += DemandedElts.countPopulation(); + + // For vXf32 cases, insertion into the 0'th index in each v4f32 + // 128-bit vector is free. + // NOTE: This assumes legalization widens vXf32 vectors. + if (MScalarTy == MVT::f32) + for (unsigned i = 0, e = Ty->getNumElements(); i < e; i += 4) + if (DemandedElts[i]) + Cost--; + } + } else if (LT.second.isVector()) { + // Without fast insertion, we need to use MOVD/MOVQ to pass each demanded + // integer element as a SCALAR_TO_VECTOR, then we build the vector as a + // series of UNPCK followed by CONCAT_VECTORS - all of these can be + // considered cheap. + if (Ty->isIntOrIntVectorTy()) + Cost += DemandedElts.countPopulation(); + + // Get the smaller of the legalized or original pow2-extended number of + // vector elements, which represents the number of unpacks we'll end up + // performing. + unsigned NumElts = LT.second.getVectorNumElements(); + unsigned Pow2Elts = PowerOf2Ceil(Ty->getNumElements()); + Cost += (std::min(NumElts, Pow2Elts) - 1) * LT.first; + } + } + + // TODO: Use default extraction for now, but we should investigate extending this + // to handle repeated subvector extraction. + if (Extract) + Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); + + return Cost; } int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { @@ -2793,9 +2966,11 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // Assume that all other non-power-of-two numbers are scalarized. 
if (!isPowerOf2_32(NumElem)) { + APInt DemandedElts = APInt::getAllOnesValue(NumElem); int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, - AddressSpace); - int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load, + AddressSpace, CostKind); + int SplitCost = getScalarizationOverhead(VTy, DemandedElts, + Opcode == Instruction::Load, Opcode == Instruction::Store); return NumElem * Cost + SplitCost; } @@ -2819,14 +2994,16 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, + TTI::TargetCostKind CostKind) { bool IsLoad = (Instruction::Load == Opcode); bool IsStore = (Instruction::Store == Opcode); VectorType *SrcVTy = dyn_cast(SrcTy); if (!SrcVTy) // To calculate scalar take the regular cost, without mask - return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace); + return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace, + CostKind); unsigned NumElem = SrcVTy->getNumElements(); VectorType *MaskTy = @@ -2835,16 +3012,20 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) || !isPowerOf2_32(NumElem)) { // Scalarization - int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); + APInt DemandedElts = APInt::getAllOnesValue(NumElem); + int MaskSplitCost = + getScalarizationOverhead(MaskTy, DemandedElts, false, true); int ScalarCompareCost = getCmpSelInstrCost( - Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr); - int BranchCost = getCFInstrCost(Instruction::Br); + Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr, + CostKind); + int BranchCost = getCFInstrCost(Instruction::Br, CostKind); int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); - - int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore); + int ValueSplitCost = + getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore); int MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; } @@ -2899,10 +3080,11 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwise) { + bool IsPairwise, + TTI::TargetCostKind CostKind) { // Just use the default implementation for pair reductions. if (IsPairwise) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind); // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. @@ -2972,7 +3154,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, // Type needs to be split. We need LT.first - 1 arithmetic ops. VectorType *SingleOpTy = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); + ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); ArithmeticCost *= LT.first - 1; } @@ -3042,7 +3224,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, // Type needs to be split. We need LT.first - 1 arithmetic ops. 
Type *SingleOpTy = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); + ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); ArithmeticCost *= LT.first - 1; } @@ -3059,7 +3241,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) return ArithmeticCost + Entry->Cost; - return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise, + CostKind); } unsigned NumVecElts = ValVTy->getNumElements(); @@ -3068,7 +3251,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, // Special case power of 2 reductions where the scalar type isn't changed // by type legalization. if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise, + CostKind); unsigned ReductionCost = 0; @@ -3077,7 +3261,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 arithmetic ops. Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ReductionCost = getArithmeticInstrCost(Opcode, Ty); + ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind); ReductionCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); } @@ -3117,13 +3301,14 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, auto *ShiftTy = VectorType::get( Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size); ReductionCost += getArithmeticInstrCost( - Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue, + Instruction::LShr, ShiftTy, CostKind, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } // Add the arithmetic op for this level. - ReductionCost += getArithmeticInstrCost(Opcode, Ty); + ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind); } // Add the final extract element to the cost. @@ -3247,16 +3432,19 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) { CmpOpcode = Instruction::ICmp; } + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // Otherwise fall back to cmp+select. - return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + - getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr); + return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + + getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind); } int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, - bool IsPairwise, bool IsUnsigned) { + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { // Just use the default implementation for pair reductions. if (IsPairwise) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); @@ -3372,7 +3560,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, // by type legalization. 
if (!isPowerOf2_32(ValVTy->getNumElements()) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, + CostKind); // Now handle reduction with the legal type, taking into account size changes // at each level. @@ -3409,7 +3598,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, VectorType *ShiftTy = VectorType::get( Type::getIntNTy(ValTy->getContext(), Size), 128 / Size); MinMaxCost += getArithmeticInstrCost( - Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue, + Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -3437,7 +3627,8 @@ int X86TTIImpl::getIntImmCost(int64_t Val) { return 2 * TTI::TCC_Basic; } -int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3472,7 +3663,7 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { } int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3559,17 +3750,18 @@ int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im if (Idx == ImmIdx) { int NumConstants = divideCeil(BitSize, 64); - int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); + int Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? static_cast(TTI::TCC_Free) : Cost; } - return X86TTIImpl::getIntImmCost(Imm, Ty); + return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); } int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3600,11 +3792,12 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; break; } - return X86TTIImpl::getIntImmCost(Imm, Ty); + return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); } -unsigned X86TTIImpl::getUserCost(const User *U, - ArrayRef Operands) { +unsigned +X86TTIImpl::getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { if (isa(U)) { Value *Ptr = U->getOperand(1); // Store instruction with index and scale costs 2 Uops. @@ -3615,7 +3808,7 @@ unsigned X86TTIImpl::getUserCost(const User *U, } return TTI::TCC_Basic; } - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, CostKind); } // Return an average cost of Gather / Scatter instruction, maybe improved later @@ -3679,7 +3872,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, ? ST->getGatherOverhead() : ST->getScatterOverhead(); return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + TTI::TCK_RecipThroughput); } /// Return the cost of full scalarization of gather / scatter operation. 
@@ -3694,22 +3888,26 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, bool VariableMask, unsigned Alignment, unsigned AddressSpace) { unsigned VF = cast(SrcVTy)->getNumElements(); + APInt DemandedElts = APInt::getAllOnesValue(VF); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; int MaskUnpackCost = 0; if (VariableMask) { VectorType *MaskTy = VectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF); - MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true); + MaskUnpackCost = + getScalarizationOverhead(MaskTy, DemandedElts, false, true); int ScalarCompareCost = getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), - nullptr); - int BranchCost = getCFInstrCost(Instruction::Br); + nullptr, CostKind); + int BranchCost = getCFInstrCost(Instruction::Br, CostKind); MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); } // The cost of the scalar loads/stores. int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); int InsertExtractCost = 0; if (Opcode == Instruction::Load) @@ -3727,10 +3925,11 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, } /// Calculate the cost of Gather / Scatter operation -int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, - Value *Ptr, bool VariableMask, - unsigned Alignment, - const Instruction *I = nullptr) { +int X86TTIImpl::getGatherScatterOpCost( + unsigned Opcode, Type *SrcVTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); unsigned VF = cast(SrcVTy)->getNumElements(); PointerType *PtrTy = dyn_cast(Ptr->getType()); @@ -4012,19 +4211,21 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); // We currently Support only fully-interleaved groups, with no gaps. // TODO: Support also strided loads (interleaved-groups with gaps). if (Indices.size() && Indices.size() != Factor) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); // VecTy for interleave memop is . // So, for VF=4, Interleave Factor = 3, Element type = i32 we have @@ -4036,7 +4237,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, // (see MachineValueType.h::getVectorVT()). 
if (!LegalVT.isVector()) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); unsigned VF = cast(VecTy)->getNumElements() / Factor; Type *ScalarTy = cast(VecTy)->getElementType(); @@ -4052,13 +4254,15 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, VectorType::get(cast(VecTy)->getElementType(), LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); VectorType *VT = VectorType::get(ScalarTy, VF); EVT ETy = TLI->getValueType(DL, VT); if (!ETy.isSimple()) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); // TODO: Complete for other data-types and strides. // Each combination of Stride, ElementTy and VF results in a different @@ -4117,7 +4321,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, CostKind); } // Get estimation for interleaved load/store operations and strided load. @@ -4129,12 +4333,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); // VecTy for interleave memop is . @@ -4153,7 +4358,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, VectorType::get(cast(VecTy)->getElementType(), LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); unsigned VF = cast(VecTy)->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); @@ -4256,6 +4462,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { @@ -4269,14 +4476,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, }; if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); if (ST->hasAVX2()) return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 1b6f741ed4de8..ee9f3a67cd3be 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -119,6 +119,7 @@ class X86TTIImpl : public BasicTTIImplBase { unsigned 
getMaxInterleaveFactor(unsigned VF); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -128,64 +129,82 @@ class X86TTIImpl : public BasicTTIImplBase { int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, + bool Insert, bool Extract); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const Instruction *I); int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); unsigned getAtomicMemIntrinsicMaxElementSize() const; - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, + FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, const Instruction *I = nullptr); + unsigned VF = 1, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned); int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned); + bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int 
getIntImmCost(int64_t); - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - unsigned getUserCost(const User *U, ArrayRef Operands); + unsigned getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp index a409907780db6..553acdf6d9ca1 100644 --- a/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/llvm/lib/Target/X86/X86WinEHState.cpp @@ -754,7 +754,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) { auto *Call = dyn_cast(&I); if (!Call) continue; - if (Call->getCalledValue()->stripPointerCasts() != + if (Call->getCalledOperand()->stripPointerCasts() != SetJmp3.getCallee()->stripPointerCasts()) continue; diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 3ea53126d8506..d622c16722031 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -59,10 +59,7 @@ class LibOptTable : public opt::OptTable { } -static std::string getOutputPath(opt::InputArgList *Args, - const NewArchiveMember &FirstMember) { - if (auto *Arg = Args->getLastArg(OPT_out)) - return Arg->getValue(); +static std::string getDefaultOutputPath(const NewArchiveMember &FirstMember) { SmallString<128> Val = StringRef(FirstMember.Buf->getBufferIdentifier()); sys::path::replace_extension(Val, ".lib"); return std::string(Val.str()); @@ -292,8 +289,9 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { return 0; } - // If no input files, silently do nothing to match lib.exe. - if (!Args.hasArgNoClaim(OPT_INPUT)) + // If no input files and not told otherwise, silently do nothing to match + // lib.exe + if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty)) return 0; if (Args.hasArg(OPT_lst)) { @@ -352,7 +350,15 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { } // Create an archive file. - std::string OutputPath = getOutputPath(&Args, Members[0]); + std::string OutputPath; + if (auto *Arg = Args.getLastArg(OPT_out)) { + OutputPath = Arg->getValue(); + } else if (!Members.empty()) { + OutputPath = getDefaultOutputPath(Members[0]); + } else { + llvm::errs() << "no output path given, and cannot infer with no inputs\n"; + return 1; + } // llvm-lib uses relative paths for both regular and thin archives, unlike // standard GNU ar, which only uses relative paths for thin archives and // basenames for regular archives. 
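The LibDriver change above reorders how the archive's output path is chosen: an explicit /out: argument always wins, otherwise the name is derived from the first member, and when there is neither (e.g. /llvmlibempty with no inputs) there is nothing to infer from, which is now a hard error. Below is a minimal self-contained sketch of that selection order; chooseOutputPath and replaceExtensionWithLib are illustrative stand-ins, not the LLVM APIs.

#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Derive "<name>.lib" from an input path; mirrors
// sys::path::replace_extension in spirit only.
static std::string replaceExtensionWithLib(std::string Path) {
  std::size_t Dot = Path.find_last_of('.');
  if (Dot != std::string::npos)
    Path.erase(Dot);
  return Path + ".lib";
}

// Selection order from the patch: explicit /out: wins; otherwise infer from
// the first member; with no members there is nothing to infer, reported here
// as an empty optional.
static std::optional<std::string>
chooseOutputPath(const std::optional<std::string> &OutArg,
                 const std::vector<std::string> &Members) {
  if (OutArg)
    return *OutArg;
  if (!Members.empty())
    return replaceExtensionWithLib(Members.front());
  return std::nullopt;
}

int main() {
  std::cout << *chooseOutputPath(std::nullopt, {"foo.obj"}) << "\n";   // foo.lib
  std::cout << *chooseOutputPath(std::string("bar.lib"), {}) << "\n";  // bar.lib
  std::cout << chooseOutputPath(std::nullopt, {}).has_value() << "\n"; // 0
}

Returning std::nullopt here corresponds to the new "no output path given, and cannot infer with no inputs" diagnostic in libDriverMain.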
diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td index 7863196126a85..5891e238a328d 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -18,6 +18,9 @@ def out : P<"out", "Path to file to write output">; def llvmlibthin : F<"llvmlibthin">, HelpText<"Make .lib point to .obj files instead of copying their contents">; +def llvmlibempty : F<"llvmlibempty">, + HelpText<"When given no contents, produce an empty .lib file">; + def machine: P<"machine", "Specify target platform">; def help : F<"help">; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 7a1d32896f96f..1e0bdc168e5ba 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -584,7 +584,7 @@ void CoroCloner::replaceEntryBlock() { // Move any allocas into Entry that weren't moved into the frame. for (auto IT = OldEntry->begin(), End = OldEntry->end(); IT != End;) { Instruction &I = *IT++; - if (!isa(&I) || I.getNumUses() == 0) + if (!isa(&I) || I.use_empty()) continue; I.moveBefore(*Entry, Entry->getFirstInsertionPt()); @@ -1167,7 +1167,7 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, if (!CB) return false; - auto *Callee = CB->getCalledValue()->stripPointerCasts(); + auto *Callee = CB->getCalledOperand()->stripPointerCasts(); // See if the callsite is for resumption or destruction of the coroutine. auto *SubFn = dyn_cast(Callee); @@ -1197,7 +1197,7 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, } // Grab the CalledValue from CB before erasing the CallInstr. - auto *CalledValue = CB->getCalledValue(); + auto *CalledValue = CB->getCalledOperand(); CB->eraseFromParent(); // If no more users remove it. Usually it is a bitcast of SubFn. 
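Several hunks in this patch (CoroSplit above, Attributor below) replace getNumUses() == 0 with use_empty(). The reason is asymptotic: llvm::Value keeps its uses in a singly linked list, so counting them walks the whole list while an emptiness test is a single pointer comparison. The following toy model illustrates the distinction; it assumes a simplified use list and is not the real llvm::Value API.

#include <cassert>
#include <cstddef>

// Toy stand-in for llvm::Use: each use is a node in a singly linked list
// hanging off the value it uses.
struct UseNode {
  UseNode *Next = nullptr;
};

struct ValueLike {
  UseNode *UseListHead = nullptr;

  // O(#uses): must walk the entire list, like llvm::Value::getNumUses().
  std::size_t getNumUses() const {
    std::size_t N = 0;
    for (const UseNode *U = UseListHead; U; U = U->Next)
      ++N;
    return N;
  }

  // O(1): a single null check, like llvm::Value::use_empty().
  bool use_empty() const { return UseListHead == nullptr; }
};

int main() {
  ValueLike V;
  assert(V.use_empty() && V.getNumUses() == 0);

  UseNode U;
  V.UseListHead = &U; // give V one use
  assert(!V.use_empty() && V.getNumUses() == 1);
}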
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index ba08061be9d3c..71595a8e23419 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -258,8 +258,9 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
return HasChanged;
}
-const IRPosition IRPosition::EmptyKey(255);
-const IRPosition IRPosition::TombstoneKey(256);
+const IRPosition IRPosition::EmptyKey(DenseMapInfo<void *>::getEmptyKey());
+const IRPosition
+ IRPosition::TombstoneKey(DenseMapInfo<void *>::getTombstoneKey());
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
@@ -401,52 +402,60 @@ bool IRPosition::getAttrsFromAssumes(Attribute::AttrKind AK,
void IRPosition::verify() {
#ifdef EXPENSIVE_CHECKS
- switch (KindOrArgNo) {
- default:
- assert(KindOrArgNo >= 0 && "Expected argument or call site argument!");
- assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) &&
- "Expected call base or argument for positive attribute index!");
- if (isa<Argument>(AnchorVal)) {
- assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) &&
- "Argument number mismatch!");
- assert(cast<Argument>(AnchorVal) == &getAssociatedValue() &&
- "Associated value mismatch!");
- } else {
- assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) &&
- "Call site argument number mismatch!");
- assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) ==
- &getAssociatedValue() &&
- "Associated value mismatch!");
- }
- break;
+ switch (getPositionKind()) {
case IRP_INVALID:
- assert(!AnchorVal && "Expected no value for an invalid position!");
- break;
+ assert(!Enc.getOpaqueValue() &&
+ "Expected a nullptr for an invalid position!");
+ return;
case IRP_FLOAT:
assert((!isa<CallBase>(&getAssociatedValue()) &&
!isa<Argument>(&getAssociatedValue())) &&
"Expected specialized kind for call base and argument values!");
- break;
+ return;
case IRP_RETURNED:
- assert(isa<Function>(AnchorVal) &&
+ assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'returned' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_CALL_SITE_RETURNED:
- assert((isa<CallBase>(AnchorVal)) &&
+ assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site returned' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_CALL_SITE:
- assert((isa<CallBase>(AnchorVal)) &&
+ assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site function' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_FUNCTION:
- assert(isa<Function>(AnchorVal) &&
+ assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'function' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
+ case IRP_ARGUMENT:
+ assert(isa<Argument>(getAsValuePtr()) &&
+ "Expected argument for an 'argument' position!");
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
+ case IRP_CALL_SITE_ARGUMENT: {
+ Use *U = getAsUsePtr();
+ assert(U && "Expected use for a 'call site argument' position!");
+ assert(isa<CallBase>(U->getUser()) &&
+ "Expected call base
user for a 'call site argument' position!"); + assert(cast(U->getUser())->isArgOperand(U) && + "Expected call base argument operand for a 'call site argument' " + "position"); + assert(cast(U->getUser())->getArgOperandNo(U) == + unsigned(getArgNo()) && + "Argument number mismatch!"); + assert(U->get() == &getAssociatedValue() && "Associated value mismatch!"); + return; + } } #endif } @@ -484,18 +493,10 @@ Attributor::~Attributor() { for (AbstractAttribute *AA : AllAbstractAttributes) AA->~AbstractAttribute(); - // The Kind2AAMap objects are allocated via a BumpPtrAllocator, we call - // the destructor manually. - for (auto &It : AAMap) - It.getSecond()->~Kind2AAMapTy(); - // The QueryMapValueTy objects are allocated via a BumpPtrAllocator, we call // the destructor manually. for (auto &It : QueryMap) It.getSecond()->~QueryMapValueTy(); - - for (auto &It : ArgumentReplacementMap) - DeleteContainerPointers(It.second); } bool Attributor::isAssumedDead(const AbstractAttribute &AA, @@ -970,21 +971,19 @@ ChangeStatus Attributor::run() { // Update all abstract attribute in the work list and record the ones that // changed. - for (AbstractAttribute *AA : Worklist) - if (!AA->getState().isAtFixpoint() && + for (AbstractAttribute *AA : Worklist) { + const auto &AAState = AA->getState(); + if (!AAState.isAtFixpoint() && !isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true)) { - QueriedNonFixAA = false; - if (AA->update(*this) == ChangeStatus::CHANGED) { + if (updateAA(*AA) == ChangeStatus::CHANGED) { ChangedAAs.push_back(AA); - if (!AA->getState().isValidState()) - InvalidAAs.insert(AA); - } else if (!QueriedNonFixAA) { - // If the attribute did not query any non-fix information, the state - // will not change and we can indicate that right away. - AA->getState().indicateOptimisticFixpoint(); } } - + // Use the InvalidAAs vector to propagate invalid states fast transitively + // without requiring updates. + if (!AAState.isValidState()) + InvalidAAs.insert(AA); + } // Add attributes to the changed set if they have been created in the last // iteration. @@ -1265,6 +1264,31 @@ ChangeStatus Attributor::run() { return ManifestChange; } +ChangeStatus Attributor::updateAA(AbstractAttribute &AA) { + // Use a new dependence vector for this update. + DependenceVector DV; + DependenceStack.push_back(&DV); + + auto &AAState = AA.getState(); + ChangeStatus CS = AA.update(*this); + if (DV.empty()) { + // If the attribute did not query any non-fix information, the state + // will not change and we can indicate that right away. + AAState.indicateOptimisticFixpoint(); + } + + if (!AAState.isAtFixpoint()) + rememberDependences(); + + // Verify the stack was used properly, that is we pop the dependence vector we + // put there earlier. + DependenceVector *PoppedDV = DependenceStack.pop_back_val(); + (void)PoppedDV; + assert(PoppedDV == &DV && "Inconsistent usage of the dependence stack!"); + + return CS; +} + /// Create a shallow wrapper for \p F such that \p F has internal linkage /// afterwards. It also sets the original \p F 's name to anonymous /// @@ -1296,7 +1320,7 @@ static void createShallowWrapper(Function &F) { F.setLinkage(GlobalValue::InternalLinkage); F.replaceAllUsesWith(Wrapper); - assert(F.getNumUses() == 0 && "Uses remained after wrapper was created!"); + assert(F.use_empty() && "Uses remained after wrapper was created!"); // Move the COMDAT section to the wrapper. // TODO: Check if we need to keep it for F as well. 
@@ -1390,13 +1414,14 @@ bool Attributor::registerFunctionSignatureRewrite( "Cannot register an invalid rewrite"); Function *Fn = Arg.getParent(); - SmallVectorImpl &ARIs = ArgumentReplacementMap[Fn]; + SmallVectorImpl> &ARIs = + ArgumentReplacementMap[Fn]; if (ARIs.empty()) ARIs.resize(Fn->arg_size()); // If we have a replacement already with less than or equal new arguments, // ignore this request. - ArgumentReplacementInfo *&ARI = ARIs[Arg.getArgNo()]; + std::unique_ptr &ARI = ARIs[Arg.getArgNo()]; if (ARI && ARI->getNumReplacementArgs() <= ReplacementTypes.size()) { LLVM_DEBUG(dbgs() << "[Attributor] Existing rewrite is preferred\n"); return false; @@ -1404,17 +1429,16 @@ bool Attributor::registerFunctionSignatureRewrite( // If we have a replacement already but we like the new one better, delete // the old. - if (ARI) - delete ARI; + ARI.reset(); LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in " << Arg.getParent()->getName() << " with " << ReplacementTypes.size() << " replacements\n"); // Remember the replacement. - ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes, - std::move(CalleeRepairCB), - std::move(ACSRepairCB)); + ARI.reset(new ArgumentReplacementInfo(*this, Arg, ReplacementTypes, + std::move(CalleeRepairCB), + std::move(ACSRepairCB))); return true; } @@ -1430,7 +1454,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures( if (ToBeDeletedFunctions.count(OldFn)) continue; - const SmallVectorImpl &ARIs = It.getSecond(); + const SmallVectorImpl> &ARIs = It.getSecond(); assert(ARIs.size() == OldFn->arg_size() && "Inconsistent state!"); SmallVector NewArgumentTypes; @@ -1439,7 +1463,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures( // Collect replacement argument types and copy over existing attributes. AttributeList OldFnAttributeList = OldFn->getAttributes(); for (Argument &Arg : OldFn->args()) { - if (ArgumentReplacementInfo *ARI = ARIs[Arg.getArgNo()]) { + if (const std::unique_ptr &ARI = ARIs[Arg.getArgNo()]) { NewArgumentTypes.append(ARI->ReplacementTypes.begin(), ARI->ReplacementTypes.end()); NewArgumentAttributes.append(ARI->getNumReplacementArgs(), @@ -1501,7 +1525,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures( for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum) { unsigned NewFirstArgNum = NewArgOperands.size(); (void)NewFirstArgNum; // only used inside assert. - if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) { + if (const std::unique_ptr &ARI = ARIs[OldArgNum]) { if (ARI->ACSRepairCB) ARI->ACSRepairCB(*ARI, ACS, NewArgOperands); assert(ARI->getNumReplacementArgs() + NewFirstArgNum == @@ -1566,7 +1590,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures( auto NewFnArgIt = NewFn->arg_begin(); for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum, ++OldFnArgIt) { - if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) { + if (const std::unique_ptr &ARI = + ARIs[OldArgNum]) { if (ARI->CalleeRepairCB) ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt); NewFnArgIt += ARI->ReplacementTypes.size(); @@ -1679,18 +1704,29 @@ InformationCache::FunctionInfo::~FunctionInfo() { void Attributor::recordDependence(const AbstractAttribute &FromAA, const AbstractAttribute &ToAA, DepClassTy DepClass) { + // If we are outside of an update, thus before the actual fixpoint iteration + // started (= when we create AAs), we do not track dependences because we will + // put all AAs into the initial worklist anyway. 
+ if (DependenceStack.empty()) + return; if (FromAA.getState().isAtFixpoint()) return; + DependenceStack.back()->push_back({&FromAA, &ToAA, DepClass}); +} - QueryMapValueTy *&DepAAs = QueryMap[&FromAA]; - if (!DepAAs) - DepAAs = new (Allocator) QueryMapValueTy(); +void Attributor::rememberDependences() { + assert(!DependenceStack.empty() && "No dependences to remember!"); - if (DepClass == DepClassTy::REQUIRED) - DepAAs->RequiredAAs.insert(const_cast(&ToAA)); - else - DepAAs->OptionalAAs.insert(const_cast(&ToAA)); - QueriedNonFixAA = true; + for (DepInfo &DI : *DependenceStack.back()) { + QueryMapValueTy *&DepAAs = QueryMap[DI.FromAA]; + if (!DepAAs) + DepAAs = new (Allocator) QueryMapValueTy(); + + if (DI.DepClass == DepClassTy::REQUIRED) + DepAAs->RequiredAAs.insert(const_cast(DI.ToAA)); + else + DepAAs->OptionalAAs.insert(const_cast(DI.ToAA)); + } } void Attributor::identifyDefaultAbstractAttributes(Function &F) { @@ -1818,14 +1854,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { } auto CallSitePred = [&](Instruction &I) -> bool { - auto *CB = dyn_cast(&I); - IRPosition CBRetPos = IRPosition::callsite_returned(*CB); + auto &CB = cast(I); + IRPosition CBRetPos = IRPosition::callsite_returned(CB); // Call sites might be dead if they do not have side effects and no live // users. The return value might be dead if there are no live users. getOrCreateAAFor(CBRetPos); - Function *Callee = CB->getCalledFunction(); + Function *Callee = CB.getCalledFunction(); // TODO: Even if the callee is not known now we might be able to simplify // the call/callee. if (!Callee) @@ -1837,18 +1873,18 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { !Callee->hasMetadata(LLVMContext::MD_callback)) return true; - if (!Callee->getReturnType()->isVoidTy() && !CB->use_empty()) { + if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) { - IRPosition CBRetPos = IRPosition::callsite_returned(*CB); + IRPosition CBRetPos = IRPosition::callsite_returned(CB); // Call site return integer values might be limited by a constant range. if (Callee->getReturnType()->isIntegerTy()) getOrCreateAAFor(CBRetPos); } - for (int I = 0, E = CB->getNumArgOperands(); I < E; ++I) { + for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) { - IRPosition CBArgPos = IRPosition::callsite_argument(*CB, I); + IRPosition CBArgPos = IRPosition::callsite_argument(CB, I); // Every call site argument might be dead. getOrCreateAAFor(CBArgPos); @@ -1856,12 +1892,15 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Call site argument might be simplified. getOrCreateAAFor(CBArgPos); - if (!CB->getArgOperand(I)->getType()->isPointerTy()) + if (!CB.getArgOperand(I)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". getOrCreateAAFor(CBArgPos); + // Call site argument attribute "nocapture". + getOrCreateAAFor(CBArgPos); + // Call site argument attribute "no-alias". 
getOrCreateAAFor(CBArgPos); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index aeb1029ed002f..a8271e7432455 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -115,6 +115,7 @@ PIPE_OPERATOR(AAMemoryBehavior) PIPE_OPERATOR(AAMemoryLocation) PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) +PIPE_OPERATOR(AAUndefinedBehavior) #undef PIPE_OPERATOR } // namespace llvm @@ -400,34 +401,12 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, S ^= *T; } -/// Helper class to compose two generic deduction -template class F, template class G> -struct AAComposeTwoGenericDeduction - : public F, StateType> { - AAComposeTwoGenericDeduction(const IRPosition &IRP, Attributor &A) - : F, StateType>(IRP, A) {} - - void initialize(Attributor &A) override { - F, StateType>::initialize(A); - G::initialize(A); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus ChangedF = - F, StateType>::updateImpl(A); - ChangeStatus ChangedG = G::updateImpl(A); - return ChangedF | ChangedG; - } -}; - /// Helper class for generic deduction: return value -> returned position. -template -struct AAReturnedFromReturnedValues : public Base { +template +struct AAReturnedFromReturnedValues : public BaseType { AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} + : BaseType(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { @@ -487,11 +466,11 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, } /// Helper class for generic deduction: call site argument -> argument position. -template -struct AAArgumentFromCallSiteArguments : public Base { +struct AAArgumentFromCallSiteArguments : public BaseType { AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} + : BaseType(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { @@ -504,11 +483,11 @@ struct AAArgumentFromCallSiteArguments : public Base { }; /// Helper class for generic replication: function returned -> cs returned. -template -struct AACallSiteReturnedFromReturned : public Base { +template +struct AACallSiteReturnedFromReturned : public BaseType { AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} + : BaseType(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { @@ -530,151 +509,116 @@ struct AACallSiteReturnedFromReturned : public Base { } }; -/// Helper class for generic deduction using must-be-executed-context -/// Base class is required to have `followUse` method. - -/// bool followUse(Attributor &A, const Use *U, const Instruction *I) -/// U - Underlying use. -/// I - The user of the \p U. -/// `followUse` returns true if the value should be tracked transitively. - -template -struct AAFromMustBeExecutedContext : public Base { - AAFromMustBeExecutedContext(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} - - void initialize(Attributor &A) override { - Base::initialize(A); - const IRPosition &IRP = this->getIRPosition(); - Instruction *CtxI = IRP.getCtxI(); - - if (!CtxI) - return; - - for (const Use &U : IRP.getAssociatedValue().uses()) - Uses.insert(&U); - } - - /// Helper function to accumulate uses. 
- void followUsesInContext(Attributor &A, - MustBeExecutedContextExplorer &Explorer, - const Instruction *CtxI, - SetVector &Uses, StateType &State) { - auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); - for (unsigned u = 0; u < Uses.size(); ++u) { - const Use *U = Uses[u]; - if (const Instruction *UserI = dyn_cast(U->getUser())) { - bool Found = Explorer.findInContextOf(UserI, EIt, EEnd); - if (Found && Base::followUse(A, U, UserI, State)) - for (const Use &Us : UserI->uses()) - Uses.insert(&Us); - } +/// Helper function to accumulate uses. +template +static void followUsesInContext(AAType &AA, Attributor &A, + MustBeExecutedContextExplorer &Explorer, + const Instruction *CtxI, + SetVector &Uses, + StateType &State) { + auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); + for (unsigned u = 0; u < Uses.size(); ++u) { + const Use *U = Uses[u]; + if (const Instruction *UserI = dyn_cast(U->getUser())) { + bool Found = Explorer.findInContextOf(UserI, EIt, EEnd); + if (Found && AA.followUseInMBEC(A, U, UserI, State)) + for (const Use &Us : UserI->uses()) + Uses.insert(&Us); } } +} - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - auto BeforeState = this->getState(); - auto &S = this->getState(); - Instruction *CtxI = this->getIRPosition().getCtxI(); - if (!CtxI) - return ChangeStatus::UNCHANGED; +/// Use the must-be-executed-context around \p I to add information into \p S. +/// The AAType class is required to have `followUseInMBEC` method with the +/// following signature and behaviour: +/// +/// bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I) +/// U - Underlying use. +/// I - The user of the \p U. +/// Returns true if the value should be tracked transitively. +/// +template +static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, + Instruction &CtxI) { - MustBeExecutedContextExplorer &Explorer = - A.getInfoCache().getMustBeExecutedContextExplorer(); + // Container for (transitive) uses of the associated value. + SetVector Uses; + for (const Use &U : AA.getIRPosition().getAssociatedValue().uses()) + Uses.insert(&U); - followUsesInContext(A, Explorer, CtxI, Uses, S); + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); - if (this->isAtFixpoint()) - return ChangeStatus::CHANGED; + followUsesInContext(AA, A, Explorer, &CtxI, Uses, S); - SmallVector BrInsts; - auto Pred = [&](const Instruction *I) { - if (const BranchInst *Br = dyn_cast(I)) - if (Br->isConditional()) - BrInsts.push_back(Br); - return true; - }; + if (S.isAtFixpoint()) + return; - // Here, accumulate conditional branch instructions in the context. We - // explore the child paths and collect the known states. The disjunction of - // those states can be merged to its own state. Let ParentState_i be a state - // to indicate the known information for an i-th branch instruction in the - // context. ChildStates are created for its successors respectively. - // - // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1} - // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2} - // ... - // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m} - // - // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m - // - // FIXME: Currently, recursive branches are not handled. For example, we - // can't deduce that ptr must be dereferenced in below function. 
-    //
-    // void f(int a, int c, int *ptr) {
-    //    if(a)
-    //      if (b) {
-    //        *ptr = 0;
-    //      } else {
-    //        *ptr = 1;
-    //      }
-    //    else {
-    //      if (b) {
-    //        *ptr = 0;
-    //      } else {
-    //        *ptr = 1;
-    //      }
-    //    }
-    // }
-
-    Explorer.checkForAllContext(CtxI, Pred);
-    for (const BranchInst *Br : BrInsts) {
-      StateType ParentState;
-
-      // The known state of the parent state is a conjunction of children's
-      // known states so it is initialized with a best state.
-      ParentState.indicateOptimisticFixpoint();
-
-      for (const BasicBlock *BB : Br->successors()) {
-        StateType ChildState;
-
-        size_t BeforeSize = Uses.size();
-        followUsesInContext(A, Explorer, &BB->front(), Uses, ChildState);
-
-        // Erase uses which only appear in the child.
-        for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
-          It = Uses.erase(It);
-
-        ParentState &= ChildState;
-      }
+  SmallVector<const BranchInst *, 4> BrInsts;
+  auto Pred = [&](const Instruction *I) {
+    if (const BranchInst *Br = dyn_cast<BranchInst>(I))
+      if (Br->isConditional())
+        BrInsts.push_back(Br);
+    return true;
+  };

-      // Use only known state.
-      S += ParentState;
+  // Here, accumulate conditional branch instructions in the context. We
+  // explore the child paths and collect the known states. The disjunction of
+  // those states can be merged to its own state. Let ParentState_i be a state
+  // to indicate the known information for an i-th branch instruction in the
+  // context. ChildStates are created for its successors respectively.
+  //
+  // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1}
+  // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2}
+  // ...
+  // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m}
+  //
+  // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m
+  //
+  // FIXME: Currently, recursive branches are not handled. For example, we
+  // can't deduce that ptr must be dereferenced in below function.
+  //
+  // void f(int a, int b, int *ptr) {
+  //    if(a)
+  //      if (b) {
+  //        *ptr = 0;
+  //      } else {
+  //        *ptr = 1;
+  //      }
+  //    else {
+  //      if (b) {
+  //        *ptr = 0;
+  //      } else {
+  //        *ptr = 1;
+  //      }
+  //    }
+  // }
+
+  Explorer.checkForAllContext(&CtxI, Pred);
+  for (const BranchInst *Br : BrInsts) {
+    StateType ParentState;
+
+    // The known state of the parent state is a conjunction of children's
+    // known states so it is initialized with a best state.
+    ParentState.indicateOptimisticFixpoint();
+
+    for (const BasicBlock *BB : Br->successors()) {
+      StateType ChildState;
+
+      size_t BeforeSize = Uses.size();
+      followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState);
+
+      // Erase uses which only appear in the child.
+      for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
+        It = Uses.erase(It);
+
+      ParentState &= ChildState;
     }

-    return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+    // Use only known state.
+    S += ParentState;
   }
-
-private:
-  /// Container for (transitive) uses of the associated value.
-  SetVector<const Use *> Uses;
-};
-
-template <typename AAType, typename Base,
-          typename StateType = typename AAType::StateType>
-using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext =
-    AAComposeTwoGenericDeduction<AAType, Base, StateType,
-                                 AAFromMustBeExecutedContext,
-                                 AAArgumentFromCallSiteArguments>;
-
-template <typename AAType, typename Base,
-          typename StateType = typename AAType::StateType>
-using AACallSiteReturnedFromReturnedAndMustBeExecutedContext =
-    AAComposeTwoGenericDeduction<AAType, Base, StateType,
-                                 AAFromMustBeExecutedContext,
-                                 AACallSiteReturnedFromReturned>;
+}

 /// -----------------------NoUnwind Function Attribute--------------------------

@@ -897,7 +841,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
   // Callback to replace the uses of CB with the constant C.
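The branch handling in followUsesInMBEC above reduces to: for every conditional branch in the must-be-executed context, the parent state is the conjunction of its children's states, and only that known result is folded back into S. A standalone sketch of the merge, with a state modeled as a plain set of known facts (all names invented, not LLVM code):

#include <algorithm>
#include <cassert>
#include <iterator>
#include <set>
#include <string>

// A state is a set of facts known to hold.
using State = std::set<std::string>;

// ParentS = ChildS_1 /\ ChildS_2: keep only what every successor knows.
static State conjunction(const State &A, const State &B) {
  State Out;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::inserter(Out, Out.begin()));
  return Out;
}

int main() {
  State ThenState = {"dereferenced", "nonnull"};
  State ElseState = {"dereferenced"};
  // Both successors dereference the pointer, so the branch as a whole does,
  // but only one proves non-nullness, so that fact is dropped.
  State ParentState = conjunction(ThenState, ElseState);
  assert(ParentState.count("dereferenced") == 1);
  assert(ParentState.count("nonnull") == 0);

  // Known State |= ParentS: the disjunction step adds the branch's result.
  State S;
  S.insert(ParentState.begin(), ParentState.end());
  assert(S.count("dereferenced") == 1);
}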
auto ReplaceCallSiteUsersWith = [&A](CallBase &CB, Constant &C) { - if (CB.getNumUses() == 0) + if (CB.use_empty()) return ChangeStatus::UNCHANGED; if (A.changeValueAfterManifest(CB, C)) return ChangeStatus::CHANGED; @@ -908,9 +852,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { if (auto *UniqueRVArg = dyn_cast(UniqueRV.getValue())) { if (UniqueRVArg->getType()->canLosslesslyBitCastTo( getAssociatedFunction()->getReturnType())) { - // TODO: This should be handled differently! - this->AnchorVal = UniqueRVArg; - this->KindOrArgNo = UniqueRVArg->getArgNo(); + getIRPosition() = IRPosition::argument(*UniqueRVArg); Changed = IRAttribute::manifest(A); } } else if (auto *RVC = dyn_cast(UniqueRV.getValue())) { @@ -1667,11 +1609,15 @@ struct AANonNullImpl : AANonNull { indicatePessimisticFixpoint(); else AANonNull::initialize(A); + + if (!getState().isAtFixpoint()) + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); } - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I, - AANonNull::StateType &State) { + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AANonNull::StateType &State) { bool IsNonNull = false; bool TrackUse = false; getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, @@ -1691,22 +1637,17 @@ struct AANonNullImpl : AANonNull { }; /// NonNull attribute for a floating value. -struct AANonNullFloating - : AAFromMustBeExecutedContext { - using Base = AAFromMustBeExecutedContext; - AANonNullFloating(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} +struct AANonNullFloating : public AANonNullImpl { + AANonNullFloating(const IRPosition &IRP, Attributor &A) + : AANonNullImpl(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Change = Base::updateImpl(A); - if (isKnownNonNull()) - return Change; - if (!NullIsDefined) { const auto &DerefAA = A.getAAFor(*this, getIRPosition()); if (DerefAA.getAssumedDereferenceableBytes()) - return Change; + return ChangeStatus::UNCHANGED; } const DataLayout &DL = A.getDataLayout(); @@ -1758,12 +1699,9 @@ struct AANonNullReturned final /// NonNull attribute for function argument. struct AANonNullArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext { + : AAArgumentFromCallSiteArguments { AANonNullArgument(const IRPosition &IRP, Attributor &A) - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext( - IRP, A) {} + : AAArgumentFromCallSiteArguments(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } @@ -1779,12 +1717,9 @@ struct AANonNullCallSiteArgument final : AANonNullFloating { /// NonNull attribute for a call site return position. 
struct AANonNullCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext { + : AACallSiteReturnedFromReturned { AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext( - IRP, A) {} + : AACallSiteReturnedFromReturned(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } @@ -2267,7 +2202,7 @@ struct AANoAliasFloating final : AANoAliasImpl { if (!CI) break; Value *Base = CI->getOperand(0); - if (Base->getNumUses() != 1) + if (!Base->hasOneUse()) break; Val = Base; } while (true); @@ -2451,7 +2386,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { Instruction *UserI = cast(U.getUser()); // If user if curr instr and only use. - if ((UserI == getCtxI()) && (UserI->getNumUses() == 1)) + if (UserI == getCtxI() && UserI->hasOneUse()) return true; const Function *ScopeFn = VIRP.getAnchorScope(); @@ -3272,6 +3207,7 @@ struct AADereferenceableImpl : AADereferenceable { : AADereferenceable(IRP, A) {} using StateType = DerefState; + /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { SmallVector Attrs; getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, @@ -3285,8 +3221,13 @@ struct AADereferenceableImpl : AADereferenceable { const IRPosition &IRP = this->getIRPosition(); bool IsFnInterface = IRP.isFnInterfaceKind(); Function *FnScope = IRP.getAnchorScope(); - if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) + if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) { indicatePessimisticFixpoint(); + return; + } + + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); } /// See AbstractAttribute::getState() @@ -3316,9 +3257,9 @@ struct AADereferenceableImpl : AADereferenceable { return; } - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I, - AADereferenceable::StateType &State) { + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AADereferenceable::StateType &State) { bool IsNonNull = false; bool TrackUse = false; int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( @@ -3363,17 +3304,12 @@ struct AADereferenceableImpl : AADereferenceable { }; /// Dereferenceable attribute for a floating value. -struct AADereferenceableFloating - : AAFromMustBeExecutedContext { - using Base = - AAFromMustBeExecutedContext; +struct AADereferenceableFloating : AADereferenceableImpl { AADereferenceableFloating(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} + : AADereferenceableImpl(IRP, A) {} /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Change = Base::updateImpl(A); - const DataLayout &DL = A.getDataLayout(); auto VisitValueCB = [&](Value &V, const Instruction *, DerefState &T, @@ -3435,7 +3371,7 @@ struct AADereferenceableFloating A, getIRPosition(), *this, T, VisitValueCB, getCtxI())) return indicatePessimisticFixpoint(); - return Change | clampStateAndIndicateChange(getState(), T); + return clampStateAndIndicateChange(getState(), T); } /// See AbstractAttribute::trackStatistics() @@ -3459,10 +3395,10 @@ struct AADereferenceableReturned final /// Dereferenceable attribute for an argument struct AADereferenceableArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl> { - using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl>; + : AAArgumentFromCallSiteArguments { + using Base = + AAArgumentFromCallSiteArguments; AADereferenceableArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} @@ -3485,10 +3421,9 @@ struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { /// Dereferenceable attribute deduction for a call site return value. struct AADereferenceableCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl> { - using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl>; + : AACallSiteReturnedFromReturned { + using Base = + AACallSiteReturnedFromReturned; AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} @@ -3594,8 +3529,13 @@ struct AAAlignImpl : AAAlign { if (getIRPosition().isFnInterfaceKind() && (!getAnchorScope() || - !A.isFunctionIPOAmendable(*getAssociatedFunction()))) + !A.isFunctionIPOAmendable(*getAssociatedFunction()))) { indicatePessimisticFixpoint(); + return; + } + + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); } /// See AbstractAttribute::manifest(...). @@ -3645,9 +3585,10 @@ struct AAAlignImpl : AAAlign { Attrs.emplace_back( Attribute::getWithAlignment(Ctx, Align(getAssumedAlign()))); } - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I, - AAAlign::StateType &State) { + + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AAAlign::StateType &State) { bool TrackUse = false; unsigned int KnownAlign = @@ -3666,14 +3607,11 @@ struct AAAlignImpl : AAAlign { }; /// Align attribute for a floating value. -struct AAAlignFloating : AAFromMustBeExecutedContext { - using Base = AAFromMustBeExecutedContext; - AAAlignFloating(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} +struct AAAlignFloating : AAAlignImpl { + AAAlignFloating(const IRPosition &IRP, Attributor &A) : AAAlignImpl(IRP, A) {} /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - Base::updateImpl(A); - const DataLayout &DL = A.getDataLayout(); auto VisitValueCB = [&](Value &V, const Instruction *, @@ -3719,11 +3657,8 @@ struct AAAlignReturned final /// Align attribute for function argument. 
struct AAAlignArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext { - using Base = - AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext; + : AAArgumentFromCallSiteArguments { + using Base = AAArgumentFromCallSiteArguments; AAAlignArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} /// See AbstractAttribute::manifest(...). @@ -3779,11 +3714,8 @@ struct AAAlignCallSiteArgument final : AAAlignFloating { /// Align attribute deduction for a call site return value. struct AAAlignCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext { - using Base = - AACallSiteReturnedFromReturnedAndMustBeExecutedContext; + : AACallSiteReturnedFromReturned { + using Base = AACallSiteReturnedFromReturned; AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} @@ -4534,9 +4466,13 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { for (ReturnInst *RI : RetInsts) { if (RI->getFunction() != getAnchorScope()) continue; - LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *C + auto *RC = C; + if (RC->getType() != RI->getReturnValue()->getType()) + RC = ConstantExpr::getBitCast(RC, + RI->getReturnValue()->getType()); + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *RC << " in " << *RI << " :: " << *this << "\n"); - if (A.changeUseAfterManifest(RI->getOperandUse(0), *C)) + if (A.changeUseAfterManifest(RI->getOperandUse(0), *RC)) Changed = ChangeStatus::CHANGED; } return true; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index 98a1df5cd4401..74f11fa309592 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -385,7 +385,7 @@ static bool runCVP(Module &M) { bool Changed = false; MDBuilder MDB(M.getContext()); for (CallBase *C : Lattice.getIndirectCalls()) { - auto RegI = CVPLatticeKey(C->getCalledValue(), IPOGrouping::Register); + auto RegI = CVPLatticeKey(C->getCalledOperand(), IPOGrouping::Register); CVPLatticeVal LV = Solver.getExistingValueState(RegI); if (!LV.isFunctionSet() || LV.getFunctions().empty()) continue; diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index c750cbddd69ea..55039f42bdd5c 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -289,7 +289,8 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { bool Changed = false; for (Argument &Arg : Fn.args()) { - if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) { + if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && + !Arg.hasPassPointeeByValueAttr()) { if (Arg.isUsedByMetadata()) { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); Changed = true; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 2f0783017ce62..c5837ea87f34a 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -467,14 +467,19 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) { /// Copy over the debug info for a variable to its SRA replacements. 
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, - uint64_t FragmentSizeInBits, - unsigned NumElements) { + uint64_t FragmentSizeInBits) { SmallVector GVs; GV->getDebugInfo(GVs); for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); + Optional VarSize = Var->getSizeInBits(); + DIExpression *Expr = GVE->getExpression(); - if (NumElements > 1) { + // If the FragmentSize is smaller than the variable, + // emit a fragment expression. + // If the variable size is unknown a fragment must be + // emitted to be safe. + if (!VarSize || FragmentSizeInBits < *VarSize) { if (auto E = DIExpression::createFragmentExpression( Expr, FragmentOffsetInBits, FragmentSizeInBits)) Expr = *E; @@ -556,8 +561,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Copy over the debug info for the variable. uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); - transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, - STy->getNumElements()); + transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size); } else { uint64_t EltSize = DL.getTypeAllocSize(ElTy); Align EltAlign(DL.getABITypeAlignment(ElTy)); @@ -570,7 +574,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, - FragmentSizeInBits, GetSRASequentialNumElements(Ty)); + FragmentSizeInBits); } } @@ -658,12 +662,12 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, return false; // Storing the value. } } else if (const CallInst *CI = dyn_cast(U)) { - if (CI->getCalledValue() != V) { + if (CI->getCalledOperand() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } } else if (const InvokeInst *II = dyn_cast(U)) { - if (II->getCalledValue() != V) { + if (II->getCalledOperand() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } @@ -721,7 +725,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } } else if (isa(I) || isa(I)) { CallBase *CB = cast(I); - if (CB->getCalledValue() == V) { + if (CB->getCalledOperand() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. CB->setCalledOperand(NewV); diff --git a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index a2d7bccadd371..8d05a72d68dac 100644 --- a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -129,7 +129,7 @@ static bool PropagateConstantsIntoArguments(Function &F) { for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? 
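The size check that now drives transferSRADebugInfo above reduces to a single predicate: emit a DW_OP_LLVM_fragment expression unless the replacement is known to cover the whole variable. A minimal sketch of that decision, using std::optional in place of LLVM's Optional (standalone, names hypothetical):

#include <cassert>
#include <cstdint>
#include <optional>

// True when a fragment expression must be attached to the SRA replacement.
bool needsFragmentExpression(std::optional<uint64_t> VarSizeInBits,
                             uint64_t FragmentSizeInBits) {
  // Unknown variable size: a fragment must be emitted to be safe.
  return !VarSizeInBits || FragmentSizeInBits < *VarSizeInBits;
}

int main() {
  assert(needsFragmentExpression(std::nullopt, 32)); // unknown size
  assert(needsFragmentExpression(64, 32));           // strict sub-range
  assert(!needsFragmentExpression(32, 32));          // covers whole variable
}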
if (ArgumentConstants[i].getInt() || AI->use_empty() || - AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) + (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; Value *V = ArgumentConstants[i].getPointer(); diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 91f8f28a591b5..89eb1159c123c 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -34,8 +34,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -57,8 +55,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -93,6 +93,13 @@ static cl::opt DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); +// An integer used to limit the cost of inline deferral. The default negative +// number tells shouldBeDeferred to only take the secondary cost into account. +static cl::opt + InlineDeferralScale("inline-deferral-scale", + cl::desc("Scale to limit the cost of inline deferral"), + cl::init(-1), cl::Hidden); + namespace { enum class InlinerFunctionImportStatsOpts { @@ -158,13 +165,13 @@ using InlinedArrayAllocasTy = DenseMap>; /// *actually make it to the backend*, which is really what we want. /// /// Because we don't have this information, we do this simple and useful hack. -static void mergeInlinedArrayAllocas( - Function *Caller, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { +static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { SmallPtrSet UsedAllocas; - // When processing our SCC, check to see if CS was inlined from some other - // call site. For example, if we're processing "A" in this code: + // When processing our SCC, check to see if the call site was inlined from + // some other call site. For example, if we're processing "A" in this code: // A() { B() } // B() { x = alloca ... C() } // C() { y = alloca ... } @@ -272,18 +279,18 @@ static void mergeInlinedArrayAllocas( /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static InlineResult inlineCallIfPossible( - CallBase &CS, InlineFunctionInfo &IFI, + CallBase &CB, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); AAResults &AAR = AARGetter(*Callee); // Try to inline the function. Get the list of static allocas that were // inlined. 
- InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime); + InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime); if (!IR.isSuccess()) return IR; @@ -298,18 +305,18 @@ static InlineResult inlineCallIfPossible( return IR; // success } -/// Return true if inlining of CS can block the caller from being +/// Return true if inlining of CB can block the caller from being /// inlined which is proved to be more beneficial. \p IC is the -/// estimated inline cost associated with callsite \p CS. +/// estimated inline cost associated with callsite \p CB. /// \p TotalSecondaryCost will be set to the estimated cost of inlining the -/// caller if \p CS is suppressed for inlining. +/// caller if \p CB is suppressed for inlining. static bool shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, - function_ref GetInlineCost) { + function_ref GetInlineCost) { // For now we only handle local or inline functions. if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) return false; - // If the cost of inlining CS is non-positive, it is not going to prevent the + // If the cost of inlining CB is non-positive, it is not going to prevent the // caller from being inlined into its callers and hence we don't need to // defer. if (IC.getCost() <= 0) @@ -338,12 +345,8 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); // This bool tracks what happens if we DO inline C into B. bool InliningPreventsSomeOuterInline = false; + unsigned NumCallerUsers = 0; for (User *U : Caller->users()) { - // If the caller will not be removed (either because it does not have a - // local linkage or because the LastCallToStaticBonus has been already - // applied), then we can exit the loop early. - if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost()) - return false; CallBase *CS2 = dyn_cast(U); // If this isn't a call to Caller (it could be some other sort @@ -369,8 +372,13 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, if (IC2.getCostDelta() <= CandidateCost) { InliningPreventsSomeOuterInline = true; TotalSecondaryCost += IC2.getCost(); + NumCallerUsers++; } } + + if (!InliningPreventsSomeOuterInline) + return false; + // If all outer calls to Caller would get inlined, the cost for the last // one is set very low by getInlineCost, in anticipation that Caller will // be removed entirely. We did not account for this above unless there @@ -378,7 +386,14 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost, if (ApplyLastCallBonus) TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; - return InliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost(); + // If InlineDeferralScale is negative, then ignore the cost of primary + // inlining -- IC.getCost() multiplied by the number of callers to Caller. 
+ if (InlineDeferralScale < 0) + return TotalSecondaryCost < IC.getCost(); + + int TotalCost = TotalSecondaryCost + IC.getCost() * NumCallerUsers; + int Allowance = IC.getCost() * InlineDeferralScale; + return TotalCost < Allowance; } static std::basic_ostream &operator<<(std::basic_ostream &R, @@ -408,51 +423,59 @@ static std::string inlineCostStr(const InlineCost &IC) { return Remark.str(); } +static void setInlineRemark(CallBase &CB, StringRef Message) { + if (!InlineRemarkAttribute) + return; + + Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message); + CB.addAttribute(AttributeList::FunctionIndex, Attr); +} + /// Return the cost only if the inliner should attempt to inline at the given /// CallSite. If we return the cost, we will emit an optimisation remark later -/// using that cost, so we won't do so from this function. +/// using that cost, so we won't do so from this function. Return None if +/// inlining should not be attempted. static Optional -shouldInline(CallBase &CS, function_ref GetInlineCost, +shouldInline(CallBase &CB, function_ref GetInlineCost, OptimizationRemarkEmitter &ORE) { using namespace ore; - InlineCost IC = GetInlineCost(CS); - Instruction *Call = &CS; - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); + InlineCost IC = GetInlineCost(CB); + Instruction *Call = &CB; + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); if (IC.isAlways()) { LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << CS << "\n"); - return IC; - } - - if (IC.isNever()) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << CS << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because it should never be inlined " - << IC; - }); + << ", Call: " << CB << "\n"); return IC; } if (!IC) { LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << CS << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because too costly to inline " << IC; - }); - return IC; + << ", Call: " << CB << "\n"); + if (IC.isNever()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because it should never be inlined " + << IC; + }); + } else { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not inlined into " + << NV("Caller", Caller) << " because too costly to inline " + << IC; + }); + } + setInlineRemark(CB, inlineCostStr(IC)); + return None; } int TotalSecondaryCost = 0; if (shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) { - LLVM_DEBUG(dbgs() << " NOT Inlining: " << CS + LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); ORE.emit([&]() { @@ -462,13 +485,13 @@ shouldInline(CallBase &CS, function_ref GetInlineCost, << " increases the cost of inlining " << NV("Caller", Caller) << " in other contexts"; }); - + setInlineRemark(CB, "deferred"); // IC does not bool() to false, so get an InlineCost that will. // This will not be inspected to make an error message. 
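The deferral condition introduced above is plain arithmetic over the primary and secondary inline costs. A standalone rehearsal of the comparison with invented numbers (a sketch of the rule, not the LLVM implementation):

#include <cassert>

bool shouldDefer(int PrimaryCost, int TotalSecondaryCost, int NumCallerUsers,
                 int InlineDeferralScale) {
  // Negative scale (the default): compare only the secondary cost against
  // the primary cost of inlining this call site.
  if (InlineDeferralScale < 0)
    return TotalSecondaryCost < PrimaryCost;
  int TotalCost = TotalSecondaryCost + PrimaryCost * NumCallerUsers;
  int Allowance = PrimaryCost * InlineDeferralScale;
  return TotalCost < Allowance;
}

int main() {
  // Default (-1): defer because 80 < 100.
  assert(shouldDefer(/*PrimaryCost=*/100, /*TotalSecondaryCost=*/80,
                     /*NumCallerUsers=*/2, /*InlineDeferralScale=*/-1));
  // Scale 2: 80 + 100*2 = 280 is not below the allowance 100*2 = 200,
  // so inlining is not deferred.
  assert(!shouldDefer(100, 80, 2, 2));
}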
return None; } - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CS + LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB << '\n'); return IC; } @@ -512,21 +535,13 @@ static void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, }); } -static void setInlineRemark(CallBase &CS, StringRef Message) { - if (!InlineRemarkAttribute) - return; - - Attribute Attr = Attribute::get(CS.getContext(), "inline-remark", Message); - CS.addAttribute(AttributeList::FunctionIndex, Attr); -} - static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, ProfileSummaryInfo *PSI, std::function GetTLI, bool InsertLifetime, - function_ref GetInlineCost, + function_ref GetInlineCost, function_ref AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { SmallPtrSet SCCFunctions; @@ -557,31 +572,31 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, OptimizationRemarkEmitter ORE(F); for (BasicBlock &BB : *F) for (Instruction &I : BB) { - auto *CS = dyn_cast(&I); + auto *CB = dyn_cast(&I); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. - if (!CS || isa(I)) + if (!CB || isa(I)) continue; // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. - if (Function *Callee = CS->getCalledFunction()) + if (Function *Callee = CB->getCalledFunction()) if (Callee->isDeclaration()) { using namespace ore; - setInlineRemark(*CS, "unavailable definition"); + setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CS->getCaller()) + << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); continue; } - CallSites.push_back(std::make_pair(CS, -1)); + CallSites.push_back(std::make_pair(CB, -1)); } } @@ -613,17 +628,17 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // CallSites may be modified inside so ranged for loop can not be used. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { auto &P = CallSites[CSi]; - CallBase &CS = *P.first; + CallBase &CB = *P.first; const int InlineHistoryID = P.second; - Function *Caller = CS.getCaller(); - Function *Callee = CS.getCalledFunction(); + Function *Caller = CB.getCaller(); + Function *Callee = CB.getCalledFunction(); // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; - bool IsTriviallyDead = isInstructionTriviallyDead(&CS, &GetTLI(*Caller)); + bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify @@ -633,7 +648,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // infinitely inline. if (InlineHistoryID != -1 && inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(CS, "recursive"); + setInlineRemark(CB, "recursive"); continue; } } @@ -643,45 +658,36 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // just become a regular analysis dependency. OptimizationRemarkEmitter ORE(Caller); - Optional OIC = shouldInline(CS, GetInlineCost, ORE); + auto OIC = shouldInline(CB, GetInlineCost, ORE); // If the policy determines that we should inline this function, // delete the call instead. 
- if (!OIC.hasValue()) { - setInlineRemark(CS, "deferred"); + if (!OIC) continue; - } - - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(CS, inlineCostStr(*OIC)); - continue; - } // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (IsTriviallyDead) { - LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CS << "\n"); + LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); // Update the call graph by deleting the edge from Callee to Caller. - setInlineRemark(CS, "trivially dead"); - CG[Caller]->removeCallEdgeFor(CS); - CS.eraseFromParent(); + setInlineRemark(CB, "trivially dead"); + CG[Caller]->removeCallEdgeFor(CB); + CB.eraseFromParent(); ++NumCallsDeleted; } else { - // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS.getDebugLoc(); - BasicBlock *Block = CS.getParent(); + // Get DebugLoc to report. CB will be invalid after Inliner. + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *Block = CB.getParent(); // Attempt to inline the function. using namespace ore; InlineResult IR = inlineCallIfPossible( - CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, + CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime, AARGetter, ImportedFunctionsStats); if (!IR.isSuccess()) { - setInlineRemark(CS, std::string(IR.getFailureReason()) + "; " + + setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + inlineCostStr(*OIC)); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, @@ -777,7 +783,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { }; return inlineCallsImpl( SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, - [&](CallBase &CS) { return getInlineCost(CS); }, LegacyAARGetter(*this), + [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), ImportedFunctionsStats); } @@ -938,17 +944,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // FIXME: Using instructions sequence is a really bad way to do this. // Instead we should do an actual RPO walk of the function body. for (Instruction &I : instructions(N.getFunction())) - if (auto *CS = dyn_cast(&I)) - if (Function *Callee = CS->getCalledFunction()) { + if (auto *CB = dyn_cast(&I)) + if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls.push_back({CS, -1}); + Calls.push_back({CB, -1}); else if (!isa(I)) { using namespace ore; - setInlineRemark(*CS, "unavailable definition"); + setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CS->getCaller()) + << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); @@ -1003,8 +1009,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // node however because those functions aren't going to be mutated by this // pass. FunctionAnalysisManager &FAM = - AM.getResult(*C, CG) - .getManager(); + AM.getResult(*C, CG).getManager(); // Get the remarks emission analysis for the caller. 
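The simplification above works because shouldInline's new contract returns a cost only when inlining should be attempted and None for every rejection, so callers no longer inspect a "negative" cost separately. A minimal sketch of that contract with std::optional and an invented threshold policy (not the real cost model):

#include <cassert>
#include <optional>

struct InlineCost {
  int Cost;
  bool Never;
};

// Return the cost only if inlining should be attempted; None otherwise
// (by then any missed-optimization remark has already been emitted).
std::optional<InlineCost> shouldInline(const InlineCost &IC, int Threshold) {
  if (IC.Never || IC.Cost >= Threshold)
    return std::nullopt;
  return IC;
}

int main() {
  assert(shouldInline({10, false}, 100));   // cheap: attempt inlining
  assert(!shouldInline({10, true}, 100));   // noinline: rejected
  assert(!shouldInline({500, false}, 100)); // too costly: rejected
}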
auto &ORE = FAM.getResult(F); @@ -1020,13 +1025,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, return FAM.getResult(F); }; - auto GetInlineCost = [&](CallBase &CS) { - Function &Callee = *CS.getCalledFunction(); + auto GetInlineCost = [&](CallBase &CB) { + Function &Callee = *CB.getCalledFunction(); auto &CalleeTTI = FAM.getResult(Callee); bool RemarksEnabled = Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); - return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, + return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, GetTLI, PSI, RemarksEnabled ? &ORE : nullptr); }; @@ -1036,13 +1041,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, bool DidInline = false; for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) { auto &P = Calls[I]; - CallBase *CS = P.first; + CallBase *CB = P.first; const int InlineHistoryID = P.second; - Function &Callee = *CS->getCalledFunction(); + Function &Callee = *CB->getCalledFunction(); if (InlineHistoryID != -1 && inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(*CS, "recursive"); + setInlineRemark(*CB, "recursive"); continue; } @@ -1056,109 +1061,105 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); - setInlineRemark(*CS, "recursive SCC split"); + setInlineRemark(*CB, "recursive SCC split"); continue; } - Optional OIC = shouldInline(*CS, GetInlineCost, ORE); + auto OIC = shouldInline(*CB, GetInlineCost, ORE); // Check whether we want to inline this callsite. - if (!OIC.hasValue()) { - setInlineRemark(*CS, "deferred"); + if (!OIC) continue; - } - - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(*CS, inlineCostStr(*OIC)); - continue; - } - - // Setup the data structure used to plumb customization into the - // `InlineFunction` routine. - InlineFunctionInfo IFI( - /*cg=*/nullptr, &GetAssumptionCache, PSI, - &FAM.getResult(*(CS->getCaller())), - &FAM.getResult(Callee)); - - // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS->getDebugLoc(); - BasicBlock *Block = CS->getParent(); + auto DoInline = [&]() -> InlineResult { + // Setup the data structure used to plumb customization into the + // `InlineFunction` routine. + InlineFunctionInfo IFI( + /*cg=*/nullptr, &GetAssumptionCache, PSI, + &FAM.getResult(*(CB->getCaller())), + &FAM.getResult(Callee)); + + InlineResult IR = InlineFunction(*CB, IFI); + if (!IR.isSuccess()) + return IR; + + DidInline = true; + InlinedCallees.insert(&Callee); + ++NumInlined; - using namespace ore; + // Add any new callsites to defined functions to the worklist. + if (!IFI.InlinedCallSites.empty()) { + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back({&Callee, InlineHistoryID}); + + for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { + Function *NewCallee = ICB->getCalledFunction(); + if (!NewCallee) { + // Try to promote an indirect (virtual) call without waiting for + // the post-inline cleanup and the next DevirtSCCRepeatedPass + // iteration because the next iteration may not happen and we may + // miss inlining it. 
+            if (tryPromoteCall(*ICB))
+              NewCallee = ICB->getCalledFunction();
+          }
+          if (NewCallee)
+            if (!NewCallee->isDeclaration())
+              Calls.push_back({ICB, NewHistoryID});
+        }
+      }

-      InlineResult IR = InlineFunction(*CS, IFI);
-      if (!IR.isSuccess()) {
-        setInlineRemark(*CS, std::string(IR.getFailureReason()) + "; " +
+      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
+        ImportedFunctionsStats->recordInline(F, Callee);
+
+      // Merge the attributes based on the inlining.
+      AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+      // For local functions, check whether this makes the callee trivially
+      // dead. In that case, we can drop the body of the function eagerly
+      // which may reduce the number of callers of other functions to one,
+      // changing inline cost thresholds.
+      if (Callee.hasLocalLinkage()) {
+        // To check this we also need to nuke any dead constant uses (perhaps
+        // made dead by this operation on other functions).
+        Callee.removeDeadConstantUsers();
+        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+          Calls.erase(
+              std::remove_if(Calls.begin() + I + 1, Calls.end(),
+                             [&](const std::pair<CallBase *, int> &Call) {
+                               return Call.first->getCaller() == &Callee;
+                             }),
+              Calls.end());
+          // Clear the body and queue the function itself for deletion when we
+          // finish inlining and call graph updates.
+          // Note that after this point, it is an error to do anything other
+          // than use the callee's address or delete it.
+          Callee.dropAllReferences();
+          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
+                 "Cannot cause a function to become dead twice!");
+          DeadFunctions.push_back(&Callee);
+        }
+      }
+      return IR;
+    };
+    // Capture the context of CB before inlining, as a successful inlining may
+    // change that context, and we want to report success or failure in the
+    // original context.
+    auto DLoc = CB->getDebugLoc();
+    auto *Block = CB->getParent();
+
+    auto Outcome = DoInline();
+    if (!Outcome.isSuccess()) {
+      using namespace ore;
+      setInlineRemark(*CB, std::string(Outcome.getFailureReason()) + "; " +
                                inlineCostStr(*OIC));
       ORE.emit([&]() {
         return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                         Block)
                << NV("Callee", &Callee) << " will not be inlined into "
                << NV("Caller", &F) << ": "
-               << NV("Reason", IR.getFailureReason());
+               << NV("Reason", Outcome.getFailureReason());
       });
       continue;
     }

-      DidInline = true;
-      InlinedCallees.insert(&Callee);
-
-      ++NumInlined;
       emitInlinedInto(ORE, DLoc, Block, Callee, F, *OIC);
-
-      // Add any new callsites to defined functions to the worklist.
-      if (!IFI.InlinedCallSites.empty()) {
-        int NewHistoryID = InlineHistory.size();
-        InlineHistory.push_back({&Callee, InlineHistoryID});
-
-        // FIXME(mtrofin): refactor IFI.InlinedCallSites to be CallBase-based
-        for (CallBase *CS : reverse(IFI.InlinedCallSites)) {
-          Function *NewCallee = CS->getCalledFunction();
-          if (!NewCallee) {
-            // Try to promote an indirect (virtual) call without waiting for the
-            // post-inline cleanup and the next DevirtSCCRepeatedPass iteration
-            // because the next iteration may not happen and we may miss
-            // inlining it.
-            if (tryPromoteCall(*CS))
-              NewCallee = CS->getCalledFunction();
-          }
-          if (NewCallee)
-            if (!NewCallee->isDeclaration())
-              Calls.push_back({CS, NewHistoryID});
-        }
-      }
-
-      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
-        ImportedFunctionsStats->recordInline(F, Callee);
-
-      // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(F, Callee); - - // For local functions, check whether this makes the callee trivially - // dead. In that case, we can drop the body of the function eagerly - // which may reduce the number of callers of other functions to one, - // changing inline cost thresholds. - if (Callee.hasLocalLinkage()) { - // To check this we also need to nuke any dead constant uses (perhaps - // made dead by this operation on other functions). - Callee.removeDeadConstantUsers(); - if (Callee.use_empty() && !CG.isLibFunction(Callee)) { - Calls.erase( - std::remove_if(Calls.begin() + I + 1, Calls.end(), - [&](const std::pair &Call) { - return Call.first->getCaller() == &Callee; - }), - Calls.end()); - // Clear the body and queue the function itself for deletion when we - // finish inlining and call graph updates. - // Note that after this point, it is an error to do anything other - // than use the callee's address or delete it. - Callee.dropAllReferences(); - assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && - "Cannot put cause a function to become dead twice!"); - DeadFunctions.push_back(&Callee); - } - } } // Back the call index up by one to put us in a good position to go around @@ -1237,8 +1238,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // function there. Also, cclear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FunctionAnalysisManager &FAM = - AM.getResult(DeadC, CG) - .getManager(); + AM.getResult(DeadC, CG).getManager(); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 0907aa01eddca..dbd06a6c299ba 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -62,7 +62,6 @@ struct OpenMPOpt { /// Generic information that describes a runtime function struct RuntimeFunctionInfo { - ~RuntimeFunctionInfo() { DeleteContainerSeconds(UsesMap); } /// The kind, as described by the RuntimeFunction enum. RuntimeFunction Kind; @@ -87,16 +86,19 @@ struct OpenMPOpt { /// Return the vector of uses in function \p F. UseVector &getOrCreateUseVector(Function *F) { - UseVector *&UV = UsesMap[F]; + std::unique_ptr &UV = UsesMap[F]; if (!UV) - UV = new UseVector(); + UV = std::make_unique(); return *UV; } /// Return the vector of uses in function \p F or `nullptr` if there are /// none. const UseVector *getUseVector(Function &F) const { - return UsesMap.lookup(&F); + auto I = UsesMap.find(&F); + if (I != UsesMap.end()) + return I->second.get(); + return nullptr; } /// Return how many functions contain uses of this runtime function. @@ -134,7 +136,7 @@ struct OpenMPOpt { private: /// Map from functions to all uses of this runtime function contained in /// them. - DenseMap UsesMap; + DenseMap> UsesMap; }; /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 
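The UsesMap change in OpenMPOpt above swaps manually deleted raw pointers for owning smart pointers, which is why the DeleteContainerSeconds destructor disappears. A self-contained sketch of the same get-or-create pattern with std::unordered_map and std::unique_ptr (stand-in types, not the LLVM ones):

#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Minimal stand-ins for the LLVM types involved.
struct Function { std::string Name; };
using Use = int;
using UseVector = std::vector<Use>;

// Owning map: entries are destroyed automatically when the map dies, so no
// manual cleanup pass (and no user-written destructor) is needed.
std::unordered_map<Function *, std::unique_ptr<UseVector>> UsesMap;

UseVector &getOrCreateUseVector(Function *F) {
  std::unique_ptr<UseVector> &UV = UsesMap[F];
  if (!UV)
    UV = std::make_unique<UseVector>();
  return *UV;
}

const UseVector *getUseVector(Function &F) {
  auto It = UsesMap.find(&F);
  return It == UsesMap.end() ? nullptr : It->second.get();
}

int main() {
  Function F{"foo"};
  getOrCreateUseVector(&F).push_back(42);
  assert(getUseVector(F) && getUseVector(F)->size() == 1);
}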
diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp index 874791b5fa261..a16dc664db64d 100644 --- a/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -136,7 +136,7 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { } if (CheckReturnViaAsm && !SCCMightReturn) if (const auto *CB = dyn_cast(&I)) - if (const auto *IA = dyn_cast(CB->getCalledValue())) + if (const auto *IA = dyn_cast(CB->getCalledOperand())) if (IA->hasSideEffects()) SCCMightReturn = true; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 489976a0cb8e7..230982d321f88 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1044,13 +1044,13 @@ bool SampleProfileLoader::inlineHotFunctions( R->getValue()->getSubprogram() && isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); - Instruction *DI = - pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); + auto &DI = + pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && - inlineCallInstruction(*cast(DI))) { + inlineCallInstruction(cast(DI))) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 9b09c52cdea9d..40177d93a2f10 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1028,7 +1028,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, VCallSite.emitRemark("single-impl", TheFn->stripPointerCasts()->getName(), OREGetter); VCallSite.CB.setCalledOperand(ConstantExpr::getBitCast( - TheFn, VCallSite.CB.getCalledValue()->getType())); + TheFn, VCallSite.CB.getCalledOperand()->getType())); // This use is no longer unsafe. if (VCallSite.NumUnsafeUses) --*VCallSite.NumUnsafeUses; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 6cc6dcdd748a8..a4d86d751c2f5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2132,6 +2132,49 @@ static Instruction *matchRotate(Instruction &Or) { return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt }); } +/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. +static Instruction *matchOrConcat(Instruction &Or, + InstCombiner::BuilderTy &Builder) { + assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'"); + Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1); + Type *Ty = Or.getType(); + + unsigned Width = Ty->getScalarSizeInBits(); + if ((Width & 1) != 0) + return nullptr; + unsigned HalfWidth = Width / 2; + + // Canonicalize zext (lower half) to LHS. + if (!isa(Op0)) + std::swap(Op0, Op1); + + // Find lower/upper half. + Value *LowerSrc, *ShlVal, *UpperSrc; + const APInt *C; + if (!match(Op0, m_OneUse(m_ZExt(m_Value(LowerSrc)))) || + !match(Op1, m_OneUse(m_Shl(m_Value(ShlVal), m_APInt(C)))) || + !match(ShlVal, m_OneUse(m_ZExt(m_Value(UpperSrc))))) + return nullptr; + if (*C != HalfWidth || LowerSrc->getType() != UpperSrc->getType() || + LowerSrc->getType()->getScalarSizeInBits() != HalfWidth) + return nullptr; + + // Find matching bswap instructions. 
+ // TODO: Add more patterns (bitreverse?) + Value *LowerBSwap, *UpperBSwap; + if (!match(LowerSrc, m_BSwap(m_Value(LowerBSwap))) || + !match(UpperSrc, m_BSwap(m_Value(UpperBSwap)))) + return nullptr; + + // Push the concat down, swapping the lower/upper sources. + Value *NewLower = Builder.CreateZExt(UpperBSwap, Ty); + Value *NewUpper = Builder.CreateZExt(LowerBSwap, Ty); + NewUpper = Builder.CreateShl(NewUpper, HalfWidth); + Value *BinOp = Builder.CreateOr(NewLower, NewUpper); + Function *F = Intrinsic::getDeclaration(Or.getModule(), Intrinsic::bswap, Ty); + return Builder.CreateCall(F, BinOp); +} + /// If all elements of two constant vectors are 0/-1 and inverses, return true. static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { unsigned NumElts = cast(C1->getType())->getNumElements(); @@ -2532,6 +2575,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Instruction *Rotate = matchRotate(I)) return Rotate; + if (Instruction *Concat = matchOrConcat(I, Builder)) + return replaceInstUsesWith(I, Concat); + Value *X, *Y; const APInt *CV; if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) && diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 765eb2474006e..2f79bc6a09a63 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4163,7 +4163,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Note: New assumption intrinsics created here are registered by // the InstCombineIRInserter object. FunctionType *AssumeIntrinsicTy = II->getFunctionType(); - Value *AssumeIntrinsic = II->getCalledValue(); + Value *AssumeIntrinsic = II->getCalledOperand(); Value *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName()); @@ -4336,7 +4336,7 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call, // The size of ByVal or InAlloca arguments is derived from the type, so we // can't change to a type with a different size. If the size were // passed explicitly we could avoid this check. - if (!Call.isByValOrInAllocaArgument(ix)) + if (!Call.isPassPointeeByValueArgument(ix)) return true; Type* SrcTy = @@ -4541,7 +4541,7 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) { // If the callee is a pointer to a function, attempt to move any casts to the // arguments of the call/callbr/invoke. - Value *Callee = Call.getCalledValue(); + Value *Callee = Call.getCalledOperand(); if (!isa(Callee) && transformConstExprCastCall(Call)) return nullptr; @@ -4660,7 +4660,8 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) { /// If the callee is a constexpr cast of a function, attempt to move the cast to /// the arguments of the call/callbr/invoke. bool InstCombiner::transformConstExprCastCall(CallBase &Call) { - auto *Callee = dyn_cast(Call.getCalledValue()->stripPointerCasts()); + auto *Callee = + dyn_cast(Call.getCalledOperand()->stripPointerCasts()); if (!Callee) return false; @@ -4778,7 +4779,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) { // If the callee is just a declaration, don't change the varargsness of the // call. We don't want to introduce a varargs call where one doesn't // already exist. 
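The matchOrConcat fold added above relies on the identity that byte-swapping a concatenation equals concatenating the byte-swapped halves in swapped order. A standalone numeric check for 16-bit halves packed into 32 bits (not LLVM code):

#include <cassert>
#include <cstdint>

static uint16_t bswap16(uint16_t X) { return uint16_t((X >> 8) | (X << 8)); }
static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0x0000FF00u) | ((X << 8) & 0x00FF0000u) |
         (X << 24);
}

int main() {
  uint16_t Lower = 0x1234, Upper = 0xABCD;
  // or(zext(bswap(x)), shl(zext(bswap(y)), 16)) ...
  uint32_t Fold = uint32_t(bswap16(Lower)) | (uint32_t(bswap16(Upper)) << 16);
  // ... equals bswap(or(zext(y), shl(zext(x), 16))): the concat is pushed
  // down with the lower/upper sources swapped.
  uint32_t Concat = uint32_t(Upper) | (uint32_t(Lower) << 16);
  assert(Fold == bswap32(Concat)); // both are 0xCDAB3412
}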
- PointerType *APTy = cast(Call.getCalledValue()->getType()); + PointerType *APTy = cast(Call.getCalledOperand()->getType()); if (FT->isVarArg()!=cast(APTy->getElementType())->isVarArg()) return false; @@ -4946,7 +4947,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) { Instruction * InstCombiner::transformCallThroughTrampoline(CallBase &Call, IntrinsicInst &Tramp) { - Value *Callee = Call.getCalledValue(); + Value *Callee = Call.getCalledOperand(); Type *CalleeTy = Callee->getType(); FunctionType *FTy = Call.getFunctionType(); AttributeList Attrs = Call.getAttributes(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a908349eaff14..b6d0bed808d3f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -1033,9 +1033,14 @@ class Negator final { using BuilderTy = IRBuilder; BuilderTy Builder; + const DataLayout &DL; + AssumptionCache &AC; + const DominatorTree &DT; + const bool IsTrulyNegation; - Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation); + Negator(LLVMContext &C, const DataLayout &DL, AssumptionCache &AC, + const DominatorTree &DT, bool IsTrulyNegation); #if LLVM_ENABLE_STATS unsigned NumValuesVisitedInThisNegator = 0; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 3487620279a30..5eb281f3c8633 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -275,6 +275,15 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } + // abs(X) * abs(X) -> X * X + // nabs(X) * nabs(X) -> X * X + if (Op0 == Op1) { + Value *X, *Y; + SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor; + if (SPF == SPF_ABS || SPF == SPF_NABS) + return BinaryOperator::CreateMul(X, X); + } + // -X * C --> X * -C Value *X, *Y; Constant *Op1C; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index 42bb748cc2872..c393a6373f7ac 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -46,6 +46,13 @@ #include #include +namespace llvm { +class AssumptionCache; +class DataLayout; +class DominatorTree; +class LLVMContext; +} // namespace llvm + using namespace llvm; #define DEBUG_TYPE "instcombine" @@ -87,13 +94,14 @@ static cl::opt cl::desc("What is the maximal lookup depth when trying to " "check for viability of negation sinking.")); -Negator::Negator(LLVMContext &C, const DataLayout &DL, bool IsTrulyNegation_) - : Builder(C, TargetFolder(DL), +Negator::Negator(LLVMContext &C, const DataLayout &DL_, AssumptionCache &AC_, + const DominatorTree &DT_, bool IsTrulyNegation_) + : Builder(C, TargetFolder(DL_), IRBuilderCallbackInserter([&](Instruction *I) { ++NegatorNumInstructionsCreatedTotal; NewInstructions.push_back(I); })), - IsTrulyNegation(IsTrulyNegation_) {} + DL(DL_), AC(AC_), DT(DT_), IsTrulyNegation(IsTrulyNegation_) {} #if LLVM_ENABLE_STATS Negator::~Negator() { @@ -301,6 +309,16 @@ LLVM_NODISCARD Value *Negator::visit(Value *V, unsigned Depth) { return nullptr; return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); } + case Instruction::Or: + if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I, + &DT)) + return nullptr; // Don't know how to handle `or` in 
general. + // `or`/`add` are interchangeable when operands have no common bits set. + // `inc` is always negatible. + if (match(I->getOperand(1), m_One())) + return Builder.CreateNot(I->getOperand(0), I->getName() + ".neg"); + // Else, just defer to Instruction::Add handling. + LLVM_FALLTHROUGH; case Instruction::Add: { // `add` is negatible if both of its operands are negatible. Value *NegOp0 = visit(I->getOperand(0), Depth + 1); @@ -364,7 +382,8 @@ LLVM_NODISCARD Value *Negator::Negate(bool LHSIsZero, Value *Root, if (!NegatorEnabled || !DebugCounter::shouldExecute(NegatorCounter)) return nullptr; - Negator N(Root->getContext(), IC.getDataLayout(), LHSIsZero); + Negator N(Root->getContext(), IC.getDataLayout(), IC.getAssumptionCache(), + IC.getDominatorTree(), LHSIsZero); Optional Res = N.run(Root); if (!Res) { // Negation failed. LLVM_DEBUG(dbgs() << "Negator: failed to sink negation into " << *Root diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a5d11ce863812..a43a026f05e2a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -671,6 +671,38 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal, return Builder.CreateOr(V, Y); } +/// Canonicalize a set or clear of a masked set of constant bits to +/// select-of-constants form. +static Instruction *foldSetClearBits(SelectInst &Sel, + InstCombiner::BuilderTy &Builder) { + Value *Cond = Sel.getCondition(); + Value *T = Sel.getTrueValue(); + Value *F = Sel.getFalseValue(); + Type *Ty = Sel.getType(); + Value *X; + const APInt *NotC, *C; + + // Cond ? (X & ~C) : (X | C) --> (X & ~C) | (Cond ? 0 : C) + if (match(T, m_And(m_Value(X), m_APInt(NotC))) && + match(F, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) { + Constant *Zero = ConstantInt::getNullValue(Ty); + Constant *OrC = ConstantInt::get(Ty, *C); + Value *NewSel = Builder.CreateSelect(Cond, Zero, OrC, "masksel", &Sel); + return BinaryOperator::CreateOr(T, NewSel); + } + + // Cond ? (X | C) : (X & ~C) --> (X & ~C) | (Cond ? C : 0) + if (match(F, m_And(m_Value(X), m_APInt(NotC))) && + match(T, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) { + Constant *Zero = ConstantInt::getNullValue(Ty); + Constant *OrC = ConstantInt::get(Ty, *C); + Value *NewSel = Builder.CreateSelect(Cond, OrC, Zero, "masksel", &Sel); + return BinaryOperator::CreateOr(F, NewSel); + } + + return nullptr; +} + /// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b). /// There are 8 commuted/swapped variants of this pattern. /// TODO: Also support a - UMIN(a,b) patterns. @@ -883,10 +915,11 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return SelectArg; } - // If the ValueOnZero is not the bitwidth, we can at least make use of the - // fact that the cttz/ctlz result will not be used if the input is zero, so - // it's okay to relax it to undef for that case. - if (II->hasOneUse() && !match(II->getArgOperand(1), m_One())) + // The ValueOnZero is not the bitwidth. But if the cttz/ctlz (and optional + // zext/trunc) have one use (ending at the select), the cttz/ctlz result will + // not be used if the input is zero. Relax to 'undef_on_zero' for that case. 
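The foldSetClearBits canonicalization above rests on the identity Cond ? (X & ~C) : (X | C) == (X & ~C) | (Cond ? 0 : C), and symmetrically for the swapped arms. An exhaustive 8-bit check of the first form (standalone, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x0F, NotC = uint8_t(~C);
  for (unsigned X = 0; X < 256; ++X)
    for (int Cond = 0; Cond <= 1; ++Cond) {
      uint8_t Sel = Cond ? uint8_t(X & NotC) : uint8_t(X | C);
      // The select moves onto the constants; the masked value is OR'd back.
      uint8_t Folded = uint8_t((X & NotC) | (Cond ? 0 : C));
      assert(Sel == Folded);
    }
}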
+ if (II->hasOneUse() && SelectArg->hasOneUse() && + !match(II->getArgOperand(1), m_One())) II->setArgOperand(1, ConstantInt::getTrue(II->getContext())); return nullptr; @@ -2552,6 +2585,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return Add; if (Instruction *Add = foldOverflowingAddSubSelect(SI, Builder)) return Add; + if (Instruction *Or = foldSetClearBits(SI, Builder)) + return Or; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) auto *TI = dyn_cast(TrueVal); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 2960947bdbfb4..84648260830b2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1429,13 +1429,16 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { - if (!Inst.getType()->isVectorTy()) return nullptr; + // FIXME: some of this is likely fine for scalable vectors + if (!isa(Inst.getType())) + return nullptr; BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); - unsigned NumElts = cast(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); - assert(cast(LHS->getType())->getNumElements() == NumElts); - assert(cast(RHS->getType())->getNumElements() == NumElts); + assert(cast(LHS->getType())->getElementCount() == + cast(Inst.getType())->getElementCount()); + assert(cast(RHS->getType())->getElementCount() == + cast(Inst.getType())->getElementCount()); // If both operands of the binop are vector concatenations, then perform the // narrow binop on each pair of the source operands followed by concatenation @@ -1518,11 +1521,12 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { // intends to move shuffles closer to other shuffles and binops closer to // other binops, so they can be folded. It may also enable demanded elements // transforms. 
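
For readers tracking the element-count asserts above: an element count for a possibly-scalable vector pairs a minimum lane count with a scalability flag, so a single equality check covers both. A simplified stand-in for the real llvm::ElementCount from llvm/Support/TypeSize.h (field names are an approximation):

struct ElementCountSketch {
  unsigned Min;   // minimum number of lanes
  bool Scalable;  // if true, the runtime lane count is Min * vscale
  bool operator==(const ElementCountSketch &RHS) const {
    return Min == RHS.Min && Scalable == RHS.Scalable;
  }
};

Comparing element counts rather than raw lane numbers keeps the asserts meaningful if scalable vectors ever reach this code, while the transforms themselves stay limited to fixed-width vectors.
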
+ unsigned NumElts = cast(Inst.getType())->getNumElements(); Constant *C; if (match(&Inst, m_c_BinOp(m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Mask(Mask))), m_Constant(C))) && - cast(V1->getType())->getNumElements() <= NumElts) { + cast(V1->getType())->getNumElements() <= NumElts) { assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() && "Shuffle should not change scalar type"); @@ -1533,7 +1537,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = bool ConstOp1 = isa(RHS); ArrayRef ShMask = Mask; - unsigned SrcVecNumElts = cast(V1->getType())->getNumElements(); + unsigned SrcVecNumElts = + cast(V1->getType())->getNumElements(); UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType()); SmallVector NewVecC(SrcVecNumElts, UndefScalar); bool MayChange = true; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index d12ceea8b1575..93326c8fd13a7 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -212,6 +213,11 @@ static cl::opt ClInstrumentAtomics( cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); +static cl::opt + ClInstrumentByval("asan-instrument-byval", + cl::desc("instrument byval call arguments"), cl::Hidden, + cl::init(true)); + static cl::opt ClAlwaysSlowPath( "asan-always-slow-path", cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, @@ -612,16 +618,13 @@ struct AddressSanitizer { /// Check if we want (and can) handle this alloca. bool isInterestingAlloca(const AllocaInst &AI); - /// If it is an interesting memory access, return the PointerOperand - /// and set IsWrite/Alignment. Otherwise return nullptr. - /// MaybeMask is an output parameter for the mask Value, if we're looking at a - /// masked load/store. 
- Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - uint64_t *TypeSize, unsigned *Alignment, - Value **MaybeMask = nullptr); + bool ignoreAccess(Value *Ptr); + void getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl &Interesting); - void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, - bool UseCalls, const DataLayout &DL); + void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, + InterestingMemoryOperand &O, bool UseCalls, + const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, @@ -638,6 +641,7 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); void maybeInsertDynamicShadowAtFunctionEntry(Function &F); @@ -1339,98 +1343,92 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { return IsInteresting; } -Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, - bool *IsWrite, - uint64_t *TypeSize, - unsigned *Alignment, - Value **MaybeMask) { +bool AddressSanitizer::ignoreAccess(Value *Ptr) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + Type *PtrTy = cast(Ptr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return true; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (Ptr->isSwiftError()) + return true; + + // Treat memory accesses to promotable allocas as non-interesting since they + // will not cause memory violations. This greatly speeds up the instrumented + // executable at -O0. + if (auto AI = dyn_cast_or_null(Ptr)) + if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) + return true; + + return false; +} + +void AddressSanitizer::getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. - if (I->hasMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) + return; // Do not instrument the load fetching the dynamic shadow address. 
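
For reference, the records collected into Interesting below look roughly like the following; this is a simplified sketch of InterestingMemoryOperand from llvm/Transforms/Instrumentation/AddressSanitizerCommon.h, and the member layout is an approximation of its interface:

#include "llvm/IR/Instruction.h"

struct InterestingMemoryOperandSketch {
  llvm::Instruction *Insn; // the instruction performing the access
  unsigned OperandNo;      // which operand of Insn holds the pointer
  bool IsWrite;
  llvm::Type *OpType;      // type actually loaded or stored
  uint64_t TypeSize;       // store size of OpType, in bits
  unsigned Alignment;
  llvm::Value *MaybeMask;  // non-null only for masked loads/stores

  llvm::Value *getPtr() const { return Insn->getOperand(OperandNo); }
  llvm::Instruction *getInsn() const { return Insn; }
};
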
if (LocalDynamicShadow == I) - return nullptr; + return; - Value *PtrOperand = nullptr; - const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast(I)) { - if (!ClInstrumentReads) return nullptr; - *IsWrite = false; - *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); - *Alignment = LI->getAlignment(); - PtrOperand = LI->getPointerOperand(); + if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) + return; + Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, + LI->getType(), LI->getAlignment()); } else if (StoreInst *SI = dyn_cast(I)) { - if (!ClInstrumentWrites) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); - *Alignment = SI->getAlignment(); - PtrOperand = SI->getPointerOperand(); + if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) + return; + Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, + SI->getValueOperand()->getType(), + SI->getAlignment()); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); - *Alignment = 0; - PtrOperand = RMW->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) + return; + Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, + RMW->getValOperand()->getType(), 0); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); - *Alignment = 0; - PtrOperand = XCHG->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) + return; + Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, + XCHG->getCompareOperand()->getType(), 0); } else if (auto CI = dyn_cast(I)) { - auto *F = dyn_cast(CI->getCalledValue()); + auto *F = CI->getCalledFunction(); if (F && (F->getName().startswith("llvm.masked.load.") || F->getName().startswith("llvm.masked.store."))) { - unsigned OpOffset = 0; - if (F->getName().startswith("llvm.masked.store.")) { - if (!ClInstrumentWrites) - return nullptr; - // Masked store has an initial operand for the value. - OpOffset = 1; - *IsWrite = true; - } else { - if (!ClInstrumentReads) - return nullptr; - *IsWrite = false; - } - - auto BasePtr = CI->getOperand(0 + OpOffset); + bool IsWrite = F->getName().startswith("llvm.masked.store."); + // Masked store has an initial operand for the value. + unsigned OpOffset = IsWrite ? 1 : 0; + if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads) + return; + + auto BasePtr = CI->getOperand(OpOffset); + if (ignoreAccess(BasePtr)) + return; auto Ty = cast(BasePtr->getType())->getElementType(); - *TypeSize = DL.getTypeStoreSizeInBits(Ty); + unsigned Alignment = 1; + // Otherwise no alignment guarantees. We probably got Undef. if (auto AlignmentConstant = dyn_cast(CI->getOperand(1 + OpOffset))) - *Alignment = (unsigned)AlignmentConstant->getZExtValue(); - else - *Alignment = 1; // No alignment guarantees. 
We probably got Undef - if (MaybeMask) - *MaybeMask = CI->getOperand(2 + OpOffset); - PtrOperand = BasePtr; + Alignment = (unsigned)AlignmentConstant->getZExtValue(); + Value *Mask = CI->getOperand(2 + OpOffset); + Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask); + } else { + for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { + if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || + ignoreAccess(CI->getArgOperand(ArgNo))) + continue; + Type *Ty = CI->getParamByValType(ArgNo); + Interesting.emplace_back(I, ArgNo, false, Ty, 1); + } } } - - if (PtrOperand) { - // Do not instrument acesses from different address spaces; we cannot deal - // with them. - Type *PtrTy = cast(PtrOperand->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0) - return nullptr; - - // Ignore swifterror addresses. - // swifterror memory addresses are mem2reg promoted by instruction - // selection. As such they cannot have regular uses like an instrumentation - // function and it makes no sense to track them as memory. - if (PtrOperand->isSwiftError()) - return nullptr; - } - - // Treat memory accesses to promotable allocas as non-interesting since they - // will not cause memory violations. This greatly speeds up the instrumented - // executable at -O0. - if (ClSkipPromotableAllocas) - if (auto AI = dyn_cast_or_null(PtrOperand)) - return isInterestingAlloca(*AI) ? AI : nullptr; - - return PtrOperand; } static bool isPointerOperand(Value *V) { @@ -1545,15 +1543,9 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, } void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, - Instruction *I, bool UseCalls, + InterestingMemoryOperand &O, bool UseCalls, const DataLayout &DL) { - bool IsWrite = false; - unsigned Alignment = 0; - uint64_t TypeSize = 0; - Value *MaybeMask = nullptr; - Value *Addr = - isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); - assert(Addr); + Value *Addr = O.getPtr(); // Optimization experiments. // The experiments can be used to evaluate potential optimizations that remove @@ -1573,7 +1565,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, // dynamically initialized global is always valid. GlobalVariable *G = dyn_cast(GetUnderlyingObject(Addr, DL)); if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && - isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; } @@ -1582,25 +1574,26 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, if (ClOpt && ClOptStack) { // A direct inbounds access to a stack variable is always valid. 
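
For context, callers drive the new instrumentMop signature like this; a condensed sketch of the instrumentFunction changes that appear later in this patch:

SmallVector<InterestingMemoryOperand, 1> InterestingOperands;
getInterestingMemoryOperands(&Inst, InterestingOperands);
for (InterestingMemoryOperand &O : InterestingOperands)
  instrumentMop(ObjSizeVis, O, UseCalls, DL);

Each operand arrives with IsWrite, TypeSize, Alignment and MaybeMask already filled in, so instrumentMop no longer has to re-derive them at the instrumentation site.
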
if (isa(GetUnderlyingObject(Addr, DL)) && - isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { NumOptimizedAccessesToStackVar++; return; } } - if (IsWrite) + if (O.IsWrite) NumInstrumentedWrites++; else NumInstrumentedReads++; unsigned Granularity = 1 << Mapping.Scale; - if (MaybeMask) { - instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr, - Alignment, Granularity, TypeSize, IsWrite, - nullptr, UseCalls, Exp); + if (O.MaybeMask) { + instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(), + Addr, O.Alignment, Granularity, O.TypeSize, + O.IsWrite, nullptr, UseCalls, Exp); } else { - doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize, - IsWrite, nullptr, UseCalls, Exp); + doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, + Granularity, O.TypeSize, O.IsWrite, nullptr, UseCalls, + Exp); } } @@ -2610,6 +2603,14 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) { } } +bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) { + bool ShouldInstrument = + ClDebugMin < 0 || ClDebugMax < 0 || + (Instrumented >= ClDebugMin && Instrumented <= ClDebugMax); + Instrumented++; + return !ShouldInstrument; +} + bool AddressSanitizer::instrumentFunction(Function &F, const TargetLibraryInfo *TLI) { if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; @@ -2642,14 +2643,12 @@ bool AddressSanitizer::instrumentFunction(Function &F, // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallPtrSet TempsToInstrument; - SmallVector ToInstrument; + SmallVector OperandsToInstrument; + SmallVector IntrinToInstrument; SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; int NumAllocas = 0; - bool IsWrite; - unsigned Alignment; - uint64_t TypeSize; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -2658,29 +2657,36 @@ bool AddressSanitizer::instrumentFunction(Function &F, int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; - Value *MaybeMask = nullptr; - if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, - &Alignment, &MaybeMask)) { - if (ClOpt && ClOptSameTemp) { - // If we have a mask, skip instrumentation if we've already - // instrumented the full object. But don't add to TempsToInstrument - // because we might get another load/store with a different mask. - if (MaybeMask) { - if (TempsToInstrument.count(Addr)) - continue; // We've seen this (whole) temp in the current BB. - } else { - if (!TempsToInstrument.insert(Addr).second) - continue; // We've seen this temp in the current BB. + SmallVector InterestingOperands; + getInterestingMemoryOperands(&Inst, InterestingOperands); + + if (!InterestingOperands.empty()) { + for (auto &Operand : InterestingOperands) { + if (ClOpt && ClOptSameTemp) { + Value *Ptr = Operand.getPtr(); + // If we have a mask, skip instrumentation if we've already + // instrumented the full object. But don't add to TempsToInstrument + // because we might get another load/store with a different mask. + if (Operand.MaybeMask) { + if (TempsToInstrument.count(Ptr)) + continue; // We've seen this (whole) temp in the current BB. + } else { + if (!TempsToInstrument.insert(Ptr).second) + continue; // We've seen this temp in the current BB. 
+ } } + OperandsToInstrument.push_back(Operand); + NumInsnsPerBB++; } } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) && isInterestingPointerComparison(&Inst)) || ((ClInvalidPointerPairs || ClInvalidPointerSub) && isInterestingPointerSubtraction(&Inst))) { PointerComparisonsOrSubtracts.push_back(&Inst); - continue; - } else if (isa(Inst)) { + } else if (MemIntrinsic *MI = dyn_cast(&Inst)) { // ok, take it. + IntrinToInstrument.push_back(MI); + NumInsnsPerBB++; } else { if (isa(Inst)) NumAllocas++; if (auto *CB = dyn_cast(&Inst)) { @@ -2691,17 +2697,14 @@ bool AddressSanitizer::instrumentFunction(Function &F, } if (CallInst *CI = dyn_cast(&Inst)) maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); - continue; } - ToInstrument.push_back(&Inst); - NumInsnsPerBB++; if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } } - bool UseCalls = - (ClInstrumentationWithCallsThreshold >= 0 && - ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); + bool UseCalls = (ClInstrumentationWithCallsThreshold >= 0 && + OperandsToInstrument.size() + IntrinToInstrument.size() > + (unsigned)ClInstrumentationWithCallsThreshold); const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOpts ObjSizeOpts; ObjSizeOpts.RoundToAlign = true; @@ -2709,16 +2712,16 @@ bool AddressSanitizer::instrumentFunction(Function &F, // Instrument. int NumInstrumented = 0; - for (auto Inst : ToInstrument) { - if (ClDebugMin < 0 || ClDebugMax < 0 || - (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment)) - instrumentMop(ObjSizeVis, Inst, UseCalls, - F.getParent()->getDataLayout()); - else - instrumentMemIntrinsic(cast(Inst)); - } - NumInstrumented++; + for (auto &Operand : OperandsToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentMop(ObjSizeVis, Operand, UseCalls, + F.getParent()->getDataLayout()); + FunctionModified = true; + } + for (auto Inst : IntrinToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentMemIntrinsic(Inst); + FunctionModified = true; } FunctionStackPoisoner FSP(F, *this); @@ -2733,10 +2736,10 @@ bool AddressSanitizer::instrumentFunction(Function &F, for (auto Inst : PointerComparisonsOrSubtracts) { instrumentPointerComparisonOrSubtraction(Inst); - NumInstrumented++; + FunctionModified = true; } - if (NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty()) + if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 7952ae7ea440e..20d29beebc952 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -389,6 +389,8 @@ class DataFlowSanitizer : public ModulePass { GlobalValue::LinkageTypes NewFLink, FunctionType *NewFT); Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); + void initializeCallbackFunctions(Module &M); + void initializeRuntimeFunctions(Module &M); public: static char ID; @@ -740,25 +742,8 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, return cast(C.getCallee()); } -bool DataFlowSanitizer::runOnModule(Module &M) { - if (ABIList.isIn(M, "skip")) - return false; - - if (!GetArgTLSPtr) { - Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); - ArgTLS = 
Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); - if (GlobalVariable *G = dyn_cast(ArgTLS)) - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } - if (!GetRetvalTLSPtr) { - RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); - if (GlobalVariable *G = dyn_cast(RetvalTLS)) - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } - - ExternalShadowMask = - Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); - +// Initialize DataFlowSanitizer runtime functions and declare them in the module +void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { { AttributeList AL; AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -772,7 +757,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL); } - { AttributeList AL; AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -809,7 +793,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) { Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", DFSanVarargWrapperFnTy); +} +// Initializes event callback functions and declare them in the module +void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback", DFSanLoadStoreCmpCallbackFnTy); DFSanStoreCallbackFn = Mod->getOrInsertFunction( @@ -818,6 +805,29 @@ bool DataFlowSanitizer::runOnModule(Module &M) { "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanLoadStoreCmpCallbackFnTy); +} + +bool DataFlowSanitizer::runOnModule(Module &M) { + if (ABIList.isIn(M, "skip")) + return false; + + if (!GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); + if (GlobalVariable *G = dyn_cast(ArgTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + if (!GetRetvalTLSPtr) { + RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); + if (GlobalVariable *G = dyn_cast(RetvalTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + + ExternalShadowMask = + Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); + + initializeCallbackFunctions(M); + initializeRuntimeFunctions(M); std::vector FnsToInstrument; SmallPtrSet FnsWithNativeABI; @@ -1553,7 +1563,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr); SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); auto *MTI = cast( - IRB.CreateCall(I.getFunctionType(), I.getCalledValue(), + IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); if (ClPreserveAlignment) { MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes); @@ -1593,7 +1603,7 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) { void DFSanVisitor::visitCallBase(CallBase &CB) { Function *F = CB.getCalledFunction(); - if ((F && F->isIntrinsic()) || isa(CB.getCalledValue())) { + if ((F && F->isIntrinsic()) || CB.isInlineAsm()) { visitOperandShadowInst(CB); return; } @@ -1606,7 +1616,7 @@ void DFSanVisitor::visitCallBase(CallBase &CB) { IRBuilder<> IRB(&CB); DenseMap::iterator i = - DFSF.DFS.UnwrappedFnMap.find(CB.getCalledValue()); + DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand()); if (i != DFSF.DFS.UnwrappedFnMap.end()) { 
Function *F = i->second; switch (DFSF.DFS.getWrapperKind(F)) { @@ -1728,8 +1738,7 @@ void DFSanVisitor::visitCallBase(CallBase &CB) { } } - FunctionType *FT = cast( - CB.getCalledValue()->getType()->getPointerElementType()); + FunctionType *FT = CB.getFunctionType(); if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { IRB.CreateStore(DFSF.getShadow(CB.getArgOperand(i)), @@ -1766,7 +1775,7 @@ void DFSanVisitor::visitCallBase(CallBase &CB) { if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); Value *Func = - IRB.CreateBitCast(CB.getCalledValue(), PointerType::getUnqual(NewFT)); + IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT)); std::vector Args; auto i = CB.arg_begin(), E = CB.arg_end(); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 9470cb2cfb281..0b9856b5126a1 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -96,6 +97,10 @@ static cl::opt ClInstrumentAtomics( cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); +static cl::opt ClInstrumentByval("hwasan-instrument-byval", + cl::desc("instrument byval arguments"), + cl::Hidden, cl::init(true)); + static cl::opt ClRecover( "hwasan-recover", cl::desc("Enable recovery mode (continue-after-error)."), @@ -211,10 +216,10 @@ class HWAddressSanitizer { unsigned AccessSizeIndex, Instruction *InsertBefore); void instrumentMemIntrinsic(MemIntrinsic *MI); - bool instrumentMemAccess(Instruction *I); - Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - uint64_t *TypeSize, unsigned *Alignment, - Value **MaybeMask); + bool instrumentMemAccess(InterestingMemoryOperand &O); + bool ignoreAccess(Value *Ptr); + void getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl &Interesting); bool isInterestingAlloca(const AllocaInst &AI); bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size); @@ -500,62 +505,63 @@ Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) { } } -Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, - bool *IsWrite, - uint64_t *TypeSize, - unsigned *Alignment, - Value **MaybeMask) { +bool HWAddressSanitizer::ignoreAccess(Value *Ptr) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + Type *PtrTy = cast(Ptr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return true; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (Ptr->isSwiftError()) + return true; + + return false; +} + +void HWAddressSanitizer::getInterestingMemoryOperands( + Instruction *I, SmallVectorImpl &Interesting) { // Skip memory accesses inserted by another instrumentation. 
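
One addition worth noting before the operand handling below: byval call arguments are now treated as reads, because passing byval copies the whole pointee at the call site. A plain C++ illustration, not from the patch:

struct Big { char Buf[64]; };
void callee(Big B);                  // parameter passed by value
void caller(Big *P) { callee(*P); }  // reads all 64 bytes of *P at the call

On ABIs where such aggregates are lowered with the byval attribute, the call accesses the entire object, which is exactly what the new ClInstrumentByval path instruments.
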
- if (I->hasMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) + return; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) - return nullptr; + return; - Value *PtrOperand = nullptr; - const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast(I)) { - if (!ClInstrumentReads) return nullptr; - *IsWrite = false; - *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); - *Alignment = LI->getAlignment(); - PtrOperand = LI->getPointerOperand(); + if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) + return; + Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, + LI->getType(), LI->getAlignment()); } else if (StoreInst *SI = dyn_cast(I)) { - if (!ClInstrumentWrites) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); - *Alignment = SI->getAlignment(); - PtrOperand = SI->getPointerOperand(); + if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) + return; + Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, + SI->getValueOperand()->getType(), + SI->getAlignment()); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); - *Alignment = 0; - PtrOperand = RMW->getPointerOperand(); + if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) + return; + Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, + RMW->getValOperand()->getType(), 0); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { - if (!ClInstrumentAtomics) return nullptr; - *IsWrite = true; - *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); - *Alignment = 0; - PtrOperand = XCHG->getPointerOperand(); - } - - if (PtrOperand) { - // Do not instrument accesses from different address spaces; we cannot deal - // with them. - Type *PtrTy = cast(PtrOperand->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0) - return nullptr; - - // Ignore swifterror addresses. - // swifterror memory addresses are mem2reg promoted by instruction - // selection. As such they cannot have regular uses like an instrumentation - // function and it makes no sense to track them as memory. 
- if (PtrOperand->isSwiftError()) - return nullptr; + if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) + return; + Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, + XCHG->getCompareOperand()->getType(), 0); + } else if (auto CI = dyn_cast(I)) { + for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { + if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || + ignoreAccess(CI->getArgOperand(ArgNo))) + continue; + Type *Ty = CI->getParamByValType(ArgNo); + Interesting.emplace_back(I, ArgNo, false, Ty, 1); + } } - - return PtrOperand; } static unsigned getPointerOperandIndex(Instruction *I) { @@ -713,45 +719,32 @@ void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { MI->eraseFromParent(); } -bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) { - LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n"); - bool IsWrite = false; - unsigned Alignment = 0; - uint64_t TypeSize = 0; - Value *MaybeMask = nullptr; - - if (ClInstrumentMemIntrinsics && isa(I)) { - instrumentMemIntrinsic(cast(I)); - return true; - } - - Value *Addr = - isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); +bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { + Value *Addr = O.getPtr(); - if (!Addr) - return false; + LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n"); - if (MaybeMask) + if (O.MaybeMask) return false; //FIXME - IRBuilder<> IRB(I); - if (isPowerOf2_64(TypeSize) && - (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) && - (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 || - Alignment >= TypeSize / 8)) { - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); + IRBuilder<> IRB(O.getInsn()); + if (isPowerOf2_64(O.TypeSize) && + (O.TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) && + (O.Alignment >= (1UL << Mapping.Scale) || O.Alignment == 0 || + O.Alignment >= O.TypeSize / 8)) { + size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize); if (ClInstrumentWithCalls) { - IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex], + IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex], IRB.CreatePointerCast(Addr, IntptrTy)); } else { - instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I); + instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn()); } } else { - IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite], + IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], {IRB.CreatePointerCast(Addr, IntptrTy), - ConstantInt::get(IntptrTy, TypeSize / 8)}); + ConstantInt::get(IntptrTy, O.TypeSize / 8)}); } - untagPointerOperand(I, Addr); + untagPointerOperand(O.getInsn(), Addr); return true; } @@ -1089,7 +1082,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); - SmallVector ToInstrument; + SmallVector OperandsToInstrument; + SmallVector IntrinToInstrument; SmallVector AllocasToInstrument; SmallVector RetVec; SmallVector LandingPadVec; @@ -1115,14 +1109,10 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (InstrumentLandingPads && isa(Inst)) LandingPadVec.push_back(&Inst); - Value *MaybeMask = nullptr; - bool IsWrite; - unsigned Alignment; - uint64_t TypeSize; - Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, - &Alignment, &MaybeMask); - if (Addr || isa(Inst)) - ToInstrument.push_back(&Inst); + getInterestingMemoryOperands(&Inst, OperandsToInstrument); + + if (MemIntrinsic *MI = dyn_cast(&Inst)) + 
IntrinToInstrument.push_back(MI); } } @@ -1138,7 +1128,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { F.setPersonalityFn(nullptr); } - if (AllocasToInstrument.empty() && ToInstrument.empty()) + if (AllocasToInstrument.empty() && OperandsToInstrument.empty() && + IntrinToInstrument.empty()) return false; assert(!LocalDynamicShadow); @@ -1216,8 +1207,14 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { } } - for (auto Inst : ToInstrument) - Changed |= instrumentMemAccess(Inst); + for (auto &Operand : OperandsToInstrument) + Changed |= instrumentMemAccess(Operand); + + if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) { + for (auto Inst : IntrinToInstrument) + instrumentMemIntrinsic(cast(Inst)); + Changed = true; + } LocalDynamicShadow = nullptr; StackBaseTag = nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 9857769e880fa..bcd4e2e8e33cc 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -193,12 +193,12 @@ class ICallPromotionFunc { // TotalCount is the total profiled count of call executions, and // NumCandidates is the number of candidate entries in ValueDataRef. std::vector getPromotionCandidatesForCallSite( - Instruction *Inst, const ArrayRef &ValueDataRef, + const CallBase &CB, const ArrayRef &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates); // Promote a list of targets for one indirect-call callsite. Return // the number of promotions. - uint32_t tryToPromote(Instruction *Inst, + uint32_t tryToPromote(CallBase &CB, const std::vector &Candidates, uint64_t &TotalCount); @@ -216,14 +216,13 @@ class ICallPromotionFunc { // Indirect-call promotion heuristic. The direct targets are sorted based on // the count. Stop at the first target that is not promoted. 
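
The migration from Instruction* to CallBase& lets the option filtering below test the call-site form directly. A condensed restatement of that logic; the helper name is ours, not the patch's:

#include "llvm/IR/Instructions.h"

static bool eligibleCallSiteForm(const llvm::CallBase &CB, bool InvokeOnly,
                                 bool CallOnly) {
  if (InvokeOnly && llvm::isa<llvm::CallInst>(CB))
    return false; // user asked to promote invokes only
  if (CallOnly && llvm::isa<llvm::InvokeInst>(CB))
    return false; // user asked to promote calls only
  return true;
}
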
-// FIXME(callsite): the Instruction* parameter can be changed to CallBase std::vector ICallPromotionFunc::getPromotionCandidatesForCallSite( - Instruction *Inst, const ArrayRef &ValueDataRef, + const CallBase &CB, const ArrayRef &ValueDataRef, uint64_t TotalCount, uint32_t NumCandidates) { std::vector Ret; - LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst + LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB << " Num_targets: " << ValueDataRef.size() << " Num_candidates: " << NumCandidates << "\n"); NumOfPGOICallsites++; @@ -239,18 +238,18 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count << " Target_func: " << Target << "\n"); - if (ICPInvokeOnly && isa(Inst)) { + if (ICPInvokeOnly && isa(CB)) { LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) << " Not promote: User options"; }); break; } - if (ICPCallOnly && isa(Inst)) { + if (ICPCallOnly && isa(CB)) { LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) << " Not promote: User options"; }); break; @@ -258,7 +257,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) << " Not promote: Cutoff reached"; }); break; @@ -268,7 +267,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( if (TargetFunction == nullptr) { LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) << "Cannot promote indirect call: target with md5sum " << ore::NV("target md5sum", Target) << " not found"; }); @@ -276,11 +275,11 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( } const char *Reason = nullptr; - if (!isLegalToPromote(*cast(Inst), TargetFunction, &Reason)) { + if (!isLegalToPromote(CB, TargetFunction, &Reason)) { using namespace ore; ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) << "Cannot promote indirect call to " << NV("TargetFunction", TargetFunction) << " with count of " << NV("Count", Count) << ": " << Reason; @@ -294,23 +293,20 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( return Ret; } -// FIXME(callsite): the Instruction* parameter and return can be changed to -// CallBase -Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, - Function *DirectCallee, - uint64_t Count, uint64_t TotalCount, - bool AttachProfToDirectCall, - OptimizationRemarkEmitter *ORE) { +CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, + uint64_t Count, uint64_t TotalCount, + bool AttachProfToDirectCall, + OptimizationRemarkEmitter *ORE) { uint64_t ElseCount = TotalCount - Count; uint64_t MaxCount = (Count >= ElseCount ? 
Count : ElseCount); uint64_t Scale = calculateCountScale(MaxCount); - MDBuilder MDB(Inst->getContext()); + MDBuilder MDB(CB.getContext()); MDNode *BranchWeights = MDB.createBranchWeights( scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); - CallBase &NewInst = promoteCallWithIfThenElse(*cast(Inst), - DirectCallee, BranchWeights); + CallBase &NewInst = + promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); if (AttachProfToDirectCall) { MDBuilder MDB(NewInst.getContext()); @@ -323,24 +319,24 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, if (ORE) ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst) + return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) << "Promote indirect call to " << NV("DirectCallee", DirectCallee) << " with count " << NV("Count", Count) << " out of " << NV("TotalCount", TotalCount); }); - return &NewInst; + return NewInst; } // Promote indirect-call to conditional direct-call for one callsite. uint32_t ICallPromotionFunc::tryToPromote( - Instruction *Inst, const std::vector &Candidates, + CallBase &CB, const std::vector &Candidates, uint64_t &TotalCount) { uint32_t NumPromoted = 0; for (auto &C : Candidates) { uint64_t Count = C.Count; - pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, - SamplePGO, &ORE); + pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, + &ORE); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; @@ -354,28 +350,28 @@ uint32_t ICallPromotionFunc::tryToPromote( bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { bool Changed = false; ICallPromotionAnalysis ICallAnalysis; - for (auto &I : findIndirectCalls(F)) { + for (auto *CB : findIndirectCalls(F)) { uint32_t NumVals, NumCandidates; uint64_t TotalCount; auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( - I, NumVals, TotalCount, NumCandidates); + CB, NumVals, TotalCount, NumCandidates); if (!NumCandidates || (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) continue; auto PromotionCandidates = getPromotionCandidatesForCallSite( - I, ICallProfDataRef, TotalCount, NumCandidates); - uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount); + *CB, ICallProfDataRef, TotalCount, NumCandidates); + uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); if (NumPromoted == 0) continue; Changed = true; // Adjust the MD.prof metadata. First delete the old one. - I->setMetadata(LLVMContext::MD_prof, nullptr); + CB->setMetadata(LLVMContext::MD_prof, nullptr); // If all promoted, we don't need the MD.prof metadata. if (TotalCount == 0 || NumPromoted == NumVals) continue; // Otherwise we need update with the un-promoted records back. 
- annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount, + annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount, IPVK_IndirectCallTarget, NumCandidates); } return Changed; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 2753d73e0cf35..79e78208997a6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2743,7 +2743,7 @@ struct MemorySanitizerVisitor : public InstVisitor { : Lower64ShadowExtend(IRB, S2, getShadowTy(&I)); Value *V1 = I.getOperand(0); Value *V2 = I.getOperand(1); - Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(), + Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), {IRB.CreateBitCast(S1, V1->getType()), V2}); Shift = IRB.CreateBitCast(Shift, getShadowTy(&I)); setShadow(&I, IRB.CreateOr(Shift, S2Conv)); @@ -3761,7 +3761,7 @@ struct MemorySanitizerVisitor : public InstVisitor { const DataLayout &DL = F.getParent()->getDataLayout(); CallBase *CB = cast(&I); IRBuilder<> IRB(&I); - InlineAsm *IA = cast(CB->getCalledValue()); + InlineAsm *IA = cast(CB->getCalledOperand()); int OutputArgs = getNumOutputArgs(IA, CB); // The last operand of a CallInst is the function itself. int NumOperands = CB->getNumOperands() - 1; diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index d3eeb0252352b..345693d0b4d3c 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -786,7 +786,7 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallBase &CB = cast(*I); - Value *Callee = CB.getCalledValue(); + Value *Callee = CB.getCalledOperand(); if (isa(Callee)) continue; IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); @@ -945,7 +945,12 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, auto FlagPtr = IRB.CreateGEP( FunctionBoolArray->getValueType(), FunctionBoolArray, {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); - auto Store = IRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); + auto Load = IRB.CreateLoad(Int1Ty, FlagPtr); + auto ThenTerm = + SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false); + IRBuilder<> ThenIRB(ThenTerm); + auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); + SetNoSanitizeMetadata(Load); SetNoSanitizeMetadata(Store); } if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 08195ee54ab0f..361035b178c85 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -57,9 +57,9 @@ public: IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {} void run(std::vector &Candidates) { - std::vector Result = findIndirectCalls(F); + std::vector Result = findIndirectCalls(F); for (Instruction *I : Result) { - Value *Callee = cast(I)->getCalledValue(); + Value *Callee = cast(I)->getCalledOperand(); Instruction *InsertPt = I; Instruction *AnnotatedInst = I; Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst}); diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp 
b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 0fc07fb9778d6..c72ce92432a29 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -363,10 +363,12 @@ void ConstantHoistingPass::collectConstantCandidates( // instruction and operand index. if (auto IntrInst = dyn_cast(Inst)) Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx, - ConstInt->getValue(), ConstInt->getType()); + ConstInt->getValue(), ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency); else Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(), - ConstInt->getType()); + ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency); // Ignore cheap integer constants. if (Cost > TargetTransformInfo::TCC_Basic) { @@ -416,7 +418,8 @@ void ConstantHoistingPass::collectConstantCandidates( // usually lowered to a load from constant pool. Such operation is unlikely // to be cheaper than compute it by , which can be lowered to // an ADD instruction or folded into Load/Store instruction. - int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy); + int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TargetTransformInfo::TCK_SizeAndLatency); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; bool Inserted; @@ -582,7 +585,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, for (auto User : ConstCand->Uses) { unsigned Opcode = User.Inst->getOpcode(); unsigned OpndIdx = User.OpndIdx; - Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty); + Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty, + TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n"); for (auto C2 = S; C2 != E; ++C2) { diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8252ff5e7166a..e770f41e78e9f 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -839,7 +839,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. for (Argument &AI : BB.getParent()->args()) - if (AI.hasByValOrInAllocaAttr()) + if (AI.hasPassPointeeByValueAttr()) DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); @@ -1549,7 +1549,7 @@ struct DSEState { // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. 
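
The renamed predicate used below presumably widens the old byval-or-inalloca test to also cover preallocated arguments; a sketch, since the definition itself is not shown in this patch:

#include "llvm/IR/Argument.h"

// Presumed expansion of Argument::hasPassPointeeByValueAttr().
static bool passesPointeeByValue(const llvm::Argument &A) {
  return A.hasByValAttr() || A.hasInAllocaAttr() || A.hasPreallocatedAttr();
}

All three attributes mean the callee operates on its own copy of the pointee, so stores into such arguments are invisible to the caller once the function returns, which is why DSE may treat them like local allocas.
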
for (Argument &AI : F.args()) - if (AI.hasByValOrInAllocaAttr()) + if (AI.hasPassPointeeByValueAttr()) State.InvisibleToCallerBeforeRet.insert(&AI); return State; } diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index b3bc2b196426e..904cfa27fef8f 100644 --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -1767,10 +1768,9 @@ IntersectUnsignedRange(ScalarEvolution &SE, PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) { auto &SE = AM.getResult(F); auto &DT = AM.getResult(F); + auto &BPI = AM.getResult(F); LoopInfo &LI = AM.getResult(F); - BranchProbabilityInfo BPI; - BPI.calculate(F, LI); InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI); bool Changed = false; diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index ee822763b3497..eddf521ca0243 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1261,7 +1261,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { if (const GetElementPtrInst *GEP = dyn_cast(&I)) { - if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) + if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != + TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getUserCost because currently it // optimistically assume that a GEP will fold into addressing mode @@ -1276,7 +1277,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, } return true; } else - return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; + return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + TargetTransformInfo::TCC_Free; } /// Return true if the only users of this instruction are outside of diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 8e04e6e0ffe83..8f6c1d90655d4 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -903,15 +903,14 @@ class LoopDistributeForLoop { /// \p PtrToPartition contains the partition number for pointers. Partition /// number -1 means that the pointer is used in multiple partitions. In this /// case we can't safely omit the check. 
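
The RuntimePointerCheck type adopted below is assumed to be a freestanding alias hoisted out of the RuntimePointerChecking class, roughly:

// Assumed shape, in llvm/Analysis/LoopAccessAnalysis.h around this change.
using RuntimePointerCheck =
    std::pair<const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup *>;

That would explain why the lambdas can still dereference Check.first->Members and Check.second->Members unchanged.
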
- SmallVector - includeOnlyCrossPartitionChecks( - const SmallVectorImpl &AllChecks, + SmallVector includeOnlyCrossPartitionChecks( + const SmallVectorImpl &AllChecks, const SmallVectorImpl &PtrToPartition, const RuntimePointerChecking *RtPtrChecking) { - SmallVector Checks; + SmallVector Checks; copy_if(AllChecks, std::back_inserter(Checks), - [&](const RuntimePointerChecking::PointerCheck &Check) { + [&](const RuntimePointerCheck &Check) { for (unsigned PtrIdx1 : Check.first->Members) for (unsigned PtrIdx2 : Check.second->Members) // Only include this check if there is a pair of pointers diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 78460bfc5c003..c98d652645ed5 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -377,7 +377,7 @@ class LoadEliminationForLoop { /// Determine the pointer alias checks to prove that there are no /// intervening stores. - SmallVector collectMemchecks( + SmallVector collectMemchecks( const SmallVectorImpl &Candidates) { SmallPtrSet PtrsWrittenOnFwdingPath = @@ -391,10 +391,10 @@ class LoadEliminationForLoop { std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr)); const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks(); - SmallVector Checks; + SmallVector Checks; copy_if(AllChecks, std::back_inserter(Checks), - [&](const RuntimePointerChecking::PointerCheck &Check) { + [&](const RuntimePointerCheck &Check) { for (auto PtrIdx1 : Check.first->Members) for (auto PtrIdx2 : Check.second->Members) if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath, @@ -520,8 +520,7 @@ class LoadEliminationForLoop { // Check intervening may-alias stores. These need runtime checks for alias // disambiguation. - SmallVector Checks = - collectMemchecks(Candidates); + SmallVector Checks = collectMemchecks(Candidates); // Too many checks are likely to outweigh the benefits of forwarding. if (Checks.size() > Candidates.size() * CheckPerElim) { diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 4f83205b4ee03..11cfa563a47bc 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -362,8 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, // For the new PM, we also can't use BranchProbabilityInfo as an analysis // pass. Function analyses need to be preserved across loop transformations // but BPI is not preserved, hence a newly built one is needed. - BranchProbabilityInfo BPI; - BPI.calculate(*F, AR.LI); + BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI); LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 36f8c68ef496d..652ff6bfb6d88 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4911,19 +4911,24 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, // Ignore formulae which may not be ideal in terms of register reuse of // ReqRegs. The formula should use all required registers before // introducing new ones. 
- int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); - for (const SCEV *Reg : ReqRegs) { - if ((F.ScaledReg && F.ScaledReg == Reg) || - is_contained(F.BaseRegs, Reg)) { - --NumReqRegsToFind; - if (NumReqRegsToFind == 0) - break; + // This can sometimes (notably when trying to favour postinc) lead to + // sub-optimial decisions. There it is best left to the cost modelling to + // get correct. + if (!TTI.shouldFavorPostInc() || LU.Kind != LSRUse::Address) { + int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); + for (const SCEV *Reg : ReqRegs) { + if ((F.ScaledReg && F.ScaledReg == Reg) || + is_contained(F.BaseRegs, Reg)) { + --NumReqRegsToFind; + if (NumReqRegsToFind == 0) + break; + } + } + if (NumReqRegsToFind != 0) { + // If none of the formulae satisfied the required registers, then we could + // clear ReqRegs and try again. Currently, we simply give up in this case. + continue; } - } - if (NumReqRegsToFind != 0) { - // If none of the formulae satisfied the required registers, then we could - // clear ReqRegs and try again. Currently, we simply give up in this case. - continue; } // Evaluate the cost of the current formula. If it's already worse than diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index a1df49300b906..0406592dd8948 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -446,7 +446,7 @@ static Optional analyzeLoopUnrollCost( // First accumulate the cost of this instruction. if (!Cost.IsFree) { - UnrolledCost += TTI.getUserCost(I); + UnrolledCost += TTI.getUserCost(I, TargetTransformInfo::TCK_CodeSize); LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); LLVM_DEBUG(I->dump()); @@ -539,7 +539,7 @@ static Optional analyzeLoopUnrollCost( // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I); + RolledDynamicCost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index e4bc345819b10..468c9b824f61b 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -381,7 +381,7 @@ static void analyzeParsePointLiveness( dbgs() << " " << V->getName() << " " << *V << "\n"; } if (PrintLiveSetSize) { - dbgs() << "Safepoint For: " << Call->getCalledValue()->getName() << "\n"; + dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n"; dbgs() << "Number live values: " << LiveSet.size() << "\n"; } Result.LiveSet = LiveSet; @@ -1260,7 +1260,8 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. 
-static AttributeList legalizeCallAttributes(AttributeList AL) { +static AttributeList legalizeCallAttributes(LLVMContext &Ctx, + AttributeList AL) { if (AL.isEmpty()) return AL; @@ -1274,7 +1275,6 @@ static AttributeList legalizeCallAttributes(AttributeList AL) { } // Just skip parameter and return attributes for now - LLVMContext &Ctx = AL.getContext(); return AttributeList::get(Ctx, AttributeList::FunctionIndex, AttributeSet::get(Ctx, FnAttrs)); } @@ -1481,7 +1481,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ assert(DeoptLowering.equals("live-through") && "Unsupported value!"); } - Value *CallTarget = Call->getCalledValue(); + Value *CallTarget = Call->getCalledOperand(); if (Function *F = dyn_cast(CallTarget)) { if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) { // Calls to llvm.experimental.deoptimize are lowered to calls to the @@ -1520,7 +1520,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPCall->setAttributes(legalizeCallAttributes(CI->getAttributes())); + SPCall->setAttributes( + legalizeCallAttributes(CI->getContext(), CI->getAttributes())); Token = SPCall; @@ -1546,7 +1547,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ // function attributes. In case if we can handle this set of attributes - // set up function attrs directly on statepoint and return attrs later for // gc_result intrinsic. - SPInvoke->setAttributes(legalizeCallAttributes(II->getAttributes())); + SPInvoke->setAttributes( + legalizeCallAttributes(II->getContext(), II->getAttributes())); Token = SPInvoke; @@ -1988,7 +1990,9 @@ chainToBasePointerCost(SmallVectorImpl &Chain, "non noop cast is found during rematerialization"); Type *SrcTy = CI->getOperand(0)->getType(); - Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI); + Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, + TargetTransformInfo::TCK_SizeAndLatency, + CI); } else if (GetElementPtrInst *GEP = dyn_cast(Instr)) { // Cost of the address calculation diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index ba97ed428dfda..e6477e3d259bb 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -361,9 +361,7 @@ class SCCPSolver : public InstVisitor { // prints a debug message with the updated value. void pushToWorkListMsg(ValueLatticeElement &IV, Value *V) { LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n'); - if (IV.isOverdefined()) - return OverdefinedInstWorkList.push_back(V); - InstWorkList.push_back(V); + pushToWorkList(IV, V); } // markConstant - Make a value be marked as "constant". If the value @@ -1223,22 +1221,23 @@ void SCCPSolver::handleCallResult(CallBase &CB) { Value *CopyOf = CB.getOperand(0); auto *PI = getPredicateInfoFor(&CB); auto *PBranch = dyn_cast_or_null(PI); + ValueLatticeElement OriginalVal = getValueState(CopyOf); if (!PI || !PBranch) { - mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf)); + mergeInValue(ValueState[&CB], &CB, OriginalVal); return; } // Everything below relies on the condition being a comparison. 
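
A note on the OriginalVal hoisting above: getValueState(CopyOf) is now read once into a by-value copy before any of the merge paths run. ValueState is a DenseMap, and an inserting lookup can rehash it and invalidate outstanding references, so caching the lattice value up front keeps every branch working from one stable snapshot; this rationale is our reading, the patch does not state it. A minimal illustration of the hazard, ours rather than the patch's:

#include "llvm/ADT/DenseMap.h"

void sketch(llvm::DenseMap<int, int> &M, int A, int B) {
  int &RefA = M[A]; // reference into the map
  M[B] = 1;         // may rehash; RefA can now dangle
  // taking a by-value copy of M[A] before the insertion avoids this
}
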
auto *Cmp = dyn_cast<CmpInst>(PBranch->Condition); if (!Cmp) { - mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf)); + mergeInValue(ValueState[&CB], &CB, OriginalVal); return; } Value *CmpOp0 = Cmp->getOperand(0); Value *CmpOp1 = Cmp->getOperand(1); if (CopyOf != CmpOp0 && CopyOf != CmpOp1) { - mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf)); + mergeInValue(ValueState[&CB], &CB, OriginalVal); return; } @@ -1259,7 +1258,6 @@ void SCCPSolver::handleCallResult(CallBase &CB) { ValueLatticeElement CondVal = getValueState(CmpOp1); ValueLatticeElement &IV = ValueState[&CB]; - ValueLatticeElement OriginalVal = getValueState(CopyOf); if (CondVal.isConstantRange() || OriginalVal.isConstantRange()) { auto NewCR = ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType())); @@ -1299,7 +1297,7 @@ void SCCPSolver::handleCallResult(CallBase &CB) { return; } - return (void)mergeInValue(IV, &CB, getValueState(CopyOf)); + return (void)mergeInValue(IV, &CB, OriginalVal); } } diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 0d375bb3a6eb5..f1d2e3c1ecfad 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -704,7 +704,7 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { } BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]); - assert(BO->getNumUses() <= 1 && + assert((BO->use_empty() || BO->hasOneUse()) && "distributeExtsAndCloneChain clones each BinaryOperator in " "UserChain, so no one should be used more than " "once"); diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 2e2e8cdb986be..1085a2922d46a 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2660,7 +2660,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, if (CB->isConvergent() || CB->cannotDuplicate()) return false; - Cost += TTI.getUserCost(&I); + Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); } assert(Cost >= 0 && "Must not have negative costs!"); LoopCost += Cost; diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp index 062d5fb8f56c6..8258b92a716d2 100644 --- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -232,7 +232,8 @@ static bool isSafeAndProfitableToSpeculateAroundPHI( continue; int &MatCost = InsertResult.first->second.MatCost; - MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType()); + MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); NonFreeMat |= MatCost != TTI.TCC_Free; } if (!NonFreeMat) { @@ -283,12 +284,15 @@ static bool isSafeAndProfitableToSpeculateAroundPHI( int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; if (IID) - FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), - IncomingC->getType()); + FoldedCost += + TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), + IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); else FoldedCost += TTI.getIntImmCostInst(UserI->getOpcode(), Idx, - IncomingC->getValue(), IncomingC->getType()); + IncomingC->getValue(), IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); // If we
accumulate more folded cost for this incoming constant than // materialized cost, then we'll regress any edge with this constant so @@ -465,7 +469,7 @@ findProfitablePHIs(ArrayRef<PHINode *> PNs, if (CostMapIt != SpecCostMap.end()) Cost += CostMapIt->second; } - Cost += TTI.getUserCost(I); + Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); bool Inserted = SpecCostMap.insert({I, Cost}).second; (void)Inserted; assert(Inserted && "Must not re-insert a cost during the DFS!"); diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index 6ad5c621abe4b..1e37120d6b6d6 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -244,7 +244,7 @@ static unsigned ComputeSpeculationCost(const Instruction *I, case Instruction::FNeg: case Instruction::ICmp: case Instruction::FCmp: - return TTI.getUserCost(I); + return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: return UINT_MAX; // Disallow anything not whitelisted. diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp index 8cdf01e809354..8060e50abee99 100644 --- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp +++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp @@ -112,8 +112,11 @@ void CallGraphUpdater::removeFunction(Function &DeadFn) { DeadFunctions.push_back(&DeadFn); // For the old call graph we remove the function from the SCC right away. - if (CG && !ReplacedFunctions.count(&DeadFn)) - CGSCC->DeleteNode((*CG)[&DeadFn]); + if (CG && !ReplacedFunctions.count(&DeadFn)) { + CallGraphNode *DeadCGN = (*CG)[&DeadFn]; + DeadCGN->removeAllCalledFunctions(); + CGSCC->DeleteNode(DeadCGN); + } } void CallGraphUpdater::replaceFunctionWith(Function &OldFn, Function &NewFn) { diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index d087717d1ecfe..a7a0290ee475f 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -255,6 +255,34 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] /// br %normal_dst /// +/// An indirect musttail call is processed slightly differently in that: +/// 1. No merge block is needed for the original and the cloned callsite, since +/// either one ends the flow. No phi node is needed either. +/// 2. The return statement following the original call site is duplicated too +/// and placed immediately after the cloned call site per the IR convention. +/// +/// For example, the musttail call instruction below: +/// +/// orig_bb: +/// %t0 = musttail call i32 %ptr() +/// ... +/// +/// Is replaced by the following: +/// +/// cond_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %orig_bb +/// +/// then_bb: +/// ; The clone of the original call instruction is placed in the "then" +/// ; block. It is not yet promoted. +/// %t1 = musttail call i32 %ptr() +/// ret %t1 +/// +/// orig_bb: +/// ; The original call instruction stays in its original block. +/// %t0 = musttail call i32 %ptr() +/// ret %t0 static CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights) { @@ -264,9 +292,47 @@ static CallBase &versionCallSite(CallBase &CB, Value *Callee, // Create the compare. The called value and callee must have the same type to // be compared.
- if (CB.getCalledValue()->getType() != Callee->getType()) - Callee = Builder.CreateBitCast(Callee, CB.getCalledValue()->getType()); - auto *Cond = Builder.CreateICmpEQ(CB.getCalledValue(), Callee); + if (CB.getCalledOperand()->getType() != Callee->getType()) + Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType()); + auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee); + + if (OrigInst->isMustTailCall()) { + // Create an if-then structure. The original instruction stays in its block, + // and a clone of the original instruction is placed in the "then" block. + Instruction *ThenTerm = + SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights); + BasicBlock *ThenBlock = ThenTerm->getParent(); + ThenBlock->setName("if.true.direct_targ"); + CallBase *NewInst = cast<CallBase>(OrigInst->clone()); + NewInst->insertBefore(ThenTerm); + + // Place a clone of the optional bitcast after the new call site. + Value *NewRetVal = NewInst; + auto Next = OrigInst->getNextNode(); + if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) { + assert(BitCast->getOperand(0) == OrigInst && + "bitcast following musttail call must use the call"); + auto NewBitCast = BitCast->clone(); + NewBitCast->replaceUsesOfWith(OrigInst, NewInst); + NewBitCast->insertBefore(ThenTerm); + NewRetVal = NewBitCast; + Next = BitCast->getNextNode(); + } + + // Place a clone of the return instruction after the new call site. + ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next); + assert(Ret && "musttail call must precede a ret with an optional bitcast"); + auto NewRet = Ret->clone(); + if (Ret->getReturnValue()) + NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal); + NewRet->insertBefore(ThenTerm); + + // A return instruction is a terminator, so we don't need the terminator + // instruction just created. + ThenTerm->eraseFromParent(); + + return *NewInst; + } // Create an if-then-else structure.
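Before the non-musttail path continues below, the musttail fast path just shown can be condensed into the following hedged sketch. It assumes only SplitBlockAndInsertIfThen from llvm/Transforms/Utils/BasicBlockUtils.h; the free function is hypothetical and merely restates the hunk, it is not a separate API introduced by the patch.

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
// Branch on Cond just before CB: the original call keeps its block (the
// false path) and an unpromoted clone of it goes into the new "then" block.
// No merge block or phi is needed because both paths end in a return.
static CallBase *cloneCallIntoThenBlock(Value *Cond, CallBase &CB,
                                        MDNode *BranchWeights) {
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false,
                                BranchWeights);
  CallBase *Clone = cast<CallBase>(CB.clone());
  Clone->insertBefore(ThenTerm);
  return Clone;
}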
The original instruction is moved into // the "else" block, and a clone of the original instruction is placed in the @@ -317,7 +383,7 @@ static CallBase &versionCallSite(CallBase &CB, Value *Callee, return *NewInst; } -bool llvm::isLegalToPromote(CallBase &CB, Function *Callee, +bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason) { assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted"); @@ -462,7 +528,7 @@ bool llvm::tryPromoteCall(CallBase &CB) { assert(!CB.getCalledFunction()); Module *M = CB.getCaller()->getParent(); const DataLayout &DL = M->getDataLayout(); - Value *Callee = CB.getCalledValue(); + Value *Callee = CB.getCalledOperand(); LoadInst *VTableEntryLoad = dyn_cast(Callee); if (!VTableEntryLoad) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 5532c2cc5aaf0..02c192232931f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -878,6 +878,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoSync: case Attribute::None: case Attribute::NonNull: + case Attribute::Preallocated: case Attribute::ReadNone: case Attribute::ReadOnly: case Attribute::Returned: diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp index 451a68e7e3ce2..c5dfbf9d92d13 100644 --- a/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -266,7 +266,7 @@ static Function *getFunction(Constant *C) { Function * Evaluator::getCalleeWithFormalArgs(CallBase &CB, SmallVectorImpl &Formals) { - auto *V = CB.getCalledValue(); + auto *V = CB.getCalledOperand(); if (auto *Fn = getFunction(getVal(V))) return getFormalParams(CB, Fn, Formals) ? Fn : nullptr; @@ -486,7 +486,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } // Cannot handle inline asm. - if (isa(CB.getCalledValue())) { + if (CB.isInlineAsm()) { LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); return false; } @@ -568,7 +568,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) { - InstResult = castCallResultIfNeeded(CB.getCalledValue(), C); + InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C); if (!InstResult) return false; LLVM_DEBUG(dbgs() << "Constant folded function call. 
Result: " @@ -591,7 +591,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; } ValueStack.pop_back(); - InstResult = castCallResultIfNeeded(CB.getCalledValue(), RetVal); + InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal); if (RetVal && !InstResult) return false; diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 655ba9350b02e..713ae41d2247e 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -171,21 +171,17 @@ int FunctionComparator::cmpRangeMetadata(const MDNode *L, return 0; } -// FIXME(CallSite): the parameters should be CallBase -int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, - const Instruction *R) const { - const CallBase *LCS = cast<CallBase>(L); - const CallBase *RCS = cast<CallBase>(R); - - assert(LCS->getOpcode() == RCS->getOpcode() && "Can't compare otherwise!"); +int FunctionComparator::cmpOperandBundlesSchema(const CallBase &LCS, + const CallBase &RCS) const { + assert(LCS.getOpcode() == RCS.getOpcode() && "Can't compare otherwise!"); if (int Res = - cmpNumbers(LCS->getNumOperandBundles(), RCS->getNumOperandBundles())) + cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) return Res; - for (unsigned i = 0, e = LCS->getNumOperandBundles(); i != e; ++i) { - auto OBL = LCS->getOperandBundleAt(i); - auto OBR = RCS->getOperandBundleAt(i); + for (unsigned I = 0, E = LCS.getNumOperandBundles(); I != E; ++I) { + auto OBL = LCS.getOperandBundleAt(I); + auto OBR = RCS.getOperandBundleAt(I); if (int Res = OBL.getTagName().compare(OBR.getTagName())) return Res; @@ -592,7 +588,7 @@ int FunctionComparator::cmpOperations(const Instruction *L, return Res; if (int Res = cmpAttrs(CBL->getAttributes(), CBR->getAttributes())) return Res; - if (int Res = cmpOperandBundlesSchema(L, R)) + if (int Res = cmpOperandBundlesSchema(*CBL, *CBR)) return Res; if (const CallInst *CI = dyn_cast<CallInst>(L)) if (int Res = cmpNumbers(CI->getTailCallKind(), @@ -660,6 +656,16 @@ int FunctionComparator::cmpOperations(const Instruction *L, return cmpNumbers(RMWI->getSyncScopeID(), cast<AtomicRMWInst>(R)->getSyncScopeID()); } + if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(L)) { + ArrayRef<int> LMask = SVI->getShuffleMask(); + ArrayRef<int> RMask = cast<ShuffleVectorInst>(R)->getShuffleMask(); + if (int Res = cmpNumbers(LMask.size(), RMask.size())) + return Res; + for (size_t i = 0, e = LMask.size(); i != e; ++i) { + if (int Res = cmpNumbers(LMask[i], RMask[i])) + return Res; + } + } if (const PHINode *PNL = dyn_cast<PHINode>(L)) { const PHINode *PNR = cast<PHINode>(R); // Ensure that in addition to the incoming values being identical diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index acf501b48b4ef..bcba256fde160 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -79,9 +79,12 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); +// Disabled by default, because the added alignment assumptions may increase +// compile-time and block optimizations. This option is not suitable for use +// with frontends that emit comprehensive parameter alignment annotations.
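To make the compile-time concern concrete: when the flag declared just below is enabled, the inliner materializes an alignment guard of roughly the following shape for every align-annotated pointer argument. This is an illustrative sketch, not the pass's actual code; the helper name is hypothetical.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Emit ptrtoint + mask + icmp + @llvm.assume, i.e. "Ptr is Align-aligned".
// These extra instructions are what can slow down and pessimize later passes.
static void emitAlignmentAssumption(IRBuilder<> &B, Value *Ptr,
                                    uint64_t Align) {
  Value *PtrInt = B.CreatePtrToInt(Ptr, B.getInt64Ty());
  Value *Masked = B.CreateAnd(PtrInt, B.getInt64(Align - 1));
  Value *IsAligned = B.CreateICmpEQ(Masked, B.getInt64(0));
  B.CreateAssumption(IsAligned); // call void @llvm.assume(i1 %IsAligned)
}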
static cl::opt PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", - cl::init(true), cl::Hidden, + cl::init(false), cl::Hidden, cl::desc("Convert align attributes to assumptions during inlining.")); static cl::opt UpdateReturnAttributes( @@ -534,7 +537,7 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( // instructions require no special handling. CallInst *CI = dyn_cast(I); - if (!CI || CI->doesNotThrow() || isa(CI->getCalledValue())) + if (!CI || CI->doesNotThrow() || CI->isInlineAsm()) continue; // We do not need to (and in fact, cannot) convert possibly throwing calls @@ -1242,7 +1245,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; - if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { + if (Align && !Arg.hasPassPointeeByValueAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { DT.recalculate(*CB.getCaller()); DTCalculated = true; @@ -1559,8 +1562,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const ProfileCount &CalleeEntryCount, - const Instruction *TheCall, - ProfileSummaryInfo *PSI, + const CallBase &TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) @@ -1810,7 +1812,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, CalledFunc->front()); - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), &CB, + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB, IFI.PSI, IFI.CallerBFI); // Inject byval arguments initialization. @@ -2150,7 +2152,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Skip call sites which are nounwind intrinsics. auto *CalledFn = - dyn_cast(I->getCalledValue()->stripPointerCasts()); + dyn_cast(I->getCalledOperand()->stripPointerCasts()); if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow()) continue; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index dd5a57124ab30..265d6d9337d93 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1963,7 +1963,7 @@ CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { SmallVector OpBundles; II->getOperandBundlesAsDefs(OpBundles); CallInst *NewCall = CallInst::Create(II->getFunctionType(), - II->getCalledValue(), Args, OpBundles); + II->getCalledOperand(), Args, OpBundles); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); NewCall->setDebugLoc(II->getDebugLoc()); @@ -2014,7 +2014,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, // as of this time. InvokeInst *II = - InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split, + InvokeInst::Create(CI->getFunctionType(), CI->getCalledOperand(), Split, UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); @@ -2045,7 +2045,7 @@ static bool markAliveBlocks(Function &F, // canonicalizes unreachable insts into stores to null or undef. 
for (Instruction &I : *BB) { if (auto *CI = dyn_cast(&I)) { - Value *Callee = CI->getCalledValue(); + Value *Callee = CI->getCalledOperand(); // Handle intrinsic calls. if (Function *F = dyn_cast(Callee)) { auto IntrinsicID = F->getIntrinsicID(); @@ -2120,7 +2120,7 @@ static bool markAliveBlocks(Function &F, Instruction *Terminator = BB->getTerminator(); if (auto *II = dyn_cast(Terminator)) { // Turn invokes that call 'nounwind' functions into ordinary calls. - Value *Callee = II->getCalledValue(); + Value *Callee = II->getCalledOperand(); if ((isa(Callee) && !NullPointerIsDefined(BB->getParent())) || isa(Callee)) { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index ab0b5a5d9bf74..a5fbdb53826b6 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemorySSA.h" diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 50752bd78a650..ce0fb62603ec2 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -44,9 +45,8 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, } } -void LoopVersioning::setAliasChecks( - SmallVector Checks) { - AliasChecks = std::move(Checks); +void LoopVersioning::setAliasChecks(ArrayRef Checks) { + AliasChecks = {Checks.begin(), Checks.end()}; } void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { @@ -194,8 +194,7 @@ void LoopVersioning::prepareNoAliasMetadata() { // Go through the checks and for each pointer group, collect the scopes for // each non-aliasing pointer group. - DenseMap> + DenseMap> GroupToNonAliasingScopes; for (const auto &Check : AliasChecks) diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 1af0ce3d86cc1..0b225e8abc4e7 100644 --- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -53,7 +53,7 @@ static bool runImpl(Function &F) { II->getOperandBundlesAsDefs(OpBundles); // Insert a normal call instruction... 
CallInst *NewCall = - CallInst::Create(II->getFunctionType(), II->getCalledValue(), + CallInst::Create(II->getFunctionType(), II->getCalledOperand(), CallArgs, OpBundles, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index be5a375a2cfa1..2b82b23142e2a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -330,7 +330,7 @@ static unsigned ComputeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); - return TTI.getUserCost(I); + return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); } /// If we have a merge point of an "if condition" as accepted above, @@ -3045,7 +3045,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - BudgetRemaining -= TTI.getUserCost(&I); + BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (BudgetRemaining < 0) return false; // Eagerly refuse to fold as soon as we're out of budget. } @@ -6103,7 +6103,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // A call to null is undefined. if (auto *CB = dyn_cast(Use)) return !NullPointerIsDefined(CB->getFunction()) && - CB->getCalledValue() == I; + CB->getCalledOperand() == I; } return false; } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 28b4b0447b1a3..a4ab13fb5e5ec 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1564,8 +1564,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { LibFunc_exp10l, B, Attrs); // pow(n, x) -> exp2(log2(n) * x) - if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() && - Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) { + if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && Pow->hasNoInfs() && + BaseF->isFiniteNonZero() && !BaseF->isNegative()) { Value *Log = nullptr; if (Ty->isFloatTy()) Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat())); @@ -1668,10 +1668,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { bool AllowApprox = Pow->hasApproxFunc(); bool Ignored; - // Bail out if simplifying libcalls to pow() is disabled. - if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) - return nullptr; - // Propagate the math semantics from the call to any created instructions. 
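The relaxed base check in replacePowWithExp above leans on the identity pow(n, x) == exp2(log2(n) * x), which holds for any finite positive n, denormals included; that is presumably why isNormal() could be weakened to isFiniteNonZero(). A quick standalone numeric check (illustrative only, not part of the patch):

#include <cmath>
#include <cstdio>
int main() {
  // With approximate-math flags set, the two forms are interchangeable for
  // finite positive bases; both prints below agree to double precision.
  double n = 3.5, x = 2.25;
  std::printf("pow : %.17g\nexp2: %.17g\n",
              std::pow(n, x), std::exp2(std::log2(n) * x));
  return 0;
}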
IRBuilderBase::FastMathFlagGuard Guard(B); B.setFastMathFlags(Pow->getFastMathFlags()); diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index f299681ef779b..b67d4a18ec432 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -38,6 +38,11 @@ cl::opt PGSOColdCodeOnlyForSamplePGO( cl::desc("Apply the profile guided size optimizations only " "to cold code under sample PGO.")); +cl::opt PGSOColdCodeOnlyForPartialSamplePGO( + "pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(true), + cl::desc("Apply the profile guided size optimizations only " + "to cold code under partial-profile sample PGO.")); + cl::opt PGSOIRPassOrTestOnly( "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false), cl::desc("Apply the profile guided size optimizations only" @@ -53,7 +58,7 @@ cl::opt PgsoCutoffInstrProf( "for instrumentation profile.")); cl::opt PgsoCutoffSampleProf( - "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore, + "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore, cl::desc("The profile guided size optimization profile summary cutoff " "for sample profile.")); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c884548eb86bb..b139f8520df32 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -407,7 +407,8 @@ class InnerLoopVectorizer { BasicBlock *createVectorizedLoopSkeleton(); /// Widen a single instruction within the innermost loop. - void widenInstruction(Instruction &I); + void widenInstruction(Instruction &I, VPUser &Operands, + VPTransformState &State); /// Widen a single call instruction within the innermost loop. void widenCallInstruction(CallInst &I, VPUser &ArgOperands, @@ -3276,7 +3277,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, // to be vectors, so we need to extract individual elements from there, // execute VF scalar calls, and then gather the result into the vector return // value. - unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys); + unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, + TTI::TCK_RecipThroughput); if (VF == 1) return ScalarCallCost; @@ -3301,7 +3303,8 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, return Cost; // If the corresponding vector cost is cheaper, return its cost. 
- unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); + unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys, + TTI::TCK_RecipThroughput); if (VectorCallCost < Cost) { NeedToScalarize = false; return VectorCallCost; @@ -3319,7 +3322,9 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, FMF = FPMO->getFastMathFlags(); SmallVector Operands(CI->arg_operands()); - return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, CI); + return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, + TargetTransformInfo::TCK_RecipThroughput, + CI); } static Type *smallestIntegerVectorType(Type *T1, Type *T2) { @@ -4231,7 +4236,8 @@ static bool mayDivideByZero(Instruction &I) { return !CInt || CInt->isZero(); } -void InnerLoopVectorizer::widenInstruction(Instruction &I) { +void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User, + VPTransformState &State) { switch (I.getOpcode()) { case Instruction::Call: case Instruction::Br: @@ -4263,8 +4269,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Ops; - for (Value *Op : I.operands()) - Ops.push_back(getOrCreateVectorValue(Op, Part)); + for (VPValue *VPOp : User.operands()) + Ops.push_back(State.get(VPOp, Part)); Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); @@ -4285,8 +4291,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { auto *Cmp = cast(&I); setDebugLocFromInst(Builder, Cmp); for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); - Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); + Value *A = State.get(User.getOperand(0), Part); + Value *B = State.get(User.getOperand(1), Part); Value *C = nullptr; if (FCmp) { // Propagate fast math flags. @@ -4323,7 +4329,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); + Value *A = State.get(User.getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); VectorLoopValueMap.setVectorValue(&I, Part, Cast); addMetadata(Cast, &I); @@ -4382,22 +4388,17 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands, if (VF > 1) TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); + assert(VectorF && "Can't retrieve vector intrinsic."); } else { // Use vector version of the function call. const VFShape Shape = VFShape::get(*CI, {VF, false} /*EC*/, false /*HasGlobalPred*/); #ifndef NDEBUG - const SmallVector Infos = VFDatabase::getMappings(*CI); - assert(std::find_if(Infos.begin(), Infos.end(), - [&Shape](const VFInfo &Info) { - return Info.Shape == Shape; - }) != Infos.end() && - "Vector function shape is missing from the database."); + assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr && + "Can't create vector function."); #endif VectorF = VFDatabase(*CI).getVectorizedFunction(Shape); } - assert(VectorF && "Can't create vector function."); - SmallVector OpBundles; CI->getOperandBundlesAsDefs(OpBundles); CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); @@ -5701,8 +5702,9 @@ int LoopVectorizationCostModel::computePredInstDiscount( // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. 
if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) { - ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF), - true, false); + ScalarCost += TTI.getScalarizationOverhead( + cast(ToVectorTy(I->getType(), VF)), + APInt::getAllOnesValue(VF), true, false); ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI); } @@ -5718,7 +5720,8 @@ int LoopVectorizationCostModel::computePredInstDiscount( Worklist.push_back(J); else if (needsExtract(J, VF)) ScalarCost += TTI.getScalarizationOverhead( - ToVectorTy(J->getType(),VF), false, true); + cast(ToVectorTy(J->getType(), VF)), + APInt::getAllOnesValue(VF), false, true); } // Scale the total scalar cost by block probability. @@ -5833,7 +5836,8 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // vectorized loop where the user of it is a vectorized instruction. const MaybeAlign Alignment = getLoadStoreAlignment(I); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), - Alignment, AS); + Alignment, AS, + TTI::TCK_RecipThroughput); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -5861,6 +5865,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); @@ -5868,9 +5873,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned Cost = 0; if (Legal->isMaskRequired(I)) Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, - Alignment ? Alignment->value() : 0, AS); + Alignment ? Alignment->value() : 0, AS, + CostKind); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, + CostKind, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -5884,16 +5891,19 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, auto *VectorTy = cast(ToVectorTy(ValTy, VF)); const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); + enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS, + CostKind) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } StoreInst *SI = cast(I); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS, + CostKind) + (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, @@ -5910,7 +5920,9 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), - Alignment ? Alignment->value() : 0, I); + Alignment ? 
Alignment->value() : 0, + TargetTransformInfo::TCK_RecipThroughput, + I); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, @@ -5939,7 +5951,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); unsigned Cost = TTI.getInterleavedMemoryOpCost( I->getOpcode(), WideVecTy, Group->getFactor(), Indices, - Group->getAlign().value(), AS, Legal->isMaskRequired(I), UseMaskForGaps); + Group->getAlign().value(), AS, TTI::TCK_RecipThroughput, + Legal->isMaskRequired(I), UseMaskForGaps); if (Group->isReverse()) { // TODO: Add support for reversed masked interleaved access. @@ -5961,7 +5974,8 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, unsigned AS = getLoadStoreAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, + TTI::TCK_RecipThroughput, I); } return getWideningCost(I, VF); } @@ -6002,7 +6016,8 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, Type *RetTy = ToVectorTy(I->getType(), VF); if (!RetTy->isVoidTy() && (!isa(I) || !TTI.supportsEfficientVectorElementLoadStore())) - Cost += TTI.getScalarizationOverhead(RetTy, true, false); + Cost += TTI.getScalarizationOverhead( + cast(RetTy), APInt::getAllOnesValue(VF), true, false); // Some targets keep addresses scalar. if (isa(I) && !TTI.prefersVectorizedAddressing()) @@ -6182,6 +6197,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -6206,9 +6222,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (ScalarPredicatedBB) { // Return cost for branches around scalarized and predicated blocks. - Type *Vec_i1Ty = + VectorType *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF); - return (TTI.getScalarizationOverhead(Vec_i1Ty, false, true) + + return (TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnesValue(VF), + false, true) + (TTI.getCFInstrCost(Instruction::Br) * VF)); } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1) // The back-edge branch will remain, as will all scalar branches. @@ -6237,7 +6254,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, return (Phi->getNumIncomingValues() - 1) * TTI.getCmpSelInstrCost( Instruction::Select, ToVectorTy(Phi->getType(), VF), - ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF), + CostKind); return TTI.getCFInstrCost(Instruction::PHI); } @@ -6259,7 +6277,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Cost += VF * TTI.getCFInstrCost(Instruction::PHI); // The cost of the non-predicated instruction. - Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy); + Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind); // The cost of insertelement and extractelement instructions needed for // scalarization. 
@@ -6300,13 +6318,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, SmallVector Operands(I->operand_values()); unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, + I->getOpcode(), VectorTy, CostKind, + TargetTransformInfo::OK_AnyValue, Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, + I->getOpcode(), VectorTy, CostKind, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, I->getOperand(0), I); @@ -6319,7 +6339,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, + CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6328,7 +6349,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind, + I); } case Instruction::Store: case Instruction::Load: { @@ -6361,7 +6383,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (isOptimizableIVTruncate(I, VF)) { auto *Trunc = cast(I); return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), - Trunc->getSrcTy(), Trunc); + Trunc->getSrcTy(), CostKind, Trunc); } Type *SrcScalarTy = I->getOperand(0)->getType(); @@ -6387,7 +6409,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; - return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); + return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, + CostKind, I); } case Instruction::Call: { bool NeedToScalarize; @@ -6400,7 +6423,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. - return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + + return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, + CostKind) + getScalarizationOverhead(I, VF); } // end of switch. } @@ -6936,12 +6960,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return nullptr; - // Success: widen this call. - auto VPValues = map_range(CI->arg_operands(), [&Plan](Value *Op) { - return Plan.getOrAddVPValue(Op); - }); - - return new VPWidenCallRecipe(*CI, VPValues); + return new VPWidenCallRecipe(*CI, Plan.mapToVPValues(CI->arg_operands())); } bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { @@ -7004,7 +7023,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VPlan &Plan) const { return nullptr; // Success: widen this instruction. 
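On the execution side, the widenInstruction change shown in the hunks around here replaces lookups in the legacy IR-value map with per-unroll-part queries through VPTransformState. A hedged sketch of that pattern (it assumes compilation inside llvm/lib/Transforms/Vectorize, where the private VPlan.h header is visible; the helper itself is hypothetical):

#include "VPlan.h"
using namespace llvm;
// Fetch the widened value of each operand for one unroll part through
// VPTransformState instead of the legacy getOrCreateVectorValue map.
static void collectOperandsForPart(VPUser &User, VPTransformState &State,
                                   unsigned Part,
                                   SmallVectorImpl<Value *> &Ops) {
  for (VPValue *VPOp : User.operands())
    Ops.push_back(State.get(VPOp, Part));
}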
- return new VPWidenRecipe(*I); + return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); } VPBasicBlock *VPRecipeBuilder::handleReplication( @@ -7411,7 +7430,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(Ingredient); + State.ILV->widenInstruction(Ingredient, User, State); } void VPWidenGEPRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 00560ba010a97..008d4002dd835 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -666,6 +666,15 @@ class BoUpSLP { /// may not be necessary. bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const; + /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values + /// can be load combined in the backend. Load combining may not be allowed in + /// the IR optimizer, so we do not want to alter the pattern. For example, + /// partially transforming a scalar bswap() pattern into vector code is + /// effectively impossible for the backend to undo. + /// TODO: If load combining is allowed in the IR optimizer, this analysis + /// may not be necessary. + bool isLoadCombineCandidate() const; + OptimizationRemarkEmitter *getORE() { return ORE; } /// This structure holds any data we need about the edges being traversed @@ -3259,7 +3268,8 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI, VectorType::get(Arg->getType(), VecTy->getNumElements())); // If the corresponding vector call is cheaper, return its cost. - LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys); + LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys, + TTI::TCK_RecipThroughput); } return {IntrinsicCost, LibCost}; } @@ -3273,6 +3283,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { else if (CmpInst *CI = dyn_cast(VL[0])) ScalarTy = CI->getOperand(0)->getType(); VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // If we have computed a smaller type for the expression, update VecTy so // that the costs will be accurate. @@ -3380,7 +3391,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { Ext->getOpcode(), Ext->getType(), VecTy, i); // Add back the cost of s|zext which is subtracted separately. DeadCost += TTI->getCastInstrCost( - Ext->getOpcode(), Ext->getType(), E->getType(), Ext); + Ext->getOpcode(), Ext->getType(), E->getType(), CostKind, + Ext); continue; } } @@ -3404,7 +3416,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); int ScalarEltCost = - TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, CostKind, + VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -3417,7 +3430,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Check if the values are candidates to demote. if (!MinBWs.count(VL0) || VecTy != SrcVecTy) { VecCost = ReuseShuffleCost + - TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, + CostKind, VL0); } return VecCost - ScalarCost; } @@ -3426,13 +3440,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::Select: { // Calculate the cost of this instruction. 
int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, - Builder.getInt1Ty(), VL0); + Builder.getInt1Ty(), + CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0); + int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CostKind, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::FNeg: @@ -3493,13 +3509,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { SmallVector Operands(VL0->operand_values()); int ScalarEltCost = TTI->getArithmeticInstrCost( - E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); + E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, + Operands, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; int VecCost = TTI->getArithmeticInstrCost( - E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); + E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, + Operands, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::GetElementPtr: { @@ -3509,26 +3527,30 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { TargetTransformInfo::OK_UniformConstantValue; int ScalarEltCost = - TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK); + TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind, + Op1VK, Op2VK); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; int VecCost = - TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK); + TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind, + Op1VK, Op2VK); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::Load: { // Cost of wide load - cost of scalar loads. MaybeAlign alignment(cast(VL0)->getAlignment()); int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, + CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost; int VecLdCost = - TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, + CostKind, VL0); if (!E->ReorderIndices.empty()) { // TODO: Merge this shuffle with the ReuseShuffleCost. VecLdCost += TTI->getShuffleCost( @@ -3543,12 +3565,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { cast(IsReorder ? VL[E->ReorderIndices.front()] : VL0); MaybeAlign Alignment(SI->getAlignment()); int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, + CostKind, VL0); if (NeedToShuffleReuses) ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost; int ScalarStCost = VecTy->getNumElements() * ScalarEltCost; int VecStCost = TTI->getMemoryOpCost(Instruction::Store, - VecTy, Alignment, 0, VL0); + VecTy, Alignment, 0, CostKind, VL0); if (IsReorder) { // TODO: Merge this shuffle with the ReuseShuffleCost. 
VecStCost += TTI->getShuffleCost( @@ -3570,7 +3593,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { FMF = FPMO->getFastMathFlags(); int ScalarEltCost = - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF); + TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -3596,34 +3619,34 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { if (NeedToShuffleReuses) { for (unsigned Idx : E->ReuseShuffleIndices) { Instruction *I = cast(VL[Idx]); - ReuseShuffleCost -= TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind); } for (Value *V : VL) { Instruction *I = cast(V); - ReuseShuffleCost += TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ReuseShuffleCost += TTI->getInstructionCost(I, CostKind); } } for (Value *V : VL) { Instruction *I = cast(V); assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); - ScalarCost += TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ScalarCost += TTI->getInstructionCost(I, CostKind); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. int VecCost = 0; if (Instruction::isBinaryOp(E->getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy); - VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy); + VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); + VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, + CostKind); } else { Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size()); VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty); - VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty); + VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + CostKind); + VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + CostKind); } VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; @@ -3659,24 +3682,19 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const { return true; } -bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { - if (RdxOpcode != Instruction::Or) - return false; - - unsigned NumElts = VectorizableTree[0]->Scalars.size(); - Value *FirstReduced = VectorizableTree[0]->Scalars[0]; - - // Look past the reduction to find a source value. Arbitrarily follow the +static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts, + TargetTransformInfo *TTI) { + // Look past the root to find a source value. Arbitrarily follow the // path through operand 0 of any 'or'. Also, peek through optional // shift-left-by-constant. - Value *ZextLoad = FirstReduced; + Value *ZextLoad = Root; while (match(ZextLoad, m_Or(m_Value(), m_Value())) || match(ZextLoad, m_Shl(m_Value(), m_Constant()))) ZextLoad = cast(ZextLoad)->getOperand(0); - // Check if the input to the reduction is an extended load. + // Check if the input is an extended load of the required or/shift expression. Value *LoadPtr; - if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) + if (ZextLoad == Root || !match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) return false; // Require that the total load bit width is a legal integer type. 
@@ -3684,15 +3702,36 @@ bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { // But <16 x i8> --> i128 is not, so the backend probably can't reduce it. Type *SrcTy = LoadPtr->getType()->getPointerElementType(); unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts; - LLVMContext &Context = FirstReduced->getContext(); - if (!TTI->isTypeLegal(IntegerType::get(Context, LoadBitWidth))) + if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth))) return false; // Everything matched - assume that we can fold the whole sequence using // load combining. - LLVM_DEBUG(dbgs() << "SLP: Assume load combining for scalar reduction of " - << *(cast<Instruction>(FirstReduced)) << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at " + << *(cast<Instruction>(Root)) << "\n"); + + return true; +} + +bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { + if (RdxOpcode != Instruction::Or) + return false; + + unsigned NumElts = VectorizableTree[0]->Scalars.size(); + Value *FirstReduced = VectorizableTree[0]->Scalars[0]; + return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI); +} +bool BoUpSLP::isLoadCombineCandidate() const { + // Peek through a final sequence of stores and check if all operations are + // likely to be load-combined. + unsigned NumElts = VectorizableTree[0]->Scalars.size(); + for (Value *Scalar : VectorizableTree[0]->Scalars) { + Value *X; + if (!match(Scalar, m_Store(m_Value(X), m_Value())) || + !isLoadCombineCandidateImpl(X, NumElts, TTI)) + return false; + } return true; } @@ -3874,10 +3913,13 @@ int BoUpSLP::getTreeCost() { int BoUpSLP::getGatherCost(VectorType *Ty, const DenseSet<unsigned> &ShuffledIndices) const { - int Cost = 0; - for (unsigned i = 0, e = Ty->getNumElements(); i < e; ++i) + unsigned NumElts = Ty->getNumElements(); + APInt DemandedElts = APInt::getNullValue(NumElts); + for (unsigned i = 0; i < NumElts; ++i) if (!ShuffledIndices.count(i)) - Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + DemandedElts.setBit(i); + int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true, + /*Extract*/ false); if (!ShuffledIndices.empty()) Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty); return Cost; @@ -5738,6 +5780,8 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, } if (R.isTreeTinyAndNotFullyVectorizable()) return false; + if (R.isLoadCombineCandidate()) + return false; R.computeMinimumValueSizes(); @@ -5990,6 +6034,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, } if (R.isTreeTinyAndNotFullyVectorizable()) continue; + if (R.isLoadCombineCandidate()) + return false; R.computeMinimumValueSizes(); int Cost = R.getTreeCost() - UserCost; @@ -7015,6 +7061,34 @@ static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI, "Expected insertelement or insertvalue instruction!"); UserCost = 0; do { + // TODO: Use TTI's getScalarizationOverhead for a sequence of inserts rather + // than the sum of single inserts, as the latter may overestimate the cost. + // This work should also improve cost estimation for extracts that are + // added to serve users external to the vectorization tree.
+ // For example, in the following case all extracts are added in order to + // feed into external users (inserts), which in turn form a sequence that + // builds an aggregate we do match here: + // %4 = extractelement <4 x i64> %3, i32 0 + // %v0 = insertelement <4 x i64> undef, i64 %4, i32 0 + // %5 = extractelement <4 x i64> %3, i32 1 + // %v1 = insertelement <4 x i64> %v0, i64 %5, i32 1 + // %6 = extractelement <4 x i64> %3, i32 2 + // %v2 = insertelement <4 x i64> %v1, i64 %6, i32 2 + // %7 = extractelement <4 x i64> %3, i32 3 + // %v3 = insertelement <4 x i64> %v2, i64 %7, i32 3 + // + // The cost of this entire sequence is currently estimated as the sum of + // single extracts (as this aggregate build sequence is a user external to + // the vectorization tree) minus the cost of the aggregate build. + // As this whole sequence will be optimized away we want the cost to be + // zero. But that is not quite achievable with the given approach (at least + // for X86), because inserts can be more expensive than extracts for longer + // vector lengths, so the difference turns out to be non-zero in such cases. + // Ideally we want to match this entire sequence and treat it as a no-op + // (i.e. not count it into the final cost at all). + // Currently the difference tends to be negative, thus adding a bias + // toward favoring vectorization. If we switch to using the TTI interface + // the bias tendency will remain, but it will be lower. Value *InsertedOperand; if (auto *IE = dyn_cast<InsertElementInst>(LastInsertInst)) { InsertedOperand = IE->getOperand(1); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1a112d35d9590..1db5cbd13b918 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -636,7 +636,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> { virtual void execute(struct VPTransformState &State) = 0; /// Each recipe prints itself. - void print(raw_ostream &O, const Twine &Indent); virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const = 0; @@ -767,8 +766,13 @@ class VPWidenRecipe : public VPRecipeBase { /// Hold the instruction to be widened. Instruction &Ingredient; + /// Hold VPValues for the operands of the ingredient. + VPUser User; + public: - VPWidenRecipe(Instruction &I) : VPRecipeBase(VPWidenSC), Ingredient(I) {} + template <typename IterT> + VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) + : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {} ~VPWidenRecipe() override = default; @@ -1617,6 +1621,16 @@ class VPlan { /// Dump the plan to stderr (for debugging). void dump() const; + /// Returns a range mapping the values in the range \p Operands to their + /// corresponding VPValues. + iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>> + mapToVPValues(User::op_range Operands) { + std::function<VPValue *(Value *)> Fn = [this](Value *Op) { + return getOrAddVPValue(Op); + }; + return map_range(Operands, Fn); + } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive.
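Taken together, the VPlan.h additions above let recipe construction be written uniformly from IR operands; a minimal usage sketch under the same in-tree assumption as earlier (the free function is hypothetical, the pattern mirrors tryToWiden and the VPlanTransforms hunk that follows):

#include "VPlan.h"
using namespace llvm;
// Build a widening recipe whose operands are VPValues managed by the plan;
// mapToVPValues registers each IR operand with the plan on first use.
static VPWidenRecipe *makeWidenRecipe(Instruction *I, VPlan &Plan) {
  return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands()));
}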
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fa864889efa3f..2b48b2b591603 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -72,7 +72,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
       } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
         NewRecipe = new VPWidenGEPRecipe(GEP, OrigLoop);
       } else
-        NewRecipe = new VPWidenRecipe(*Inst);
+        NewRecipe =
+            new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
 
       NewRecipe->insertBefore(Ingredient);
       Ingredient->eraseFromParent();
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index b095d7134a5fe..cadaa4afeef14 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -114,8 +114,11 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
     if (SupportsRelocation && SupportsRelocation(Reloc.getType())) {
       auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend();
       auto A = AddendOrErr ? *AddendOrErr : 0;
-      uint64_t resolved = Resolver(Reloc, Reloc.getSymbol()->getValue(), A);
-      Relocs.insert({Reloc.getOffset(), resolved});
+      Expected<uint64_t> ValueOrErr = Reloc.getSymbol()->getValue();
+      if (!ValueOrErr)
+        // TODO: Test this error.
+        return ValueOrErr.takeError();
+      Relocs.insert({Reloc.getOffset(), Resolver(Reloc, *ValueOrErr, A)});
     } else if (Reloc.getType() == RelativeRelocation) {
       if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
         Relocs.insert({Reloc.getOffset(), *AddendOrErr});
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/switch.ll b/llvm/test/Analysis/BranchProbabilityInfo/switch.ll
new file mode 100644
index 0000000000000..491fdad94d78a
--- /dev/null
+++ b/llvm/test/Analysis/BranchProbabilityInfo/switch.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s
+; RUN: opt < %s -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
+
+declare void @g(i32)
+
+; Check that probabilities are reported correctly when there are multiple edges
+; between two basic blocks. In that case the sum of the probabilities over all
+; such edges should be returned by BranchProbabilityInfo::getEdgeProbability.
+ +define void @test1(i32 %x) { +;CHECK: edge entry -> return probability is 0x0ccccccd / 0x80000000 = 10.00% +;CHECK: edge entry -> bb0 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb0 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb0 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb1 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb1 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb1 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb2 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb2 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge entry -> bb2 probability is 0x26666666 / 0x80000000 = 30.00% +;CHECK: edge bb0 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge bb1 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +;CHECK: edge bb2 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 3, label %bb0 + i32 6, label %bb0 + i32 1, label %bb1 + i32 4, label %bb1 + i32 7, label %bb1 + i32 2, label %bb2 + i32 5, label %bb2 + i32 8, label %bb2 + ] + +bb0: ; preds = %entry, %entry, %entry + tail call void @g(i32 0) + br label %return + +bb1: ; preds = %entry, %entry, %entry + tail call void @g(i32 1) + br label %return + +bb2: ; preds = %entry, %entry, %entry + tail call void @g(i32 2) + br label %return + +return: ; preds = %bb2, %bb1, %bb0, %entry + ret void +} diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index edfe281ff5341..7f1795432fe52 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -149,8 +149,8 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -159,18 +159,18 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost 
Model: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> @@ -179,18 +179,18 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; 
CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -229,8 +229,8 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x 
i1> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -239,8 +239,8 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -249,8 +249,8 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> @@ -259,18 +259,18 @@ define i32 @casts() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: 
Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-LABEL: 'casts' @@ -371,13 +371,13 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; 
CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -416,8 +416,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -426,38 +426,38 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> 
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 661 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -496,8 +496,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -506,8 +506,8 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 
x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -516,28 +516,28 @@ define i32 @casts() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x 
float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-LABEL: 'casts' @@ -621,30 +621,30 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for 
instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -653,16 +653,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x 
double> undef to <2 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> @@ -673,16 +673,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> @@ -693,16 +693,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> @@ -713,16 +713,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -733,16 +733,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> @@ -753,16 +753,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> @@ -773,16 +773,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated 
cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> @@ -793,16 +793,16 @@ define i32 @casts() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -888,30 +888,30 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -920,16 +920,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> @@ -940,16 +940,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r118 = fptoui <4 x float> undef to <4 
x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> @@ -960,16 +960,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: 
Cost Model: Found an estimated cost of 31 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> @@ -980,16 +980,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 63 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -1000,16 +1000,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; 
CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> @@ -1020,16 +1020,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost 
Model: Found an estimated cost of 23 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> @@ -1040,16 +1040,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated 
cost of 47 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> @@ -1060,16 +1060,16 @@ define i32 @casts() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated 
cost of 95 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -1217,8 +1217,8 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -1227,18 +1227,18 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x 
float> undef to <8 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> @@ -1247,18 +1247,18 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to 
<16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -1297,8 +1297,8 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> 
undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -1307,8 +1307,8 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -1317,8 +1317,8 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> @@ -1327,18 +1327,18 @@ define i32 @casts() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated 
cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; -- scalars -- @@ -1742,14 +1742,14 @@ define i32 @load_extends() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost 
Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-LABEL: 'load_extends' @@ -1778,14 +1778,14 @@ define i32 @load_extends() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8R-LABEL: 'load_extends' diff --git a/llvm/test/Analysis/CostModel/ARM/control-flow.ll 
b/llvm/test/Analysis/CostModel/ARM/control-flow.ll new file mode 100644 index 0000000000000..9c7b1b269ebde --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/control-flow.ll @@ -0,0 +1,447 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-LATENCY +; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-LATENCY +; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-LATENCY +; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT +; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-THROUGHPUT +; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-THROUGHPUT + +define i32 @simple_loop_cost(i32 %N) { +; CHECK-T1-SIZE-LABEL: 'simple_loop_cost' +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-SIZE-LABEL: 'simple_loop_cost' +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-LATENCY-LABEL: 
'simple_loop_cost' +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-LATENCY-LABEL: 'simple_loop_cost' +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8_1M-LATENCY-LABEL: 'simple_loop_cost' +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-THROUGHPUT-LABEL: 'simple_loop_cost' +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8M-THROUGHPUT-LABEL: 'simple_loop_cost' +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_loop_cost' +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +entry: + %zero = icmp eq i32 %N, 0 + br i1 %zero, label %exit, label %preheader + +preheader: + br label %loop + +loop: + %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ne i32 %iv.next, %N + br i1 %cmp, label %loop, label %exit + +exit: + %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] 
+ ret i32 %res +} + +define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) { +; CHECK-T1-SIZE-LABEL: 'simple_mul_loop' +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-SIZE-LABEL: 'simple_mul_loop' +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-LATENCY-LABEL: 'simple_mul_loop' +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq 
i32 %N, 0 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-LATENCY-LABEL: 'simple_mul_loop' +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_loop' +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, 
label %preheader +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_loop' +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_loop' +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-THROUGHPUT-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_loop' +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +entry: + %zero = icmp eq i32 %N, 0 + br i1 %zero, label %exit, label %preheader + +preheader: + br label %loop + +loop: + %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] + %addr.a = getelementptr i32, i32* %A, i32 %iv + %load = load i32, i32* %addr.a + %mul = mul i32 %load, %load + %addr.b = getelementptr i32, i32* %B, 
i32 %iv + store i32 %mul, i32* %addr.b + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ne i32 %iv.next, %N + br i1 %cmp, label %loop, label %exit + +exit: + %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + ret i32 %res +} + +define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { +; CHECK-T1-SIZE-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-SIZE-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-LATENCY-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; 
CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; 
CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +entry: + %zero = icmp eq i32 %N, 0 + br i1 %zero, label %exit, label %preheader + +preheader: + br label %loop + +loop: + %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] + %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] + %addr.b = phi i32* [ %B, %preheader 
], [ %addr.b, %loop ] + %load = load i16, i16* %addr.a + %sext = sext i16 %load to i32 + %mul = mul i32 %sext, 7 + store i32 %mul, i32* %addr.b + %iv.next = add nuw i32 %iv, 1 + %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 + %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 + %cmp = icmp ne i32 %iv.next, %N + br i1 %cmp, label %loop, label %exit + +exit: + %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + ret i32 %res +} diff --git a/llvm/test/Analysis/CostModel/ARM/immediates.ll b/llvm/test/Analysis/CostModel/ARM/immediates.ll new file mode 100644 index 0000000000000..4afec1fc5142f --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/immediates.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-SIZE +; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-LATENCY +; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-LATENCY +; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT +; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-T2-THROUGHPUT + +define i32 @const_costs() { +; CHECK-T1-SIZE-LABEL: 'const_costs' +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 +; 
CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 +; +; CHECK-T2-SIZE-LABEL: 'const_costs' +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255 +; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1 +; +; CHECK-T1-LATENCY-LABEL: 'const_costs' +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27 +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %and_255 = and i32 undef, 255
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
+;
+; CHECK-T2-LATENCY-LABEL: 'const_costs'
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 1
+;
+; CHECK-T1-THROUGHPUT-LABEL: 'const_costs'
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 1
+;
+; CHECK-T2-THROUGHPUT-LABEL: 'const_costs'
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_1 = add i32 undef, 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_32767 = add i32 undef, 32767
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_1 = sub i32 undef, 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sub_32768 = sub i32 undef, 32768
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_2 = mul i32 undef, 2
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_3 = mul i32 undef, 3
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_27 = mul i32 undef, 27
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_255 = and i32 undef, 255
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_65535 = and i32 undef, 65535
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_1_0 = select i1 undef, i32 1, i32 0
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %select_7_255 = select i1 undef, i32 7, i32 255
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 1
+;
+  %add_1 = add i32 undef, 1
+  %add_32767 = add i32 undef, 32767
+  %sub_1 = sub i32 undef, 1
+  %sub_32768 = sub i32 undef, 32768
+  %mul_2 = mul i32 undef, 2
+  %mul_3 = mul i32 undef, 3
+  %mul_27 = mul i32 undef, 27
+  %and_255 = and i32 undef, 255
+  %and_65535 = and i32 undef, 65535
+  %and_1 = and i32 undef, 1
+  %xor_1 = xor i32 undef, 1
+  %xor_7 = xor i32 undef, 7
+  %gep_1 = getelementptr i32, i32* undef, i32 1
+  %gep_16 = getelementptr i32, i32* undef, i32 16
+  %cmp_244 = icmp ne i32 undef, 244
+  %cmp_256 = icmp uge i32 undef, 256
+  %cmp_1024 = icmp ult i32 undef, 1024
+  %select_1_0 = select i1 undef, i32 1, i32 0
+  %select_7_255 = select i1 undef, i32 7, i32 255
+  ret i32 1
+}
+
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
index d2d25f4c5b0e5..a4ceb1629d05e 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
@@ -35,77 +35,77 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-LABEL: 'smul'
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'smul'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'smul'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'smul'
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'smul'
@@ -114,17 +114,17 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'smul'
@@ -133,17 +133,17 @@ define i32 @smul(i32 %arg) {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'smul'
@@ -152,74 +152,74 @@ define i32 @smul(i32 %arg) {
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'smul'
 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'smul'
 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'smul'
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
@@ -269,77 +269,77 @@ define i32 @umul(i32 %arg) {
 ; SSSE3-LABEL: 'umul'
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'umul'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'umul'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'umul'
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'umul'
@@ -348,17 +348,17 @@ define i32 @umul(i32 %arg) {
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'umul'
@@ -367,17 +367,17 @@ define i32 @umul(i32 %arg) {
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'umul'
@@ -386,74 +386,74 @@ define i32 @umul(i32 %arg) {
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'umul'
 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3)
+; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3)
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'umul'
 ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
-; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32>
undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'umul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index 2c3b1844ca284..9b0a5ff1cb9a8 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -673,9 +673,9 @@ define i32 @fdiv(i32 %arg) { define i32 @frem(i32 %arg) { ; SSE1-LABEL: 'frem' ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %F64 = frem double undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = frem <4 x double> undef, undef @@ -684,9 +684,9 @@ define i32 @frem(i32 %arg) { ; ; SSE2-LABEL: 'frem' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef @@ -707,23 +707,23 @@ define i32 @frem(i32 %arg) { ; AVX-LABEL: 'frem' ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'frem' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 71 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'frem' @@ -1059,9 +1059,9 @@ define i32 @fcopysign(i32 %arg) { define i32 @fma(i32 %arg) { ; SSE1-LABEL: 'fma' ; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; SSE1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; SSE1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; SSE1-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) @@ -1070,9 +1070,9 @@ define i32 @fma(i32 %arg) { ; ; SSE2-LABEL: 'fma' ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x 
float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) @@ -1093,12 +1093,12 @@ define i32 @fma(i32 %arg) { ; AVX-LABEL: 'fma' ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fma' diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 146655567d2ef..3c1511da67ebe 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -991,77 +991,77 @@ define i32 @smul(i32 %arg) { ; SSSE3-LABEL: 'smul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, 
i64 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for 
instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'smul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for 
instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated 
cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' @@ -1070,17 +1070,17 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } 
@llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'smul' @@ -1089,17 +1089,17 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> 
undef, <32 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'smul' @@ -1108,74 +1108,74 @@ define i32 @smul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; 
AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'smul' ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } 
@llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'smul'
; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'smul'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -1229,77 +1229,77 @@ define i32 @umul(i32 %arg) {
; SSSE3-LABEL: 'umul'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'umul'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'umul'
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'umul'
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'umul'
@@ -1308,17 +1308,17 @@ define i32 @umul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'umul'
@@ -1327,17 +1327,17 @@ define i32 @umul(i32 %arg) {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'umul'
@@ -1346,74 +1346,74 @@ define i32 @umul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'umul'
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'umul'
; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'umul'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll
index cfd59adadd17f..6330dca1739b7 100644
--- a/llvm/test/Analysis/CostModel/X86/cast.ll
+++ b/llvm/test/Analysis/CostModel/X86/cast.ll
@@ -25,7 +25,7 @@ define i32 @add(i32 %arg) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <8 x i1> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = sext <8 x i1> undef to <8 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -36,7 +36,7 @@ define i32 @add(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <8 x i1> undef to <8 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <8 x i1> undef to <8 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -44,10 +44,10 @@ define i32 @add(i32 %arg) {
; AVX512-LABEL: 'add'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = zext <4 x i1> undef to <4 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <4 x i1> undef to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = zext <8 x i1> undef to <8 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <8 x i1> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -85,8 +85,8 @@ define i32 @zext_sext(<8 x i1> %in) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <4 x i32> undef to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
@@ -95,7 +95,7 @@ define i32 @zext_sext(<8 x i1> %in) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -115,8 +115,8 @@ define i32 @zext_sext(<8 x i1> %in) {
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = zext <4 x i32> undef to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
@@ -125,7 +125,7 @@ define i32 @zext_sext(<8 x i1> %in) {
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -150,12 +150,12 @@ define i32 @zext_sext(<8 x i1> %in) {
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -185,40 +185,100 @@ define i32 @zext_sext(<8 x i1> %in) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX512-LABEL: 'zext_sext'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %S = sext <8 x i1> %in to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX512F-LABEL: 'zext_sext'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %S = sext <8 x i1> %in to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'zext_sext'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %S = sext <8 x i1> %in to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512DQ-LABEL: 'zext_sext'
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %S = sext <8 x i1> %in to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%Z = zext <8 x i1> %in to <8 x i32>
%S = sext <8 x i1> %in to <8 x i32>
@@ -469,9 +529,9 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
; SSE-LABEL: 'fp_conv'
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = fpext <4 x float> %c to <4 x double>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = fpext <8 x float> %a to <8 x double>
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A2 = fpext <8 x float> %a to <8 x double>
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float>
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'fp_conv'
diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll
index 219030f015560..8e2c7b0b56b8d 100644
--- a/llvm/test/Analysis/CostModel/X86/extend.ll
+++ b/llvm/test/Analysis/CostModel/X86/extend.ll
@@ -16,21 +16,21 @@ define i32 @zext_vXi32() {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi32' @@ -184,7 +184,7 @@ define i32 @zext_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi8' @@ -202,7 +202,7 @@ define i32 @zext_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi8' @@ -220,7 +220,7 @@ define i32 @zext_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = zext <32 x 
i8> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi8' @@ -350,7 +350,7 @@ define i32 @zext_vXi1() { ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> @@ -519,21 +519,21 @@ define i32 @sext_vXi32() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi32' @@ -687,7 +687,7 @@ define i32 @sext_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi8' @@ -705,7 +705,7 @@ define i32 @sext_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi8' @@ -723,7 +723,7 @@ define i32 @sext_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi8' @@ -853,7 +853,7 @@ define i32 @sext_vXi1() { ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index c30041af018c0..efa3d634e5cd3 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -13,30 +13,30 @@ define i32 @fptosi_double_i64(i32 %arg) { ; SSE2-LABEL: 'fptosi_double_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I64 = fptosi <8 x double> undef 
to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_double_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_double_i64' @@ -49,8 +49,8 @@ define i32 @fptosi_double_i64(i32 %arg) { ; SLM-LABEL: 'fptosi_double_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x double> undef 
to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptosi double undef to i64 @@ -63,21 +63,21 @@ define i32 @fptosi_double_i64(i32 %arg) { define i32 @fptosi_double_i32(i32 %arg) { ; SSE-LABEL: 'fptosi_double_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -100,15 +100,15 @@ define i32 @fptosi_double_i16(i32 %arg) { ; AVX-LABEL: 'fptosi_double_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost 
of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptosi double undef to i16 @@ -121,37 +121,37 @@ define i32 @fptosi_double_i16(i32 %arg) { define i32 @fptosi_double_i8(i32 %arg) { ; SSE2-LABEL: 'fptosi_double_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_double_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptosi_double_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptosi double undef to i8 @@ -164,34 +164,34 @@ define i32 @fptosi_double_i8(i32 %arg) { define i32 @fptosi_float_i64(i32 %arg) { ; SSE2-LABEL: 'fptosi_float_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_float_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_float_i64' @@ -206,8 +206,8 @@ define i32 @fptosi_float_i64(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x 
float> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptosi float undef to i64 @@ -252,45 +252,29 @@ define i32 @fptosi_float_i32(i32 %arg) { } define i32 @fptosi_float_i16(i32 %arg) { -; SSE2-LABEL: 'fptosi_float_i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SSE42-LABEL: 'fptosi_float_i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE-LABEL: 'fptosi_float_i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x 
float> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; SLM-LABEL: 'fptosi_float_i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptosi float undef to i16 %V2I16 = fptosi <2 x float> undef to <2 x i16> @@ -303,42 +287,42 @@ define i32 @fptosi_float_i16(i32 %arg) { define i32 @fptosi_float_i8(i32 %arg) { ; SSE2-LABEL: 'fptosi_float_i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_float_i8' ; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptosi_float_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = 
fptosi float undef to i8 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> -; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I8 = fptosi float undef to i8 diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 357b5fd864d8e..734027062129d 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -13,30 +13,30 @@ define i32 @fptoui_double_i64(i32 %arg) { ; SSE2-LABEL: 'fptoui_double_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; 
AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_double_i64' @@ -49,8 +49,8 @@ define i32 @fptoui_double_i64(i32 %arg) { ; SLM-LABEL: 'fptoui_double_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptoui double undef to i64 @@ -63,16 +63,16 @@ define i32 @fptoui_double_i64(i32 %arg) { define i32 @fptoui_double_i32(i32 %arg) { ; SSE2-LABEL: 'fptoui_double_i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef 
to <2 x i32>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_double_i32'
@@ -93,7 +93,7 @@ define i32 @fptoui_double_i32(i32 %arg) {
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I32 = fptoui double undef to i32
@@ -104,40 +104,26 @@ define i32 @fptoui_double_i32(i32 %arg) {
 }
 
 define i32 @fptoui_double_i16(i32 %arg) {
-; SSE2-LABEL: 'fptoui_double_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'fptoui_double_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'fptoui_double_i16'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_double_i16'
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_double_i16'
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'fptoui_double_i16'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I16 = fptoui double undef to i16
 %V2I16 = fptoui <2 x double> undef to <2 x i16>
@@ -149,37 +135,37 @@ define i32 @fptoui_double_i16(i32 %arg) {
 define i32 @fptoui_double_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_double_i8'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_double_i8'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_double_i8'
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_double_i8'
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_double_i8'
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I8 = fptoui double undef to i8
@@ -192,34 +178,34 @@ define i32 @fptoui_double_i8(i32 %arg) {
 define i32 @fptoui_float_i64(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i64'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i64'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i64'
 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'fptoui_float_i64'
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'fptoui_float_i64'
@@ -234,8 +220,8 @@ define i32 @fptoui_float_i64(i32 %arg) {
 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I64 = fptoui float undef to i64
@@ -249,18 +235,18 @@ define i32 @fptoui_float_i64(i32 %arg) {
 define i32 @fptoui_float_i32(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i32'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i32'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i32'
@@ -268,7 +254,7 @@ define i32 @fptoui_float_i32(i32 %arg) {
 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_float_i32'
@@ -283,8 +269,8 @@ define i32 @fptoui_float_i32(i32 %arg) {
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I32 = fptoui float undef to i32
@@ -298,42 +284,42 @@ define i32 @fptoui_float_i32(i32 %arg) {
 define i32 @fptoui_float_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i16'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i16'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i16'
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_float_i16'
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_float_i16'
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I16 = fptoui float undef to i16
@@ -347,42 +333,42 @@ define i32 @fptoui_float_i16(i32 %arg) {
 define i32 @fptoui_float_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i8'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i8'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i8'
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_float_i8'
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_float_i8'
 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 %I8 = fptoui float undef to i8
diff --git a/llvm/test/Analysis/CostModel/X86/fround.ll b/llvm/test/Analysis/CostModel/X86/fround.ll
index ef2eca1803123..5e22473569563 100644
--- a/llvm/test/Analysis/CostModel/X86/fround.ll
+++ b/llvm/test/Analysis/CostModel/X86/fround.ll
@@ -16,9 +16,9 @@ target triple = "x86_64-apple-macosx10.8.0"
 define i32 @ceil(i32 %arg) {
 ; SSE2-LABEL: 'ceil'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
@@ -96,9 +96,9 @@ define i32 @ceil(i32 %arg) {
 define i32 @floor(i32 %arg) {
 ; SSE2-LABEL: 'floor'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
@@ -176,9 +176,9 @@ define i32 @floor(i32 %arg) {
 define i32 @nearbyint(i32 %arg) {
 ; SSE2-LABEL: 'nearbyint'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
@@ -256,9 +256,9 @@ define i32 @nearbyint(i32 %arg) {
 define i32 @rint(i32 %arg) {
 ; SSE2-LABEL: 'rint'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
@@ -336,9 +336,9 @@ define i32 @rint(i32 %arg) {
 define i32 @trunc(i32 %arg) {
 ; SSE2-LABEL: 'trunc'
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
index afa4079dce760..2a5950760b9ad 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -22,7 +22,7 @@ for.end: ; preds = %vector.body
 ret void
 
 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
-; CORE2: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
 
 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
@@ -50,7 +50,7 @@ for.end: ; preds = %vector.body
 ret void
 
 ; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
-; CORE2: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
 
 ; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
 ; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll
index 7058086399df6..57f6a94affc28 100644
--- a/llvm/test/Analysis/CostModel/X86/load_store.ll
+++ b/llvm/test/Analysis/CostModel/X86/load_store.ll
@@ -109,8 +109,8 @@ define i32 @loads(i32 %arg) {
 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
+; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'loads'
@@ -129,8 +129,8 @@ define i32 @loads(i32 %arg) {
 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'loads'
@@ -149,8 +149,8 @@ define i32 @loads(i32 %arg) {
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 load i8, i8* undef, align 4
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 5bd0e08d960fd..ee3f3316ea7fd 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -14,26 +14,26 @@ define i32 @masked_load() {
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 287 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_load'
@@ -80,12 +80,12 @@ define i32 @masked_load() {
 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 290 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
@@ -107,12 +107,12 @@ define i32 @masked_load() {
 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 307 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
@@ -744,26 +744,26 @@ define i32 @masked_expandload() {
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_expandload'
@@ -794,55 +794,55 @@ define i32 @masked_expandload() {
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_expandload'
-; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX512-LABEL: 'masked_expandload'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 =
call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1051,7 +1051,7 @@ define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { ; SSE2-LABEL: 'test2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res 
= call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test2' @@ -1103,7 +1103,7 @@ define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { ; SSE2-LABEL: 'test4' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res ; ; SSE42-LABEL: 'test4' @@ -1191,7 +1191,7 @@ define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { ; SSE2-LABEL: 'test7' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res ; ; SSE42-LABEL: 'test7' @@ -1217,7 +1217,7 @@ define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %d define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { ; SSE2-LABEL: 'test8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res ; ; SSE42-LABEL: 'test8' @@ -1337,31 +1337,31 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = 
call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1381,31 +1381,31 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { ; SSE2-LABEL: 'test_gather_16f32_var_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1425,31 +1425,31 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' -; SSE2-NEXT: Cost Model: Found an estimated 
cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1471,7 +1471,7 @@ define <16 x 
float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1479,7 +1479,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; SSE42-LABEL: 'test_gather_16f32_const_mask2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1487,7 +1487,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX1-LABEL: 'test_gather_16f32_const_mask2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1495,7 +1495,7 @@ define <16 x 
float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; AVX2-LABEL: 'test_gather_16f32_const_mask2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1503,7 +1503,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind ; SKL-LABEL: 'test_gather_16f32_const_mask2' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll index 28dd66d7980bf..c6ca2bded39cd 100644 --- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -180,31 +180,49 @@ define void @sext512() "min-legal-vector-width"="512" { } define void @trunc256() "min-legal-vector-width"="256" { -; VEC256-LABEL: 'trunc256' -; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = trunc <8 x i64> undef to <8 x i32> -; VEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %B = trunc <8 x i64> undef to <8 x i16> -; VEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %C = trunc <8 x i64> undef to <8 x i8> -; VEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D = trunc <16 x i32> undef to <16 x i16> -; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = trunc <16 x i32> undef to <16 x i8> -; VEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F = trunc <32 x i16> undef to <32 x i8> -; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void +; AVX-LABEL: 'trunc256' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = trunc <8 x i64> undef to <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %B = trunc <8 x i64> undef to <8 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %C = trunc <8 x i64> undef to <8 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F = trunc <32 x i16> undef to <32 x i8> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VL256-LABEL: 'trunc256' +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = trunc <8 x i64> undef to <8 x i32> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %B = trunc <8 x i64> undef to <8 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %C = trunc <8 x i64> undef to <8 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F = trunc <32 x i16> undef to <32 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VL512-LABEL: 'trunc256' ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = trunc <8 x i64> undef to <8 x i32> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = trunc <8 x i64> undef to <8 x i16> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B = trunc <8 x i64> undef to <8 x i16> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = trunc <8 x i64> undef to <8 x i8> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = trunc <16 x i32> undef to <16 x i16> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <16 x i32> undef to <16 x i8> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F = trunc <32 x i16> undef to <32 x i8> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F = trunc <32 x i16> undef to <32 x i8> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; +; SKX256-LABEL: 'trunc256' +; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = trunc <8 x i64> undef to <8 x i32> +; SKX256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %B = trunc <8 x i64> undef to <8 x i16> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %C = trunc <8 x i64> undef to <8 x i8> +; SKX256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; SKX256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; 
SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <32 x i16> undef to <32 x i8> +; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ; SKX512-LABEL: 'trunc256' ; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = trunc <8 x i64> undef to <8 x i32> -; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = trunc <8 x i64> undef to <8 x i16> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B = trunc <8 x i64> undef to <8 x i16> ; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = trunc <8 x i64> undef to <8 x i8> -; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = trunc <16 x i32> undef to <16 x i16> -; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <16 x i32> undef to <16 x i8> -; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = trunc <32 x i16> undef to <32 x i8> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = trunc <16 x i32> undef to <16 x i16> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <16 x i32> undef to <16 x i8> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <32 x i16> undef to <32 x i8> ; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %A = trunc <8 x i64> undef to <8 x i32> @@ -250,13 +268,13 @@ define i32 @zext256_vXi1() "min-legal-vector-width"="256" { ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL512-LABEL: 'zext256_vXi1' @@ -388,14 +406,14 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" { ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL512-LABEL: 'sext256_vXi1' @@ -501,3 +519,170 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" { ret i32 undef } + +define i32 @trunc_vXi1() "min-legal-vector-width"="256" { +; AVX-LABEL: 'trunc_vXi1' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512VL256-LABEL: 'trunc_vXi1' +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512VL512-LABEL: 'trunc_vXi1' +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SKX256-LABEL: 'trunc_vXi1' +; SKX256-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SKX512-LABEL: 'trunc_vXi1' +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SKX512-NEXT: Cost 
Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %V2i64 = trunc <2 x i64> undef to <2 x i1> + %V4i64 = trunc <4 x i64> undef to <4 x i1> + %V8i64 = trunc <8 x i64> undef to <8 x i1> + %V16i64 = trunc <16 x i64> undef to <16 x i1> + %V32i64 = trunc <32 x i64> undef to <32 x i1> + %V64i64 = trunc <64 x i64> undef to <64 x i1> + + %V2i32 = trunc <2 x i32> undef to <2 x i1> + %V4i32 = trunc <4 x i32> undef to <4 x i1> + %V8i32 = trunc <8 x i32> undef to <8 x i1> + %V16i32 = trunc <16 x i32> undef to <16 x i1> + %V32i32 = trunc <32 x i32> undef to <32 x i1> + %V64i32 = trunc <64 x i32> undef to <64 x i1> + + %V2i16 = trunc <2 x i16> undef to <2 x i1> + %V4i16 = trunc <4 x i16> undef to <4 x i1> + %V8i16 = trunc <8 x i16> undef to <8 x i1> + %V16i16 = trunc <16 x i16> undef to <16 x i1> + %V32i16 = trunc <32 x i16> undef to <32 x i1> + %V64i16 = trunc <64 x i16> undef to <64 x i1> + + %V2i8 = trunc <2 x i8> undef to <2 x i1> + %V4i8 = trunc <4 x i8> undef to <4 x i1> + %V8i8 = trunc <8 x i8> undef to <8 x i1> + %V16i8 = trunc <16 x i8> undef to <16 x i1> + %V32i8 = trunc <32 x i8> undef to <32 x i1> + %V64i8 = trunc <64 x i8> undef to <64 x i1> + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll 
b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll index 208dd3d1a01a6..133c1e6c885ab 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -859,27 +859,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' @@ -894,27 +894,27 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> 
%src64, <4 x i16> %src32_64, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = 
shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -1249,27 +1249,27 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -;
AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll index 84477441719ea..830887fc5a7ec 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -229,16 +229,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index 1caab2985be97..6415caeebc119 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -285,18 +285,18 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -386,7 +386,7 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll index 53f7c1ccdc755..204c38ccb20ad 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll @@ -202,19 +202,19 @@ define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -293,17 +293,17 @@ define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b3 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll index 242319e2957e6..0ae3e63a85395 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -268,21 +268,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' @@ -372,17 +372,17 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x 
i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index 708b1432e4c24..c2c86d8b29196 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -109,14 +109,14 @@ define i32 @sitofp_i64_double() { ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_double' @@ -213,7 +213,7 @@ define i32 @sitofp_i32_float() { ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> 
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_float' @@ -246,15 +246,15 @@ define i32 @sitofp_i64_float() { ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_float' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_float' diff --git a/llvm/test/Analysis/CostModel/X86/sse-itoi.ll b/llvm/test/Analysis/CostModel/X86/sse-itoi.ll index 3d18ad8a06b10..a17b75769879b 100644 --- a/llvm/test/Analysis/CostModel/X86/sse-itoi.ll +++ b/llvm/test/Analysis/CostModel/X86/sse-itoi.ll @@ -584,13 +584,13 @@ define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) { define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) { ; SSE2-LABEL: 'truncate_v4i16_to_v4i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'truncate_v4i16_to_v4i8' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4 ; SSE41-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll index 8061d8abcd791..26c5c77232d4b 100755 --- a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll @@ -13,9 +13,9 @@ define void @load_i16_stride2() { ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -38,10 +38,10 @@ define void @load_i16_stride3() { ;CHECK-LABEL: load_i16_stride3 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -64,10 +64,10 @@ define void @load_i16_stride4() { ;CHECK-LABEL: load_i16_stride4 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 5 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load entry: br label %for.body @@ -89,11 +89,11 @@ for.end: ; preds = %for.body define void @load_i16_stride5() { ;CHECK-LABEL: load_i16_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 2 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 6 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 2 For instruction: %1 = load +;CHECK: Found an estimated cost of 2 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 3 for VF 8 
For instruction: %1 = load +;CHECK: Found an estimated cost of 5 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 10 for VF 32 For instruction: %1 = load entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll index 3c88b38edec7d..8c5528a9c1589 100755 --- a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll @@ -14,7 +14,7 @@ define void @load_i8_stride2() { ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 64 For instruction: %1 = load entry: @@ -40,7 +40,7 @@ define void @load_i8_stride3() { ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 13 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 16 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 25 for VF 64 For instruction: %1 = load @@ -67,7 +67,7 @@ define void @load_i8_stride4() { ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load ;CHECK: Found an estimated cost of 59 for VF 64 For instruction: %1 = load @@ -93,7 +93,7 @@ define void @load_i8_stride5() { ;CHECK-LABEL: load_i8_stride5 ;CHECK: Found an estimated cost of 1 for VF 1 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load -;CHECK: Found an estimated cost of 3 for VF 4 For instruction: %1 = load +;CHECK: Found an estimated cost of 4 for VF 4 For instruction: %1 = load ;CHECK: Found an estimated cost of 8 for VF 8 For instruction: %1 = load ;CHECK: Found an estimated cost of 20 for VF 16 For instruction: %1 = load ;CHECK: Found an estimated cost of 39 for VF 32 For instruction: %1 = load diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll index c4c070a1d9191..a4cbc65226abc 100644 --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -15,37 +15,23 @@ define i32 @trunc_vXi32() { ; SSE-LABEL: 'trunc_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%V8i64 = trunc <8 x i64> undef to <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX1-LABEL: 'trunc_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; AVX2-LABEL: 'trunc_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX-LABEL: 'trunc_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'trunc_vXi32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef -; -; BTVER2-LABEL: 'trunc_vXi32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2i64 = trunc <2 
x i64> undef to <2 x i32> %V4i64 = trunc <4 x i64> undef to <4 x i32> @@ -59,39 +45,39 @@ define i32 @trunc_vXi16() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'trunc_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'trunc_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSE42-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'trunc_vXi16'
@@ -99,12 +85,12 @@ define i32 @trunc_vXi16() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'trunc_vXi16'
@@ -112,25 +98,25 @@ define i32 @trunc_vXi16() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'trunc_vXi16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'trunc_vXi16'
@@ -138,12 +124,12 @@ define i32 @trunc_vXi16() {
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2i64 = trunc <2 x i64> undef to <2 x i16>
@@ -166,20 +152,20 @@ define i32 @trunc_vXi8() {
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'trunc_vXi8'
@@ -187,20 +173,20 @@ define i32 @trunc_vXi8() {
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'trunc_vXi8'
@@ -208,20 +194,20 @@ define i32 @trunc_vXi8() {
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'trunc_vXi8'
@@ -230,19 +216,19 @@ define i32 @trunc_vXi8() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'trunc_vXi8'
@@ -251,19 +237,19 @@ define i32 @trunc_vXi8() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'trunc_vXi8'
@@ -271,20 +257,20 @@ define i32 @trunc_vXi8() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'trunc_vXi8'
@@ -292,20 +278,20 @@ define i32 @trunc_vXi8() {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; BTVER2-LABEL: 'trunc_vXi8'
@@ -314,19 +300,19 @@ define i32 @trunc_vXi8() {
; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2i64 = trunc <2 x i64> undef to <2 x i8>
@@ -354,86 +340,221 @@ define i32 @trunc_vXi8() {
}
define i32 @trunc_vXi1() {
-; SSE-LABEL: 'trunc_vXi1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'trunc_vXi1'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX-LABEL: 'trunc_vXi1'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSSE3-LABEL: 'trunc_vXi1'
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX512-LABEL: 'trunc_vXi1'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE42-LABEL: 'trunc_vXi1'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'trunc_vXi1'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'trunc_vXi1'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'trunc_vXi1'
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'trunc_vXi1'
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; BTVER2-LABEL: 'trunc_vXi1'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%V2i64 = trunc <2 x i64> undef to <2 x i1>
%V4i64 = trunc <4 x i64> undef to <4 x i1>
diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll
index 4b458fec9a45f..49562502f27d9 100644
--- a/llvm/test/Analysis/CostModel/X86/uitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll
@@ -109,7 +109,7 @@ define i32 @uitofp_i64_double() {
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double>
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double>
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'uitofp_i64_double'
@@ -213,7 +213,7 @@ define i32 @uitofp_i32_float() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i32_float'
@@ -221,7 +221,7 @@ define i32 @uitofp_i32_float() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'uitofp_i32_float'
@@ -237,7 +237,7 @@ define i32 @uitofp_i32_float() {
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
; BTVER2-NEXT: Cost Model:
Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i32_f32 = uitofp i32 undef to float @@ -262,7 +262,7 @@ define i32 @uitofp_i64_float() { ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_float' diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll index 4aa24f2c9aa28..4d8111654d568 100644 --- a/llvm/test/Analysis/MemorySSA/phi-translation.ll +++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll @@ -193,3 +193,45 @@ if.end: br label %while.cond } +; CHECK-LABEL: define i32 @use_not_optimized_due_to_backedge +define i32 @use_not_optimized_due_to_backedge(i32* nocapture %m_i_strides, i32* nocapture readonly %eval_left_dims) { +entry: +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 1, i32* %m_i_strides, align 4 + store i32 1, i32* %m_i_strides, align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret i32 %m_i_size.1 + +for.body: ; preds = %entry, %for.inc +; CHECK: 4 = MemoryPhi({entry,1},{for.inc,3}) +; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %m_i_size.022 = phi i32 [ 1, %entry ], [ %m_i_size.1, %for.inc ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp1 = icmp eq i64 %indvars.iv, 0 + %arrayidx2 = getelementptr inbounds i32, i32* %m_i_strides, i64 %indvars.iv +; CHECK: MemoryUse(4) MayAlias +; CHECK-NEXT: %0 = load i32, i32* %arrayidx2, align 4 + %0 = load i32, i32* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32, i32* %eval_left_dims, i64 %indvars.iv +; CHECK: MemoryUse(4) MayAlias +; CHECK-NEXT: %1 = load i32, i32* %arrayidx4, align 4 + %1 = load i32, i32* %arrayidx4, align 4 + %mul = mul nsw i32 %1, %0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %arrayidx7 = getelementptr inbounds i32, i32* %m_i_strides, i64 %indvars.iv.next +; CHECK: 2 = MemoryDef(4) +; CHECK-NEXT: store i32 %mul, i32* %arrayidx7, align 4 + store i32 %mul, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then +; CHECK: 3 = MemoryPhi({for.body,4},{if.then,2}) +; CHECK-NEXT: %m_i_size.1 = phi i32 [ %m_i_size.022, %if.then ], [ %mul, %for.body ] + %m_i_size.1 = phi i32 [ %m_i_size.022, %if.then ], [ %mul, %for.body ] + br i1 %cmp1, label %for.body, label %for.cond.cleanup +} diff --git a/llvm/test/Analysis/ValueTracking/knownnonzero-shift.ll b/llvm/test/Analysis/ValueTracking/knownnonzero-shift.ll deleted file mode 100644 index c91e531578404..0000000000000 --- a/llvm/test/Analysis/ValueTracking/knownnonzero-shift.ll +++ /dev/null @@ -1,15 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -instsimplify -S < %s | FileCheck %s - -define i1 @test(i8 %p, i8* %pq) { -;
CHECK-LABEL: @test( -; CHECK-NEXT: ret i1 true -; - %q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits - %1 = shl i8 %p, %q ; because %q is nonzero, %1[0] is known to be zero. - %2 = and i8 %1, 1 - %x = icmp eq i8 %2, 0 - ret i1 %x -} - -!0 = !{ i8 1, i8 5 } diff --git a/llvm/test/Assembler/invalid-byval-type3.ll b/llvm/test/Assembler/invalid-byval-type3.ll index 4626dd71c5b56..5263e03d45dee 100644 --- a/llvm/test/Assembler/invalid-byval-type3.ll +++ b/llvm/test/Assembler/invalid-byval-type3.ll @@ -1,4 +1,4 @@ ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s -; CHECK: Attributes 'byval' and 'inalloca' do not support unsized types! +; CHECK: Attributes 'byval'{{.*}} do not support unsized types! declare void @foo(void()* byval(void())) diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index cc3d076086c60..d38eaee485ab0 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -204,7 +204,7 @@ define void @f34() ; CHECK: define void @f34() { call void @nobuiltin() nobuiltin -; CHECK: call void @nobuiltin() #40 +; CHECK: call void @nobuiltin() #[[NOBUILTIN:[0-9]+]] ret void; } @@ -371,7 +371,13 @@ define void @f62() nosync ; CHECK: define void @f63() #39 define void @f63() sanitize_memtag { - ret void; + ret void +} + +; CHECK: define void @f64(i32* preallocated(i32) %a) +define void @f64(i32* preallocated(i32) %a) +{ + ret void } ; CHECK: attributes #0 = { noreturn } @@ -414,4 +420,4 @@ define void @f63() sanitize_memtag ; CHECK: attributes #37 = { nofree } ; CHECK: attributes #38 = { nosync } ; CHECK: attributes #39 = { sanitize_memtag } -; CHECK: attributes #40 = { nobuiltin } +; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin } diff --git a/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll b/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll index ac7feec77aa1b..6d7d4c0a912c3 100644 --- a/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll +++ b/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll @@ -7,6 +7,7 @@ ; CHECK-NEXT: &1 | FileCheck %s +; RUN: llc -mtriple aarch64 -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck --match-full-lines %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" @@ -13,8 +13,8 @@ define i32 @main() #0 !dbg !14 { ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1, debug-location !DILocation(line: 0, scope: !18) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1, debug-location !DILocation(line: 0, scope: !18) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2, debug-location !DILocation(line: 0, scope: !22) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2, debug-location !DILocation(line: 0, scope: !22) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.retval) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir index 060db2c99ee57..0f54e9b548d89 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-select.mir @@ -21,9 +21,9 @@ body: | ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: 
[[MOVwzr:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: $wzr = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVwzr]], [[COPY1]], 0, implicit $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY2]], [[COPY1]], 0, implicit $nzcv ; CHECK: $w0 = COPY [[CSELWr]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 @@ -50,9 +50,9 @@ body: | ; CHECK: liveins: $s0, $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[MOVwzr:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: FCMPSri [[COPY1]], implicit-def $nzcv - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[MOVwzr]], [[COPY]], 0, implicit $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY2]], [[COPY]], 0, implicit $nzcv ; CHECK: $w0 = COPY [[CSELWr]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s32) = COPY $w1 @@ -66,3 +66,34 @@ body: | RET_ReallyLR implicit $w0 ... +--- +name: check_update_predicate +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + ; The G_ICMP is optimized here to be a slt comparison by adding 1 to the + ; constant. So, the CSELWr should use the predicate code 11, rather than + ; 13. + + ; CHECK-LABEL: name: check_update_predicate + ; CHECK: liveins: $w0, $w1 + ; CHECK: %copy1:gpr32sp = COPY $w0 + ; CHECK: %copy2:gpr32 = COPY $w1 + ; CHECK: %cst:gpr32 = MOVi32imm -1 + ; CHECK: $wzr = SUBSWri %copy1, 0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSELWr %cst, %copy2, 11, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %copy1:gpr(s32) = COPY $w0 + %copy2:gpr(s32) = COPY $w1 + %cst:gpr(s32) = G_CONSTANT i32 -1 + %cmp:gpr(s32) = G_ICMP intpred(sle), %copy1(s32), %cst + %trunc:gpr(s1) = G_TRUNC %cmp(s32) + %select:gpr(s32) = G_SELECT %trunc(s1), %cst, %copy2 + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir new file mode 100644 index 0000000000000..91780e2601fe6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir @@ -0,0 +1,630 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +name: slt_to_sle_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x slt c => x sle c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: slt_to_sle_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
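Taken together, the cases in this file pin down a single rule: when an icmp compares against a constant that is not a legal arithmetic immediate, the selector may instead compare against C-1 or C+1 with an adjusted predicate, but it must refuse whenever the adjustment would wrap. A minimal C++ sketch of that rule follows; the names are illustrative, not the actual AArch64 selector helpers, and it only models the predicates exercised by these tests.

#include <cstdint>
#include <optional>
#include <utility>

enum class Pred { SLT, SLE, SGE, SGT, ULT, ULE, UGE, UGT };

// AArch64 SUBS/ADDS take a 12-bit unsigned immediate, optionally LSL #12,
// which is why the tests pair 4097/8191 (illegal) with 4096/8192 (legal).
static bool isLegalArithImmed(uint64_t C) {
  return C <= 0xfffu || ((C & 0xfffu) == 0 && C <= 0xfff000u);
}

// Given 'icmp P x, C' with C truncated to 'Bits' width, return the adjusted
// (constant, predicate) pair, or nullopt when adjusting would wrap (the
// INT_MIN/INT_MAX/zero cases below) or still not yield a legal immediate.
static std::optional<std::pair<uint64_t, Pred>>
tryAdjustICmpImm(uint64_t C, Pred P, unsigned Bits) {
  const uint64_t SMin = 1ULL << (Bits - 1); // 0x80000000 for 32 bits
  const uint64_t SMax = SMin - 1;           // 0x7fffffff for 32 bits
  switch (P) {
  case Pred::SLT: // x slt C == x sle C-1
  case Pred::SGE: // x sge C == x sgt C-1
    if (C == SMin) return std::nullopt;     // would wrap past signed min
    C -= 1; P = (P == Pred::SLT) ? Pred::SLE : Pred::SGT; break;
  case Pred::SLE: // x sle C == x slt C+1
  case Pred::SGT: // x sgt C == x sge C+1
    if (C == SMax) return std::nullopt;     // would wrap past signed max
    C += 1; P = (P == Pred::SLE) ? Pred::SLT : Pred::SGE; break;
  case Pred::ULT: // x ult C == x ule C-1
  case Pred::UGE: // x uge C == x ugt C-1
    if (C == 0) return std::nullopt;        // unsigned wrap at zero
    C -= 1; P = (P == Pred::ULT) ? Pred::ULE : Pred::UGT; break;
  default: // ule/ugt (C+1 with unsigned-max wrap) not exercised here
    return std::nullopt;
  }
  if (!isLegalArithImmed(C)) return std::nullopt;
  return std::make_pair(C, P);
}

For example, 'x slt 4097' becomes 'x sle 4096', and 4096 encodes as 1 LSL #12, which is exactly the SUBSWri [[COPY]], 1, 12 the checks above expect.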
+--- +name: slt_to_sle_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x slt c => x sle c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: slt_to_sle_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sge_to_sgt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sge c => x sgt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sge_to_sgt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(sge), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sge_to_sgt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sge c => x sgt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sge_to_sgt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(sge), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: ult_to_ule_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x ult c => x ule c - 1 + ; + ; We should not have a MOV here. 
We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: ult_to_ule_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 8, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: ult_to_ule_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x ult c => x ule c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: ult_to_ule_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 8, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: uge_to_ugt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x uge c => x ugt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: uge_to_ugt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 4097 + %4:gpr(s32) = G_ICMP intpred(uge), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: uge_to_ugt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x uge c => x ugt c - 1 + ; + ; We should not have a MOV here. We can subtract 1 from the constant and + ; change the condition code. + ; + ; log_2(4096) == 12, so we can represent this as a 12 bit value with a + ; left shift. 
+ + ; CHECK-LABEL: name: uge_to_ugt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 1, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 4097 + %4:gpr(s32) = G_ICMP intpred(uge), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sle_to_slt_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sle c => x slt c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + ; + ; (We can't use 4095 here, because that's a legal arithmetic immediate.) + + ; CHECK-LABEL: name: sle_to_slt_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 8191 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sle_to_slt_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sle c => x slt c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sle_to_slt_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 8191 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: sgt_to_sge_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; x sgt c => x sge c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift.
+ + ; CHECK-LABEL: name: sgt_to_sge_s32 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 8191 + %4:gpr(s32) = G_ICMP intpred(sgt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: sgt_to_sge_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; x sgt c => x sge c + 1 + ; + ; We should not have a MOV here. We can add 1 to the constant and change + ; the condition code. + ; + ; log_2(8192) == 13, so we can represent this as a 12 bit value with a + ; left shift. + + ; CHECK-LABEL: name: sgt_to_sge_s64 + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 2, 12, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 8191 + %4:gpr(s32) = G_ICMP intpred(sgt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_int32_min +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; This one should contain a MOV. + ; + ; If we subtract 1 from the constant, it will wrap around, and so it's not + ; true that + ; + ; x slt c => x sle c - 1 + ; x sge c => x sgt c - 1 + + ; CHECK-LABEL: name: no_opt_int32_min + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -2147483648 + ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 -2147483648 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: no_opt_int64_min +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should contain a MOV.
+ ; + ; If we subtract 1 from the constant, it will wrap around, and so it's not + ; true that + ; + ; x slt c => x sle c - 1 + ; x sge c => x sgt c - 1 + + ; CHECK-LABEL: name: no_opt_int64_min + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm -9223372036854775808 + ; CHECK: $xzr = SUBSXrr [[COPY]], [[MOVi64imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 -9223372036854775808 + %4:gpr(s32) = G_ICMP intpred(slt), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_int32_max +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; This one should contain a MOV. + ; + ; If we add 1 to the constant, it will wrap around, and so it's not true + ; that + ; + ; x sle c => x slt c + 1 + ; x sgt c => x sge c + 1 + + ; CHECK-LABEL: name: no_opt_int32_max + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2147483647 + ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[ANDWri:%[0-9]+]]:gpr32sp = ANDWri [[CSINCWr]], 0 + ; CHECK: $w0 = COPY [[ANDWri]] + ; CHECK: RET_ReallyLR implicit $w0 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 2147483647 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s32), %1 + %5:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_AND %4, %5 + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: no_opt_int64_max +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should contain a MOV. + ; + ; If we add 1 to the constant, it will wrap around, and so it's not true + ; that + ; + ; x sle c => x slt c + 1 + ; x sgt c => x sge c + 1 + + + ; CHECK-LABEL: name: no_opt_int64_max + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 9223372036854775807 + ; CHECK: $xzr = SUBSXrr [[COPY]], [[MOVi64imm]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 9223372036854775807 + %4:gpr(s32) = G_ICMP intpred(sle), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: no_opt_zero +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; This one should keep the constant and condition code unchanged.
+ ; + ; This is an unsigned comparison, so when the constant is 0, the following + ; does not hold: + ; + ; x ult c => x ule c - 1 + ; x uge c => x ugt c - 1 + + ; CHECK-LABEL: name: no_opt_zero + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: $xzr = SUBSXri [[COPY]], 0, 0, implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[CSINCWr]], %subreg.sub_32 + ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[INSERT_SUBREG]], 4096 + ; CHECK: $x0 = COPY [[ANDXri]] + ; CHECK: RET_ReallyLR implicit $x0 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = G_CONSTANT i64 0 + %4:gpr(s32) = G_ICMP intpred(ult), %0(s64), %1 + %6:gpr(s64) = G_ANYEXT %4(s32) + %5:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_AND %6, %5 + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir index 131e67335bb1f..f5ebbc2944b6c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir @@ -1,15 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - define void @cbz_s32() { ret void } - define void @cbz_s64() { ret void } - define void @cbnz_s32() { ret void } - define void @cbnz_s64() { ret void } - define hidden void @test_rhs_inttoptr(i64* %p) { ret void } - define hidden void @test_rhs_unknown(i64* %p) { ret void } -... - --- name: cbz_s32 legalized: true @@ -132,7 +123,7 @@ body: | ; CHECK: CBZX [[COPY]], %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.p) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -148,7 +139,7 @@ body: | bb.2: %5:gpr(s64) = G_CONSTANT i64 0 - G_STORE %5(s64), %0(p0) :: (store 8 into %ir.p) + G_STORE %5(s64), %0(p0) :: (store 8) bb.3: RET_ReallyLR @@ -166,12 +157,12 @@ body: | ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load 8 from %ir.p) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load 8) ; CHECK: $xzr = SUBSXri [[LDRXui]], 42, 0, implicit-def $nzcv ; CHECK: Bcc 0, %bb.2, implicit $nzcv ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.p) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -181,16 +172,47 @@ body: | %0:gpr(p0) = COPY $x0 %2:gpr(s64) = G_CONSTANT i64 42 %4:gpr(s64) = G_CONSTANT i64 0 - %1:gpr(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p) + %1:gpr(s64) = G_LOAD %0(p0) :: (load 8) %5:gpr(s32) = G_ICMP intpred(eq), %1(s64), %2 %3:gpr(s1) = G_TRUNC %5(s32) G_BRCOND %3(s1), %bb.3 bb.2: %6:gpr(s64) = G_CONSTANT i64 0 - G_STORE %6(s64), %0(p0) :: (store 8 into %ir.p) + G_STORE %6(s64), %0(p0) :: (store 8) bb.3: RET_ReallyLR ... +--- +name: update_pred_minus_one +legalized: true +regBankSelected: true + +body: | + ; The G_ICMP here will be optimized into a slt against 0. + ; The branch should inherit this change, so we should have Bcc 11 rather than + ; Bcc 13.
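+ ; (In the AArch64CC encoding these tests check, 11 is LT and 13 is LE; rewriting 'sle -1' as 'slt 0' lets the compare use SUBS with the legal immediate 0 instead of materializing -1.)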
+ + ; CHECK-LABEL: name: update_pred_minus_one + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0 + ; CHECK: $wzr = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv + ; CHECK: Bcc 11, %bb.1, implicit $nzcv + ; CHECK: B %bb.0 + ; CHECK: bb.1: + bb.0: + liveins: $w0 + successors: %bb.0, %bb.1 + + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = G_CONSTANT i32 -1 + %2:gpr(s32) = G_ICMP intpred(sle), %0, %1 + %3:gpr(s1) = G_TRUNC %2(s32) + G_BRCOND %3(s1), %bb.1 + G_BR %bb.0 + + bb.1: +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir index fe1fd9385e615..329e555c81cb0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir @@ -99,10 +99,9 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %copy:gpr64 = COPY $x0 - ; CHECK: %negative_one:gpr64 = MOVi64imm -1 - ; CHECK: %and:gpr64common = ANDXri %copy, 8000 - ; CHECK: $xzr = SUBSXrr %and, %negative_one, implicit-def $nzcv - ; CHECK: Bcc 12, %bb.1, implicit $nzcv + ; CHECK: %and:gpr64sp = ANDXri %copy, 8000 + ; CHECK: $xzr = SUBSXri %and, 0, 0, implicit-def $nzcv + ; CHECK: Bcc 10, %bb.1, implicit $nzcv ; CHECK: B %bb.0 ; CHECK: bb.1: ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index c8965bfc8438b..e2d2b841b3485 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -50,6 +50,7 @@ ; CHECK-NEXT: Remove unreachable blocks from the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant Hoisting @@ -85,6 +86,7 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis diff --git a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll index 42448fcce56cf..89fc6457482f0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -29,6 +29,7 @@ ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x26 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X26 ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x27 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X27 ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x28 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X28 +; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x30 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X30 ; Test multiple of reserve-x# options together. 
; RUN: llc -mtriple=arm64-linux-gnu \ @@ -67,6 +68,7 @@ ; RUN: -mattr=+reserve-x26 \ ; RUN: -mattr=+reserve-x27 \ ; RUN: -mattr=+reserve-x28 \ +; RUN: -mattr=+reserve-x30 \ ; RUN: -o - %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-RESERVE \ ; RUN: --check-prefix=CHECK-RESERVE-X1 \ @@ -92,7 +94,8 @@ ; RUN: --check-prefix=CHECK-RESERVE-X25 \ ; RUN: --check-prefix=CHECK-RESERVE-X26 \ ; RUN: --check-prefix=CHECK-RESERVE-X27 \ -; RUN: --check-prefix=CHECK-RESERVE-X28 +; RUN: --check-prefix=CHECK-RESERVE-X28 \ +; RUN: --check-prefix=CHECK-RESERVE-X30 ; x18 is reserved as a platform register on Darwin but not on other ; systems. Create loads of register pressure and make sure this is respected. @@ -134,6 +137,7 @@ define void @keep_live() { ; CHECK-RESERVE-X26-NOT: ldr x26 ; CHECK-RESERVE-X27-NOT: ldr x27 ; CHECK-RESERVE-X28-NOT: ldr x28 +; CHECK-RESERVE-X30-NOT: ldr x30 ; CHECK-RESERVE: Spill ; CHECK-RESERVE-NOT: ldr fp ; CHECK-RESERVE-X1-NOT: ldr x1, @@ -160,6 +164,7 @@ define void @keep_live() { ; CHECK-RESERVE-X26-NOT: ldr x26 ; CHECK-RESERVE-X27-NOT: ldr x27 ; CHECK-RESERVE-X28-NOT: ldr x28 +; CHECK-RESERVE-X30-NOT: ldr x30 ; CHECK-RESERVE: ret ret void } diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll index 2d840037c4acc..ee19ac62f7e48 100644 --- a/llvm/test/CodeGen/AArch64/cpus.ll +++ b/llvm/test/CodeGen/AArch64/cpus.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=carmel 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a34 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll index 31def889adf63..193998bcf71cc 100644 --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -43,3 +43,179 @@ define @udiv_i64( %a, %b %div = udiv %a, %b ret %div } + +; +; SMIN +; + +define @smin_i8( %a, %b, %c) { +; CHECK-LABEL: @smin_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp slt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smin_i16( %a, %b, %c) { +; CHECK-LABEL: @smin_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp slt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smin_i32( %a, %b, %c) { +; CHECK-LABEL: smin_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp slt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smin_i64( %a, %b, %c) { +; CHECK-LABEL: smin_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp slt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +; +; UMIN +; + +define @umin_i8( %a, %b, %c) { +; CHECK-LABEL: @umin_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp ult %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umin_i16( %a, %b, %c) { +; CHECK-LABEL: @umin_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp ult %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umin_i32( %a, %b, %c) { +; CHECK-LABEL: umin_i32: +; CHECK-DAG: ptrue 
p0.s +; CHECK-DAG: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp ult %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umin_i64( %a, %b, %c) { +; CHECK-LABEL: umin_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp ult %a, %b + %min = select %cmp, %a, %b + ret %min +} + +; +; SMAX +; + +define @smax_i8( %a, %b, %c) { +; CHECK-LABEL: @smax_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp sgt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smax_i16( %a, %b, %c) { +; CHECK-LABEL: @smax_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp sgt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smax_i32( %a, %b, %c) { +; CHECK-LABEL: smax_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp sgt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @smax_i64( %a, %b, %c) { +; CHECK-LABEL: smax_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp sgt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +; +; UMAX +; + +define @umax_i8( %a, %b, %c) { +; CHECK-LABEL: @umax_i8 +; CHECK-DAG: ptrue p0.b +; CHECK-DAG: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %cmp = icmp ugt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umax_i16( %a, %b, %c) { +; CHECK-LABEL: @umax_i16 +; CHECK-DAG: ptrue p0.h +; CHECK-DAG: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %cmp = icmp ugt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umax_i32( %a, %b, %c) { +; CHECK-LABEL: umax_i32: +; CHECK-DAG: ptrue p0.s +; CHECK-DAG: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %cmp = icmp ugt %a, %b + %min = select %cmp, %a, %b + ret %min +} + +define @umax_i64( %a, %b, %c) { +; CHECK-LABEL: umax_i64: +; CHECK-DAG: ptrue p0.d +; CHECK-DAG: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %cmp = icmp ugt %a, %b + %min = select %cmp, %a, %b + ret %min +} diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir b/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir new file mode 100644 index 0000000000000..33b0dfe783a89 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir @@ -0,0 +1,118 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=machine-outliner \ +# RUN: -machine-outliner-reruns=0 -verify-machineinstrs %s -o - | FileCheck --check-prefix=ITER1 %s + +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=machine-outliner \ +# RUN: -machine-outliner-reruns=1 -verify-machineinstrs %s -o - | FileCheck --check-prefix=ITER2 %s + +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=machine-outliner \ +# RUN: -machine-outliner-reruns=2 -debug-only=machine-outliner \ +# RUN: -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck --check-prefix=ITER3 %s + +--- | + declare void @z1(i32, i32) + declare void @z2(i32, i32, i32) + define void @a(i32 %p1) { unreachable } + define void @b(i32 %p1) { unreachable } +... 
+--- +#ITER1-LABEL: name: a +#ITER1: $w19 = ORRWrs $wzr, killed $w0, 0 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: $w1 = ORRWri $wzr, 1 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: $w1 = ORRWri $wzr, 1984 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} + +#ITER2-LABEL: name: a +#ITER2: $w19 = ORRWrs $wzr, killed $w0, 0 +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: $w1 = ORRWri $wzr, 1 +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +name: a +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $w0, $lr, $x19 + + early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store 8), (store 8) + $w19 = ORRWrs $wzr, killed $w0, 0 + $w0 = ORRWri $wzr, 0 + $w1 = ORRWri $wzr, 1 + BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp + $w1 = ORRWri $wzr, 1 + $w2 = ORRWri $wzr, 1920 + $w0 = ORRWrs $wzr, $w19, 0 + BL @z2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp + $w1 = ORRWri $wzr, 1984 + $w2 = ORRWri $wzr, 1920 + $w0 = ORRWrs $wzr, killed $w19, 0 + BL @z2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp + $w0 = ORRWri $wzr, 0 + $w1 = ORRWri $wzr, 1 + BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp + early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load 8), (load 8) + RET undef $lr + +... 
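+# A rerun can only fire on sequences that become identical after the previous +# iteration rewrites their tails into calls: per the ITER2 checks for @a above, +# the leftover 'ORRWri $wzr, 1984' + BL pairs (one here, two in @b) are +# outlined again, while the single remaining 'ORRWri $wzr, 1' has no duplicate +# left and so stays inline.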
+--- +#ITER1-LABEL: name: b +#ITER1: $w19 = ORRWrs $wzr, killed $w0, 0 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: $w1 = ORRWri $wzr, 1984 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: $w1 = ORRWri $wzr, 1984 +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} + +#ITER2-LABEL: name: b +#ITER2: $w19 = ORRWrs $wzr, killed $w0, 0 +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-NEXT: BL @OUTLINED_FUNCTION_{{[0-9]+}} +name: b +tracksRegLiveness: true +liveins: + - { reg: '$w0', virtual-reg: '' } +machineFunctionInfo: + hasRedZone: false +body: | + bb.0: + liveins: $w0, $lr, $x19 + + early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store 8), (store 8) + $w19 = ORRWrs $wzr, killed $w0, 0 + $w0 = ORRWri $wzr, 0 + $w1 = ORRWri $wzr, 1 + BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp + $w1 = ORRWri $wzr, 1984 + $w2 = ORRWri $wzr, 1920 + $w0 = ORRWrs $wzr, $w19, 0 + BL @z2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp + $w1 = ORRWri $wzr, 1984 + $w2 = ORRWri $wzr, 1920 + $w0 = ORRWrs $wzr, killed $w19, 0 + BL @z2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp + $w0 = ORRWri $wzr, 0 + $w1 = ORRWri $wzr, 1 + BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp + early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load 8), (load 8) + RET undef $lr +... 
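The three RUN lines bound the iteration count: the outliner always runs once, and -machine-outliner-reruns=N allows up to N more passes, so reruns=2 yields the "iteration 3 out of 3" debug line checked below. A rough C++ sketch of that driver loop, using a doOutline() stand-in rather than the in-tree MachineOutliner API:

#include <cstdio>
#include <functional>

// One mandatory outlining pass, then up to 'reruns' more, stopping as soon
// as a pass outlines nothing. Illustrative only; the printf mirrors the
// -debug-only=machine-outliner message the ITER3 check matches.
static bool runWithReruns(const std::function<bool()> &doOutline,
                          unsigned reruns) {
  bool changed = doOutline();                  // iteration 1 always runs
  bool any = changed;
  for (unsigned i = 2; changed && i <= reruns + 1; ++i) {
    changed = doOutline();                     // iterations 2 .. reruns+1
    if (!changed)
      std::printf("Did not outline on iteration %u out of %u\n",
                  i, reruns + 1);
    any = any || changed;
  }
  return any;
}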
+ +#ITER1-LABEL: name: OUTLINED_FUNCTION_{{[0-9]+}} +#ITER1-LABEL: name: OUTLINED_FUNCTION_{{[0-9]+}} + +#ITER2-LABEL: name: OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-LABEL: name: OUTLINED_FUNCTION_{{[0-9]+}} +#ITER2-LABEL: name: OUTLINED_FUNCTION_{{[0-9]+}} + +# ITER3: Did not outline on iteration 3 out of 3 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir index 08aa043f5c683..c692929d7b882 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir @@ -1,6 +1,6 @@ -# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS -# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN -# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=4 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS +# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outliner-reruns=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS +# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outliner-reruns=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN +# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outliner-reruns=3 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS # Example of Repeated Instruction Sequence - Iterative Machine Outlining # diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll new file mode 100644 index 0000000000000..b98916585b245 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -0,0 +1,339 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; RUN: not --crash llc -mtriple=aarch64_be -mattr=+sve < %s + +define @bitcast_i16_to_i8( %v) { +; CHECK-LABEL: bitcast_i16_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_i8( %v) { +; CHECK-LABEL: bitcast_i32_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_i8( %v) { +; CHECK-LABEL: bitcast_i64_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_half_to_i8( %v) { +; CHECK-LABEL: bitcast_half_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_i8( %v) { +; CHECK-LABEL: bitcast_float_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_i8( %v) { +; CHECK-LABEL: bitcast_double_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_i16( %v) { +; CHECK-LABEL: bitcast_i8_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_i16( %v) { +; CHECK-LABEL: bitcast_i32_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_i16( %v) { +; CHECK-LABEL: bitcast_i64_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_half_to_i16( %v) { +; CHECK-LABEL: bitcast_half_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_i16( %v) { +; 
CHECK-LABEL: bitcast_float_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_i16( %v) { +; CHECK-LABEL: bitcast_double_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_i32( %v) { +; CHECK-LABEL: bitcast_i8_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i16_to_i32( %v) { +; CHECK-LABEL: bitcast_i16_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_i32( %v) { +; CHECK-LABEL: bitcast_i64_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_half_to_i32( %v) { +; CHECK-LABEL: bitcast_half_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_i32( %v) { +; CHECK-LABEL: bitcast_float_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_i32( %v) { +; CHECK-LABEL: bitcast_double_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_i64( %v) { +; CHECK-LABEL: bitcast_i8_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i16_to_i64( %v) { +; CHECK-LABEL: bitcast_i16_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_i64( %v) { +; CHECK-LABEL: bitcast_i32_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_half_to_i64( %v) { +; CHECK-LABEL: bitcast_half_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_i64( %v) { +; CHECK-LABEL: bitcast_float_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_i64( %v) { +; CHECK-LABEL: bitcast_double_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_half( %v) { +; CHECK-LABEL: bitcast_i8_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i16_to_half( %v) { +; CHECK-LABEL: bitcast_i16_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_half( %v) { +; CHECK-LABEL: bitcast_i32_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_half( %v) { +; CHECK-LABEL: bitcast_i64_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_half( %v) { +; CHECK-LABEL: bitcast_float_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_half( %v) { +; CHECK-LABEL: bitcast_double_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_float( %v) { +; CHECK-LABEL: bitcast_i8_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i16_to_float( %v) { +; CHECK-LABEL: bitcast_i16_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_float( %v) { +; CHECK-LABEL: bitcast_i32_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_float( %v) { +; CHECK-LABEL: bitcast_i64_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + 
+define <vscale x 4 x float> @bitcast_half_to_float(<vscale x 8 x half> %v) {
+; CHECK-LABEL: bitcast_half_to_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 8 x half> %v to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 4 x float> @bitcast_double_to_float(<vscale x 2 x double> %v) {
+; CHECK-LABEL: bitcast_double_to_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 2 x double> %v to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+define <vscale x 2 x double> @bitcast_i8_to_double(<vscale x 16 x i8> %v) {
+; CHECK-LABEL: bitcast_i8_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 16 x i8> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 2 x double> @bitcast_i16_to_double(<vscale x 8 x i16> %v) {
+; CHECK-LABEL: bitcast_i16_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 8 x i16> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 2 x double> @bitcast_i32_to_double(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: bitcast_i32_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 4 x i32> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 2 x double> @bitcast_i64_to_double(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: bitcast_i64_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 2 x i64> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 2 x double> @bitcast_half_to_double(<vscale x 8 x half> %v) {
+; CHECK-LABEL: bitcast_half_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 8 x half> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
+
+define <vscale x 2 x double> @bitcast_float_to_double(<vscale x 4 x float> %v) {
+; CHECK-LABEL: bitcast_float_to_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
new file mode 100644
index 0000000000000..90acf8cf3d0a7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -0,0 +1,135 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane0_16xi8
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.b, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
+  ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: test_lane0_8xi16
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.h, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: test_lane0_4xi32
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.s, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane0_2xi64
+; CHECK: mov w[[REG:.*]], #30
+; CHECK: mov z0.d, p{{[0-7]}}/m, x[[REG]]
+  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
+  ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: test_lane0_2xf64
+; CHECK: fmov d[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.d, p{{[0-7]}}/m, z[[REG]].d
+  %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
+  ret <vscale x 2 x double> %b
+}
+
+define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: test_lane0_4xf32
+; CHECK: fmov s[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.s, p{{[0-7]}}/m, z[[REG]].s
+  %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
+  ret <vscale x 4 x float> %b
+}
+
+define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane0_8xf16
+; CHECK: fmov h[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.h, p{{[0-7]}}/m, z[[REG]].h
+  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
+  ret <vscale x 8 x half> %b
+}
+
+; Undefined lane insert
+define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane4_2xi64
+; CHECK: mov w[[IDXREG:.*]], #4
+; CHECK: index z[[CMPVEC:[0-9]+]].d, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].d, x[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].d, p{{[0-7]}}/z, z[[CMPVEC]].d, z[[IDXVEC]].d
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.d, p[[PRED]]/m, x[[VALREG]]
+  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
+  ret <vscale x 2 x i64> %b
+}
+
+; Undefined lane insert
+define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane9_8xf16
+; CHECK: mov w[[IDXREG:.*]], #9
+; CHECK: index z[[CMPVEC:[0-9]+]].h, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].h, p{{[0-7]}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK: fmov h[[VALREG:[0-9]+]], #1.00000000
+; CHECK: mov z0.h, p[[PRED]]/m, h[[VALREG]]
+  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
+  ret <vscale x 8 x half> %b
+}
+
+define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane1_16xi8
+; CHECK: mov w[[IDXREG:.*]], #1
+; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
+  ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8
+; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG:[0-9]+]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
+  ret <vscale x 16 x i8> %b
+}
+
+
+; Redundant lane insert
+define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: extract_insert_4xi32
+; CHECK-NOT: mov w{{.*}}, #30
+; CHECK-NOT: mov z0.d
+  %b = extractelement <vscale x 4 x i32> %a, i32 2
+  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
+; CHECK-LABEL: test_lane6_undef_8xi16
+; CHECK: mov w[[IDXREG:.*]], #6
+; CHECK: index z[[CMPVEC:.*]].h, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:.*]].h, p{{.*}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK: mov z0.h, p[[PRED]]/m, w0
+  %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
+; CHECK-LABEL: test_lane0_undef_16xi8
+; CHECK: fmov s0, w0
+  %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
+  ret <vscale x 16 x i8> %b
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
index 083a7d35439cd..c933c2eab40df 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
 ;
 ; FADDA
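The variable-lane and out-of-range-lane cases in sve-insert-element.ll above all encode the same lowering strategy: SVE has no instruction that inserts at a lane index held in a register, so the CHECK lines expect the backend to build a vector of lane numbers with INDEX, compare it against the requested lane to form a single-active-lane predicate, and finish with a predicated move. A minimal sketch of that sequence for inserting the byte value 30 at the lane held in w0 (register numbers here are illustrative, not taken from actual llc output):

    ptrue p1.b                        // all-lanes governing predicate
    index z1.b, #0, #1                // z1 = { 0, 1, 2, ... }
    mov   z2.b, w0                    // splat the requested lane index
    cmpeq p0.b, p1/z, z1.b, z2.b      // p0 is active only at that lane
    mov   w8, #30                     // value to insert
    mov   z0.b, p0/m, w8              // predicated move updates one lane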
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 9c417ce1dd428..2d50e6fefdd9c 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1,5 +1,221 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
+; SMAX
+
+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: smax_i8:
+; CHECK: smax z0.b, z0.b, #-128
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smax_i16:
+; CHECK: smax z0.h, z0.h, #127
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smax_i32:
+; CHECK: smax z0.s, z0.s, #-128
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smax_i64:
+; CHECK: smax z0.d, z0.d, #127
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+; SMIN
+
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: smin_i8:
+; CHECK: smin z0.b, z0.b, #127
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smin_i16:
+; CHECK: smin z0.h, z0.h, #-128
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smin_i32:
+; CHECK: smin z0.s, z0.s, #127
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smin_i64:
+; CHECK: smin z0.d, z0.d, #-128
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+; UMAX
+
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: umax_i8:
+; CHECK: umax z0.b, z0.b, #0
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umax_i16:
+; CHECK: umax z0.h, z0.h, #255
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umax_i32:
+; CHECK: umax z0.s, z0.s, #0
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umax_i64:
+; CHECK: umax z0.d, z0.d, #255
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+; UMIN
+
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: umin_i8:
+; CHECK: umin z0.b, z0.b, #255
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umin_i16:
+; CHECK: umin z0.h, z0.h, #0
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umin_i32:
+; CHECK: umin z0.s, z0.s, #255
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umin_i64:
+; CHECK: umin z0.d, z0.d, #0
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; SQADD
 
 define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
@@ -336,3 +552,28 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>,
 declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)
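The SMAX/SMIN/UMAX/UMIN cases added above splat the boundary values of the instructions' immediate encodings: -128 and 127 for the signed forms and 0 and 255 for the unsigned forms (assuming the usual SVE signed/unsigned 8-bit immediate ranges), i.e. exactly the constants that should still fold into the immediate variant instead of being materialized into a register. The pattern being matched is the standard scalable-vector splat idiom; schematically, for one of the byte cases:

    %elt   = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
    %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef,
                           <vscale x 16 x i32> zeroinitializer
    ; with an in-range splat operand, selection is expected to produce the
    ; immediate form, e.g. "smin z0.b, z0.b, #127"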
zeroinitializer +} + +define @test_zeroinit_2xi1() { +; CHECK-LABEL: test_zeroinit_2xi1 +; CHECK: whilelo p0.d, xzr, xzr +; CHECK-NEXT: ret + ret zeroinitializer +} + +define @test_zeroinit_4xi1() { +; CHECK-LABEL: test_zeroinit_4xi1 +; CHECK: whilelo p0.s, xzr, xzr +; CHECK-NEXT: ret + ret zeroinitializer +} + +define @test_zeroinit_8xi1() { +; CHECK-LABEL: test_zeroinit_8xi1 +; CHECK: whilelo p0.h, xzr, xzr +; CHECK-NEXT: ret + ret zeroinitializer +} + +define @test_zeroinit_16xi1() { +; CHECK-LABEL: test_zeroinit_16xi1 +; CHECK: whilelo p0.b, xzr, xzr +; CHECK-NEXT: ret + ret zeroinitializer +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 3f573e7f9a861..2e280d277a59e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -727,9 +727,6 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b32 s18, 0 ; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000 -; GPRIDX-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GPRIDX-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GPRIDX-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000 ; GPRIDX-NEXT: s_mov_b32 s16, s18 ; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000 @@ -793,9 +790,6 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off -; GPRIDX-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GPRIDX-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GPRIDX-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; @@ -816,9 +810,6 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; MOVREL-NEXT: s_mov_b32 s8, s18 ; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0 ; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0 -; MOVREL-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; MOVREL-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; MOVREL-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; MOVREL-NEXT: v_mov_b32_e32 v34, s19 ; MOVREL-NEXT: v_mov_b32_e32 v33, s18 ; MOVREL-NEXT: v_mov_b32_e32 v32, s17 @@ -868,10 +859,6 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off -; MOVREL-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; MOVREL-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; MOVREL-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; MOVREL-NEXT: s_waitcnt vmcnt(0) ; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0 ; MOVREL-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll index bfb2c84f6c027..18aa54dcf827b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll 
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -29,8 +29,6 @@ define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x1 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -92,8 +90,6 @@ define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x2 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -155,8 +151,6 @@ define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x4 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -218,8 +212,6 @@ define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -284,8 +276,6 @@ define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x3 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -350,8 +340,6 @@ define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x5 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -416,8 +404,6 @@ define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x9 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -482,8 +468,6 @@ define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v0, v0, s[0:7] dmask:0x6 unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -558,8 +542,6 @@ define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: 
s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: ; return to shader part epilog @@ -622,8 +604,6 @@ define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1 @@ -691,8 +671,6 @@ define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm tfe d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1 @@ -768,8 +746,6 @@ define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 ; GFX8-PACKED-NEXT: s_mov_b32 s5, s7 ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 -; GFX8-PACKED-NEXT: s_nop 0 -; GFX8-PACKED-NEXT: s_nop 0 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x10 unorm tfe d16 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) ; GFX8-PACKED-NEXT: v_mov_b32_e32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll index a192b372995b7..d04469bf3e2e7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll @@ -26,8 +26,6 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: s_nop 0 -; PACKED-NEXT: s_nop 0 ; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm ; PACKED-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -59,8 +57,6 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: s_nop 0 -; PACKED-NEXT: s_nop 0 ; PACKED-NEXT: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm ; PACKED-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -102,8 +98,6 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; PACKED-NEXT: s_mov_b32 s5, s7 ; PACKED-NEXT: s_mov_b32 s6, s8 ; PACKED-NEXT: s_mov_b32 s7, s9 -; PACKED-NEXT: s_nop 0 -; PACKED-NEXT: s_nop 0 ; PACKED-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm ; PACKED-NEXT: s_endpgm call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index cdfbf5043672e..de7521c3d30d4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ 
b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -5353,7 +5353,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v6, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v9, v1, v2 ; GCN-NEXT: v_mul_lo_u32 v2, v1, v2 -; GCN-NEXT: s_mov_b32 s4, 0x976a7376 +; GCN-NEXT: s_movk_i32 s4, 0x11e ; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; GCN-NEXT: v_mul_lo_u32 v6, v1, v3 ; GCN-NEXT: v_mul_hi_u32 v3, v1, v3 @@ -5369,7 +5369,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v5, v0, s3 ; GCN-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1] ; GCN-NEXT: v_mul_lo_u32 v6, v2, s3 -; GCN-NEXT: s_movk_i32 s2, 0x11f +; GCN-NEXT: s_mov_b32 s2, 0x976a7377 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_mul_lo_u32 v5, v0, s3 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 @@ -5377,14 +5377,14 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v10, v0, v4 ; GCN-NEXT: v_mul_hi_u32 v9, v0, v5 ; GCN-NEXT: v_mul_hi_u32 v11, v2, v4 -; GCN-NEXT: s_mov_b32 s3, 0x976a7377 +; GCN-NEXT: s_movk_i32 s3, 0x11f ; GCN-NEXT: s_mov_b32 s9, s5 ; GCN-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; GCN-NEXT: v_addc_u32_e32 v9, vcc, v8, v10, vcc ; GCN-NEXT: v_mul_lo_u32 v10, v2, v5 ; GCN-NEXT: v_mul_hi_u32 v5, v2, v5 ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v9, v5, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v11, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -5407,24 +5407,24 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v7, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc -; GCN-NEXT: v_mul_lo_u32 v2, v0, s2 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s3 -; GCN-NEXT: v_mul_lo_u32 v4, v1, s3 -; GCN-NEXT: v_mov_b32_e32 v5, s2 +; GCN-NEXT: v_mul_lo_u32 v2, v0, s3 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s2 +; GCN-NEXT: v_mul_lo_u32 v4, v1, s2 +; GCN-NEXT: v_mov_b32_e32 v5, s3 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; GCN-NEXT: v_mul_lo_u32 v3, v0, s3 +; GCN-NEXT: v_mul_lo_u32 v3, v0, s2 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, s7, v2 ; GCN-NEXT: v_sub_i32_e32 v3, vcc, s6, v3 ; GCN-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v5, vcc -; GCN-NEXT: v_subrev_i32_e64 v5, s[0:1], s3, v3 +; GCN-NEXT: v_subrev_i32_e64 v5, s[0:1], s2, v3 ; GCN-NEXT: v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1] -; GCN-NEXT: s_movk_i32 s3, 0x11e -; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], s3, v4 +; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], s4, v4 +; GCN-NEXT: s_mov_b32 s2, 0x976a7376 ; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] -; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], s4, v5 +; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], s2, v5 ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] -; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v4 +; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v4 ; GCN-NEXT: v_cndmask_b32_e64 v4, v6, v5, s[0:1] ; GCN-NEXT: v_add_i32_e64 v5, s[0:1], 2, v0 ; GCN-NEXT: v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1] @@ -5434,11 +5434,11 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v6, s[0:1] ; GCN-NEXT: v_mov_b32_e32 v6, s7 ; GCN-NEXT: v_subb_u32_e32 v2, vcc, v6, v2, vcc -; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s3, v2 +; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2 ; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; 
GCN-NEXT: v_cmp_lt_u32_e32 vcc, s4, v3 +; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s2, v3 ; GCN-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s3, v2 ; GCN-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GCN-NEXT: v_cndmask_b32_e64 v2, v7, v5, s[0:1] @@ -5599,7 +5599,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GCN-NEXT: v_mul_lo_u32 v10, v3, v8 ; GCN-NEXT: v_mul_hi_u32 v8, v3, v8 ; GCN-NEXT: v_mul_lo_u32 v3, v3, v5 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v9, v8, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v11, v2, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 @@ -5725,9 +5725,8 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_lo_u32 v2, v0, s2 ; GCN-NEXT: v_mul_hi_u32 v3, v0, s3 ; GCN-NEXT: v_mul_lo_u32 v4, v1, s3 -; GCN-NEXT: s_mov_b32 s12, 0x9761f7c9 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s8, s4 +; GCN-NEXT: s_movk_i32 s12, 0x11f +; GCN-NEXT: s_mov_b32 s13, 0x9761f7c9 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_mul_lo_u32 v3, v0, s3 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 @@ -5736,12 +5735,13 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v6, v0, v3 ; GCN-NEXT: v_mul_hi_u32 v9, v1, v2 ; GCN-NEXT: v_mul_lo_u32 v2, v1, v2 -; GCN-NEXT: s_movk_i32 s4, 0x11f +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s9, s5 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; GCN-NEXT: v_mul_lo_u32 v6, v1, v3 ; GCN-NEXT: v_mul_hi_u32 v3, v1, v3 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc -; GCN-NEXT: s_mov_b32 s9, s5 +; GCN-NEXT: s_movk_i32 s5, 0x11e ; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v9, v7, vcc @@ -5752,7 +5752,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v5, v0, s3 ; GCN-NEXT: v_addc_u32_e64 v2, vcc, v1, v3, s[0:1] ; GCN-NEXT: v_mul_lo_u32 v6, v2, s3 -; GCN-NEXT: s_movk_i32 s5, 0x11e +; GCN-NEXT: s_mov_b32 s8, s4 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_mul_lo_u32 v5, v0, s3 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 @@ -5760,14 +5760,15 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v10, v0, v4 ; GCN-NEXT: v_mul_hi_u32 v9, v0, v5 ; GCN-NEXT: v_mul_hi_u32 v11, v2, v4 +; GCN-NEXT: s_mov_b32 s4, 0x9761f7c8 ; GCN-NEXT: s_mov_b32 s11, 0xf000 -; GCN-NEXT: s_mov_b32 s10, -1 ; GCN-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; GCN-NEXT: v_addc_u32_e32 v9, vcc, v8, v10, vcc ; GCN-NEXT: v_mul_lo_u32 v10, v2, v5 ; GCN-NEXT: v_mul_hi_u32 v5, v2, v5 ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GCN-NEXT: s_mov_b32 s10, -1 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v9, v5, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v11, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -5790,26 +5791,25 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v7, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v8, v2, vcc -; GCN-NEXT: v_mul_lo_u32 v2, v0, s4 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s12 -; GCN-NEXT: v_mul_lo_u32 v1, v1, s12 -; GCN-NEXT: v_mul_lo_u32 v0, v0, s12 +; 
GCN-NEXT: v_mul_lo_u32 v2, v0, s12 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 +; GCN-NEXT: v_mul_lo_u32 v1, v1, s13 +; GCN-NEXT: v_mul_lo_u32 v0, v0, s13 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GCN-NEXT: v_add_i32_e32 v1, vcc, v2, v1 ; GCN-NEXT: v_sub_i32_e32 v2, vcc, s7, v1 +; GCN-NEXT: v_mov_b32_e32 v3, s12 ; GCN-NEXT: v_sub_i32_e32 v0, vcc, s6, v0 -; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc -; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s12, v0 +; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s13, v0 ; GCN-NEXT: v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1] ; GCN-NEXT: v_cmp_lt_u32_e64 s[2:3], s5, v5 -; GCN-NEXT: s_mov_b32 s6, 0x9761f7c8 ; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1] ; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[2:3] -; GCN-NEXT: v_cmp_lt_u32_e64 s[2:3], s6, v4 -; GCN-NEXT: v_subrev_i32_e64 v3, s[0:1], s12, v4 +; GCN-NEXT: v_cmp_lt_u32_e64 s[2:3], s4, v4 +; GCN-NEXT: v_subrev_i32_e64 v3, s[0:1], s13, v4 ; GCN-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[2:3] -; GCN-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, v5 +; GCN-NEXT: v_cmp_eq_u32_e64 s[2:3], s12, v5 ; GCN-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[2:3] ; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1] ; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 @@ -5818,9 +5818,9 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc ; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s5, v1 ; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 +; GCN-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 ; GCN-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s12, v1 ; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v6, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -6026,7 +6026,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v8, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v10, v5 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v6, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -6199,7 +6199,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v5 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v4, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v7, v2 @@ -6387,7 +6387,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v5 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v4, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v7, v2 @@ -6542,7 +6542,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v5 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: 
v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v4, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v7, v2 @@ -6665,7 +6665,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GCN-NEXT: v_add_i32_e32 v12, vcc, v13, v12 ; GCN-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; GCN-NEXT: v_mul_lo_u32 v3, v3, v8 -; GCN-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GCN-NEXT: v_add_i32_e32 v9, vcc, v12, v9 ; GCN-NEXT: v_addc_u32_e32 v9, vcc, v13, v11, vcc ; GCN-NEXT: v_addc_u32_e32 v8, vcc, v10, v4, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v9, v3 @@ -6807,7 +6807,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v8, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v10, v5 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v6, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -6981,7 +6981,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v5 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v4, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v7, v2 @@ -7190,7 +7190,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v2, v2, v5 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v4, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v7, v2 @@ -7307,7 +7307,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GCN-NEXT: v_add_i32_e32 v12, vcc, v13, v12 ; GCN-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc ; GCN-NEXT: v_mul_lo_u32 v3, v3, v8 -; GCN-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GCN-NEXT: v_add_i32_e32 v9, vcc, v12, v9 ; GCN-NEXT: v_addc_u32_e32 v9, vcc, v13, v11, vcc ; GCN-NEXT: v_addc_u32_e32 v8, vcc, v10, v4, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v9, v3 diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll new file mode 100644 index 0000000000000..412815fb9c825 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -filetype=obj -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W32 %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W64 %s + +; ELF: Section { +; ELF: Name: .text +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x6) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_EXECINSTR (0x4) +; ELF: } + +; ELF: SHT_NOTE +; ELF: Flags [ (0x0) +; ELF: ] + +; 
ELF: Symbol { +; ELF: Name: simple +; ELF: Size: 36 +; ELF: Section: .text (0x2) +; ELF: } + +; GFX10-W32: NumSGPRsForWavesPerEU: 4 +; GFX10-W32: NumVGPRsForWavesPerEU: 3 +; GFX10-W64: NumSGPRsForWavesPerEU: 2 +; GFX10-W64: NumVGPRsForWavesPerEU: 3 + +define amdgpu_kernel void @simple(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir index 4ac48b1133aad..8e9aaa7035813 100644 --- a/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir +++ b/llvm/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -308,7 +308,6 @@ body: | ; GCN-LABEL: name: valu_inst_breaks_clause ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec - ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 @@ -327,7 +326,6 @@ body: | ; GCN-LABEL: name: salu_inst_breaks_clause ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 - ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 @@ -345,7 +343,6 @@ body: | ; GCN-LABEL: name: ds_inst_breaks_clause ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec - ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index fbfa6fb9cdedd..84eb94228decc 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -744,17 +744,13 @@ entry: ; GCN-LABEL: {{^}}tail_call_byval_align16: ; GCN-NOT: s32 -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:20 -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:16 +; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 +; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; GCN: s_getpc_b64 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}} -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-NOT: s32 ; GCN: s_setpc_b64 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { @@ -766,15 +762,11 @@ entry: ; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64: ; GCN-NOT: s32 -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; GCN: buffer_load_dword v33, off, s[0:3], s32{{$}} ; GCN: s_getpc_b64 ; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 -; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; 
GCN-NOT: s32 ; GCN: s_setpc_b64 define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index c184ce778fa85..78c810d77f165 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -13,15 +13,15 @@ define void @use_vcc() #1 { } ; GCN-LABEL: {{^}}indirect_use_vcc: -; GCN: v_writelane_b32 v32, s33, 2 -; GCN: v_writelane_b32 v32, s30, 0 -; GCN: v_writelane_b32 v32, s31, 1 +; GCN: v_writelane_b32 v40, s33, 2 +; GCN: v_writelane_b32 v40, s30, 0 +; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v32, 0 -; GCN: v_readlane_b32 s5, v32, 1 -; GCN: v_readlane_b32 s33, v32, 2 +; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s33, v40, 2 ; GCN: ; NumSgprs: 36 -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define void @indirect_use_vcc() #1 { call void @use_vcc() ret void @@ -32,7 +32,7 @@ define void @indirect_use_vcc() #1 { ; CI: ; NumSgprs: 38 ; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 { call void @indirect_use_vcc() ret void @@ -50,7 +50,7 @@ define void @use_flat_scratch() #1 { ; GCN-LABEL: {{^}}indirect_use_flat_scratch: ; CI: ; NumSgprs: 38 ; VI: ; NumSgprs: 40 -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define void @indirect_use_flat_scratch() #1 { call void @use_flat_scratch() ret void @@ -61,7 +61,7 @@ define void @indirect_use_flat_scratch() #1 { ; CI: ; NumSgprs: 38 ; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 { call void @indirect_use_flat_scratch() ret void @@ -76,7 +76,7 @@ define void @use_10_vgpr() #1 { } ; GCN-LABEL: {{^}}indirect_use_10_vgpr: -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define void @indirect_use_10_vgpr() #0 { call void @use_10_vgpr() ret void @@ -84,23 +84,23 @@ define void @indirect_use_10_vgpr() #0 { ; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr: ; GCN: is_dynamic_callstack = 0 -; GCN: ; NumVgprs: 33 +; GCN: ; NumVgprs: 41 define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 { call void @indirect_use_10_vgpr() ret void } -; GCN-LABEL: {{^}}use_40_vgpr: -; GCN: ; NumVgprs: 40 -define void @use_40_vgpr() #1 { - call void asm sideeffect "", "~{v39}"() #0 +; GCN-LABEL: {{^}}use_50_vgpr: +; GCN: ; NumVgprs: 50 +define void @use_50_vgpr() #1 { + call void asm sideeffect "", "~{v49}"() #0 ret void } -; GCN-LABEL: {{^}}indirect_use_40_vgpr: -; GCN: ; NumVgprs: 40 -define void @indirect_use_40_vgpr() #0 { - call void @use_40_vgpr() +; GCN-LABEL: {{^}}indirect_use_50_vgpr: +; GCN: ; NumVgprs: 50 +define void @indirect_use_50_vgpr() #0 { + call void @use_50_vgpr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index ebfc93e238502..0fd98be6ff8f2 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -23,22 +23,22 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ ; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GCN: buffer_store_dword -; 
GCN: v_writelane_b32 v32, s33, 4 -; GCN: v_writelane_b32 v32, s34, 0 -; GCN: v_writelane_b32 v32, s35, 1 -; GCN: v_writelane_b32 v32, s30, 2 -; GCN: v_writelane_b32 v32, s31, 3 +; GCN: v_writelane_b32 v40, s33, 4 +; GCN: v_writelane_b32 v40, s34, 0 +; GCN: v_writelane_b32 v40, s35, 1 +; GCN: v_writelane_b32 v40, s30, 2 +; GCN: v_writelane_b32 v40, s31, 3 ; GCN: s_swappc_b64 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s4, v32, 2 -; GCN-DAG: v_readlane_b32 s5, v32, 3 -; GCN: v_readlane_b32 s35, v32, 1 -; GCN: v_readlane_b32 s34, v32, 0 +; GCN-DAG: v_readlane_b32 s4, v40, 2 +; GCN-DAG: v_readlane_b32 s5, v40, 3 +; GCN: v_readlane_b32 s35, v40, 1 +; GCN: v_readlane_b32 s34, v40, 0 -; GCN: v_readlane_b32 s33, v32, 4 +; GCN: v_readlane_b32 s33, v40, 4 ; GCN: buffer_load_dword ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { @@ -49,16 +49,16 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa } ; GCN-LABEL: {{^}}test_func_call_external_void_funcx2: -; GCN: buffer_store_dword v32 -; GCN: v_writelane_b32 v32, s33, 4 +; GCN: buffer_store_dword v40 +; GCN: v_writelane_b32 v40, s33, 4 ; GCN: s_mov_b32 s33, s32 ; GCN: s_add_u32 s32, s32, 0x400 ; GCN: s_swappc_b64 ; GCN-NEXT: s_swappc_b64 -; GCN: v_readlane_b32 s33, v32, 4 -; GCN: buffer_load_dword v32, +; GCN: v_readlane_b32 s33, v40, 4 +; GCN: buffer_load_dword v40, define void @test_func_call_external_void_funcx2() #0 { call void @external_void_func_void() call void @external_void_func_void() @@ -115,9 +115,9 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace } ; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_v31: -; GCN: v_mov_b32_e32 v32, v31 +; GCN: v_mov_b32_e32 v40, v31 ; GCN-NEXT: s_swappc_b64 -; GCN-NEXT: v_mov_b32_e32 v31, v32 +; GCN-NEXT: v_mov_b32_e32 v31, v40 define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 { %v31 = call i32 asm sideeffect "; def $0", "={v31}"() call void @external_void_func_void() @@ -177,31 +177,31 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace( ret void } -; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32: {{.*}} +; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v40: {{.*}} ; GCN-NOT: v32 ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4 ; GCN: s_mov_b32 s32, 0 -; GCN-NOT: v32 +; GCN-NOT: v40 ; GCN: ;;#ASMSTART -; GCN-NEXT: ; def v32 +; GCN-NEXT: ; def v40 ; GCN-NEXT: ;;#ASMEND ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN-NOT: v32 +; GCN-NOT: v40 ; GCN: ;;#ASMSTART -; GCN-NEXT: ; use v32 +; GCN-NEXT: ; use v40 ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_endpgm -define amdgpu_kernel void @test_call_void_func_void_preserves_v32(i32 addrspace(1)* %out) #0 { - %v32 = call i32 asm sideeffect "; def $0", "={v32}"() +define amdgpu_kernel void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 { + %v40 = call i32 asm sideeffect "; def $0", "={v40}"() call void @external_void_func_void() - call void asm sideeffect "; use $0", "{v32}"(i32 %v32) + call void asm sideeffect "; use $0", "{v40}"(i32 %v40) ret void } @@ -255,12 +255,12 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 { ; GCN-LABEL: {{^}}callee_saved_sgpr_func: ; GCN-NOT: s40 -; GCN: v_writelane_b32 v32, s40 +; GCN: v_writelane_b32 v40, s40 ; GCN: 
s_swappc_b64 ; GCN-NOT: s40 ; GCN: ; use s40 ; GCN-NOT: s40 -; GCN: v_readlane_b32 s40, v32 +; GCN: v_readlane_b32 s40, v40 ; GCN-NOT: s40 define void @callee_saved_sgpr_func() #2 { %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 @@ -287,19 +287,19 @@ define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 { ; First call preserved VGPR is used so it can't be used for SGPR spills. ; GCN-LABEL: {{^}}callee_saved_sgpr_vgpr_func: ; GCN-NOT: s40 -; GCN: v_writelane_b32 v33, s40 +; GCN: v_writelane_b32 v41, s40 ; GCN: s_swappc_b64 ; GCN-NOT: s40 ; GCN: ; use s40 ; GCN-NOT: s40 -; GCN: v_readlane_b32 s40, v33 +; GCN: v_readlane_b32 s40, v41 ; GCN-NOT: s40 define void @callee_saved_sgpr_vgpr_func() #2 { %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 - %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0 + %v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0 call void @external_void_func_void() call void asm sideeffect "; use $0", "s"(i32 %s40) #0 - call void asm sideeffect "; use $0", "v"(i32 %v32) #0 + call void asm sideeffect "; use $0", "v"(i32 %v40) #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll index 6c9a9af159bdd..9719365b9afb9 100644 --- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll @@ -64,11 +64,11 @@ define amdgpu_kernel void @call_no_wait_after_call(i32 addrspace(1)* %ptr, i32) ; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+4 ; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: v_mov_b32_e32 v32, 0 +; GCN-NEXT: v_mov_b32_e32 v40, 0 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, s34 ; GCN-NEXT: v_mov_b32_e32 v1, s35 -; GCN-NEXT: global_store_dword v[0:1], v32, off +; GCN-NEXT: global_store_dword v[0:1], v40, off ; GCN-NEXT: s_endpgm call void @func(i32 0) store i32 0, i32 addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index e989ea07926dc..56faa3b8c129f 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -127,8 +127,8 @@ define void @callee_with_stack_and_call() #0 { ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s4, v32, 0 -; GCN-DAG: v_readlane_b32 s5, v32, 1 +; GCN-DAG: v_readlane_b32 s4, v40, 0 +; GCN-DAG: v_readlane_b32 s5, v40, 1 ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]] @@ -168,6 +168,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0 call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0 + call void asm sideeffect "", "~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -207,14 +208,14 @@ define void @spill_only_csr_sgpr() { ; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART 
-; GCN-NEXT: ; clobber v33 +; GCN-NEXT: ; clobber v41 ; GCN-NEXT: ;;#ASMEND -; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 ; GCN-NEXT: s_mov_b32 s33, s4 @@ -223,7 +224,7 @@ define void @spill_only_csr_sgpr() { define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca - call void asm sideeffect "; clobber v33", "~{v33}"() + call void asm sideeffect "; clobber v41", "~{v41}"() ret void } @@ -232,7 +233,7 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; GCN: s_waitcnt ; GCN-NEXT: v_writelane_b32 v1, s33, 63 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-63: v_writelane_b32 v1 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART @@ -246,7 +247,7 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { define void @last_lane_vgpr_for_fp_csr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca - call void asm sideeffect "; clobber v33", "~{v33}"() + call void asm sideeffect "; clobber v41", "~{v41}"() call void asm sideeffect "", "~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49} ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59} @@ -264,14 +265,14 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; GCN: s_waitcnt ; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-64: v_writelane_b32 v1, ; GCN: buffer_store_dword ; GCN: ;;#ASMSTART ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 -; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] @@ -280,7 +281,7 @@ define void @last_lane_vgpr_for_fp_csr() #1 { define void @no_new_vgpr_for_fp_csr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca - call void asm sideeffect "; clobber v33", "~{v33}"() + call void asm sideeffect "; clobber v41", "~{v41}"() call void asm sideeffect "", "~{s39},~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49} ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59} @@ -347,20 +348,20 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-DAG: v_writelane_b32 v32, s31, 1 +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN-DAG: buffer_store_dword ; GCN: s_add_u32 s32, s32, 0x300{{$}} ; GCN: ;;#ASMSTART -; GCN: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN: v_readlane_b32 s4, [[CSR_VGPR]], 0 +; GCN-NEXT: v_readlane_b32 s5, 
[[CSR_VGPR]], 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300{{$}} -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] @@ -377,11 +378,11 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} ,~{s30},~{s31}"() #0 - call void asm sideeffect "; clobber nonpreserved VGPRs", + call void asm sideeffect "; clobber nonpreserved initial VGPRs", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} - ,~{v30},~{v31}"() #1 + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1 ret void } @@ -394,19 +395,19 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-DAG: v_writelane_b32 v32, s31, 1 +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}} ; GCN-DAG: buffer_store_dword ; GCN: ;;#ASMSTART -; GCN: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN: v_readlane_b32 s4, [[CSR_VGPR]], 0 +; GCN-NEXT: v_readlane_b32 s5, [[CSR_VGPR]], 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x40300{{$}} -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Reload @@ -429,7 +430,7 @@ define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval ali "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} - ,~{v30},~{v31}"() #1 + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll index 023bc6e21276e..c1aca8feb7458 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -144,7 +144,7 @@ define hidden void @use_workgroup_id_yz() #1 { ; GCN-NOT: s12 ; GCN-NOT: s13 ; GCN-NOT: s14 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void @@ -152,7 +152,7 @@ define hidden void @func_indirect_use_workgroup_id_x() #1 { ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_y() #1 { call void @use_workgroup_id_y() ret void @@ 
-160,7 +160,7 @@ define hidden void @func_indirect_use_workgroup_id_y() #1 { ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index d69f70ffad57c..4991b72df7430 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -302,7 +302,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { ; Argument is in right place already ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void @@ -310,7 +310,7 @@ define hidden void @func_indirect_use_workgroup_id_x() #1 { ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_y() #1 { call void @use_workgroup_id_y() ret void @@ -318,7 +318,7 @@ define hidden void @func_indirect_use_workgroup_id_y() #1 { ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v32, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 265024e6bb8ff..bfd9a9bf4e963 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -396,13 +396,11 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { } ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x: -; VARABI: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}} ; VARABI: v_and_b32_e32 v32, 0x3ff, v32 ; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 -; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; VARABI-NEXT: s_waitcnt +; VARABI: s_waitcnt ; VARABI-NEXT: s_setpc_b64 ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31 @@ -514,15 +512,15 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; Requires loading and storing to stack slot. 
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} -; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: s_swappc_b64 -; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_sub_u32 s32, s32, 0x400{{$}} +; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, @@ -543,13 +541,11 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; frame[2] = VGPR spill slot ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval: -; VARABI: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; VARABI-NEXT: s_waitcnt ; VARABI-NEXT: v_and_b32_e32 v32, 0x3ff, v32 ; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32 ; VARABI: buffer_load_dword v0, off, s[0:3], s32{{$}} -; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VARABI: s_setpc_b64 @@ -700,10 +696,7 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ret void } -; Only one stack load should be emitted for all 3 values. ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz: -; VARABI: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VARABI: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VARABI-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} ; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}} ; VARABI-NOT: buffer_load_dword @@ -717,9 +710,7 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]] ; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]] -; VARABI: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; VARABI-NEXT: s_waitcnt +; VARABI: s_waitcnt ; VARABI-NEXT: s_setpc_b64 @@ -826,7 +817,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 { ; GCN: s_waitcnt ; GCN-NEXT: s_setpc_b64 -; GCN: ScratchSize: 8 +; GCN: ScratchSize: 0 define void @too_many_args_use_workitem_id_x_stack_yz( i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll new file mode 100644 index 0000000000000..b99e82b312ee2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -0,0 +1,398 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL %s + +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
+
+; GCN-ISEL-LABEL: name: sadd64rr
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: S_ADD_U64_PSEUDO
+
+; GCN-LABEL: @sadd64rr
+; GCN: s_add_u32
+; GCN: s_addc_u32
+define amdgpu_kernel void @sadd64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %add = add i64 %a, %b
+  store i64 %add, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: sadd64ri
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: S_ADD_U64_PSEUDO
+
+; GCN-LABEL: @sadd64ri
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x56789876
+; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1234
+define amdgpu_kernel void @sadd64ri(i64 addrspace(1)* %out, i64 %a) {
+entry:
+  %add = add i64 20015998343286, %a
+  store i64 %add, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: vadd64rr
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: V_ADD_U64_PSEUDO
+
+; GCN-LABEL: @vadd64rr
+;
+; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
+; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %add = add i64 %a, %tid.ext
+  store i64 %add, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: vadd64ri
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: V_ADD_U64_PSEUDO
+
+; GCN-LABEL: @vadd64ri
+;
+; CISI: v_add_i32_e32 v0, vcc, 0x56789876, v0
+; CISI: v_mov_b32_e32 v1, 0x1234
+; CISI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+;
+; VI: v_add_u32_e32 v0, vcc, 0x56789876, v0
+; VI: v_mov_b32_e32 v1, 0x1234
+; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+;
+; GFX9: v_add_co_u32_e32 v0, vcc, 0x56789876, v0
+; GFX9: v_mov_b32_e32 v1, 0x1234
+; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+;
+; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
+; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
+define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %add = add i64 20015998343286, %tid.ext
+  store i64 %add, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: suaddo32
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0
+; GCN-ISEL: S_ADD_I32
+define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %uadd, 0
+  %carry = extractvalue { i32, i1 } %uadd, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; GCN-ISEL-LABEL: name: uaddo32_vcc_user
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0
+; GCN-ISEL: V_ADD_I32_e64
+
+; Below we check selection to v_add/addc
+; because the only user of VCC produced by the UADDO is v_cndmask.
+; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC + +; GCN-LABEL: @uaddo32_vcc_user +; +; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} +; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]] +define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { + %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %uadd, 0 + %carry = extractvalue { i32, i1 } %uadd, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; GCN-ISEL-LABEL: name: suaddo64 +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.0 +; GCN-ISEL: S_ADD_U64_PSEUDO + +; GCN-LABEL: @suaddo64 +; +; GCN: s_add_u32 +; GCN: s_addc_u32 +define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 { + %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %uadd, 0 + %carry = extractvalue { i64, i1 } %uadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; GCN-ISEL-LABEL: name: vuaddo64 +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.0 +; GCN-ISEL: V_ADD_U64_PSEUDO + +; GCN-LABEL: @vuaddo64 +; +; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %tid.ext) + %val = extractvalue { i64, i1 } %uadd, 0 + %carry = extractvalue { i64, i1 } %uadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL %s + +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s + +; GCN-ISEL-LABEL: name: ssub64rr +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.0.entry: +; GCN-ISEL: S_SUB_U64_PSEUDO + +; GCN-LABEL: @ssub64rr +; GCN: s_sub_u32 +; GCN: s_subb_u32 +define amdgpu_kernel void @ssub64rr(i64 addrspace(1)* %out, i64 
%a, i64 %b) {
+entry:
+  %sub = sub i64 %a, %b
+  store i64 %sub, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: ssub64ri
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: S_SUB_U64_PSEUDO
+
+; GCN-LABEL: @ssub64ri
+; GCN: s_sub_u32 s{{[0-9]+}}, 0x56789876, s{{[0-9]+}}
+; GCN: s_subb_u32 s{{[0-9]+}}, 0x1234, s{{[0-9]+}}
+define amdgpu_kernel void @ssub64ri(i64 addrspace(1)* %out, i64 %a) {
+entry:
+  %sub = sub i64 20015998343286, %a
+  store i64 %sub, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: vsub64rr
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: V_SUB_U64_PSEUDO
+
+; GCN-LABEL: @vsub64rr
+;
+; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+;
+; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
+; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %sub = sub i64 %a, %tid.ext
+  store i64 %sub, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: vsub64ri
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0.entry:
+; GCN-ISEL: V_SUB_U64_PSEUDO
+
+; GCN-LABEL: @vsub64ri
+;
+; CISI: v_sub_i32_e32 v0, vcc, 0x56789876, v0
+; CISI: v_mov_b32_e32 v1, 0x1234
+; CISI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+;
+; VI: v_sub_u32_e32 v0, vcc, 0x56789876, v0
+; VI: v_mov_b32_e32 v1, 0x1234
+; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
+;
+; GFX9: v_sub_co_u32_e32 v0, vcc, 0x56789876, v0
+; GFX9: v_mov_b32_e32 v1, 0x1234
+; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
+;
+; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
+; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
+define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %sub = sub i64 20015998343286, %tid.ext
+  store i64 %sub, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-ISEL-LABEL: name: susubo32
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0
+; GCN-ISEL: S_SUB_I32
+define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
+  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %val = extractvalue { i32, i1 } %usub, 0
+  %carry = extractvalue { i32, i1 } %usub, 1
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; GCN-ISEL-LABEL: name: usubo32_vcc_user
+; GCN-ISEL-LABEL: body:
+; GCN-ISEL-LABEL: bb.0
+; GCN-ISEL: V_SUB_I32_e64
+
+; Below we check selection to v_sub/subb
+; because the only user of VCC produced by the USUBO is v_cndmask.
+; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC + +; GCN-LABEL: @usubo32_vcc_user +; +; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +; +; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} +; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]] +define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { + %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue { i32, i1 } %usub, 0 + %carry = extractvalue { i32, i1 } %usub, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; GCN-ISEL-LABEL: name: susubo64 +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.0 +; GCN-ISEL: S_SUB_U64_PSEUDO + +; GCN-LABEL: @susubo64 +; +; GCN: s_sub_u32 +; GCN: s_subb_u32 +define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 { + %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue { i64, i1 } %usub, 0 + %carry = extractvalue { i64, i1 } %usub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; GCN-ISEL-LABEL: name: vusubo64 +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.0 +; GCN-ISEL: V_SUB_U64_PSEUDO + +; GCN-LABEL: @vusubo64 +; +; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 +; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; +; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid.ext = sext i32 %tid to i64 + %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %tid.ext) + %val = extractvalue { i64, i1 } %usub, 0 + %carry = extractvalue { i64, i1 } %usub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; GCN-ISEL-LABEL: name: sudiv64 +; GCN-ISEL-LABEL: body: +; GCN-ISEL-LABEL: bb.3 +; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 +; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]] +; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_I32_e64 +; GCN-ISEL: S_SUB_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]] +define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %result = udiv i64 %x, %y + store i64 %result, i64 addrspace(1)* %out + ret void +} + + + +declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1 + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 + +declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1 + +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } 
+attributes #1 = { nounwind readnone } + diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 4206c39caac74..79ea9dbee2fff 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -28,23 +28,23 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v2f32@rel32@hi+4 -; GCN-NEXT: v_writelane_b32 v32, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] @@ -62,23 +62,23 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v3f32@rel32@hi+4 -; GCN-NEXT: v_writelane_b32 v32, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] @@ -96,23 +96,23 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v4f16@rel32@hi+4 -; GCN-NEXT: v_writelane_b32 v32, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] @@ -130,24 +130,24 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 -; GCN-NEXT: v_writelane_b32 v32, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_struct@rel32@hi+4 -; GCN-NEXT: v_writelane_b32 v32, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v32, 0 -; GCN-NEXT: v_readlane_b32 s5, v32, 1 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index c3f213b9c22f5..0d1c61597848c 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -609,16 +609,16 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v6, vcc, 3, v0 ; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc -; VI-NEXT: flat_load_ubyte v8, v[2:3] -; VI-NEXT: flat_load_ubyte v2, v[4:5] -; VI-NEXT: flat_load_ubyte v3, v[6:7] +; VI-NEXT: flat_load_ubyte v4, v[4:5] +; VI-NEXT: flat_load_ubyte v5, v[6:7] +; VI-NEXT: flat_load_ubyte v6, v[2:3] ; VI-NEXT: flat_load_ubyte v0, v[0:1] ; VI-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) -; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v8 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v4 ; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; VI-NEXT: v_cvt_f32_ubyte0_e32 
v2, v2 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v3, v5 ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_cvt_f32_ubyte0_e32 v3, v3 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v6 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 @@ -775,35 +775,35 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0 ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 v6, vcc, 6, v0 +; VI-NEXT: flat_load_ubyte v12, v[4:5] +; VI-NEXT: v_add_u32_e32 v4, vcc, 6, v0 +; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v6, vcc, 4, v0 ; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 +; VI-NEXT: v_add_u32_e32 v8, vcc, 5, v0 ; VI-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 v10, vcc, 5, v0 +; VI-NEXT: v_add_u32_e32 v10, vcc, 1, v0 ; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 v12, vcc, 1, v0 -; VI-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; VI-NEXT: flat_load_ubyte v8, v[8:9] +; VI-NEXT: flat_load_ubyte v9, v[10:11] ; VI-NEXT: flat_load_ubyte v6, v[6:7] -; VI-NEXT: flat_load_ubyte v7, v[8:9] -; VI-NEXT: flat_load_ubyte v8, v[10:11] -; VI-NEXT: flat_load_ubyte v9, v[12:13] +; VI-NEXT: flat_load_ubyte v7, v[4:5] +; VI-NEXT: flat_load_ubyte v2, v[2:3] ; VI-NEXT: flat_load_ubyte v0, v[0:1] -; VI-NEXT: flat_load_ubyte v1, v[2:3] -; VI-NEXT: flat_load_ubyte v2, v[4:5] -; VI-NEXT: s_waitcnt vmcnt(6) lgkmcnt(6) -; VI-NEXT: v_cvt_f32_ubyte0_e32 v6, v6 ; VI-NEXT: s_waitcnt vmcnt(5) lgkmcnt(5) -; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 -; VI-NEXT: s_waitcnt vmcnt(4) lgkmcnt(4) ; VI-NEXT: v_cvt_f32_ubyte2_e32 v5, v8 +; VI-NEXT: s_waitcnt vmcnt(4) lgkmcnt(4) +; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v9 +; VI-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v6 ; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v6, v7 ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_lshlrev_b32_e32 v3, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; VI-NEXT: v_or_b32_sdwa v2, v2, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v9 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 ; VI-NEXT: buffer_store_dwordx3 v[4:6], off, s[4:7], 0 offset:16 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 @@ -1052,19 +1052,18 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v6, vcc, 1, v0 ; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc +; VI-NEXT: flat_load_ubyte v4, v[4:5] +; VI-NEXT: flat_load_ubyte v5, v[6:7] ; VI-NEXT: flat_load_ubyte v2, v[2:3] -; VI-NEXT: flat_load_ubyte v3, v[4:5] -; VI-NEXT: flat_load_ubyte v4, v[6:7] ; VI-NEXT: flat_load_ubyte v0, v[0:1] -; VI-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) -; VI-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: 
v_cvt_f32_ubyte3_e32 v3, v1 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1 +; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v4 +; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v1 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v1, v5 ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll index 34f8706ac66c5..9d30797693211 100644 --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -107,20 +107,21 @@ define i32 @global_load_2xi16_align1(i16 addrspace(1)* %p) #0 { ; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v4, vcc, 1, v0 ; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc -; GFX7-ALIGNED-NEXT: flat_load_ubyte v6, v[0:1] -; GFX7-ALIGNED-NEXT: v_add_i32_e32 v0, vcc, 3, v0 -; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX7-ALIGNED-NEXT: flat_load_ubyte v2, v[2:3] -; GFX7-ALIGNED-NEXT: flat_load_ubyte v3, v[4:5] +; GFX7-ALIGNED-NEXT: v_add_i32_e32 v6, vcc, 3, v0 +; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc ; GFX7-ALIGNED-NEXT: flat_load_ubyte v0, v[0:1] +; GFX7-ALIGNED-NEXT: flat_load_ubyte v1, v[6:7] +; GFX7-ALIGNED-NEXT: flat_load_ubyte v4, v[4:5] +; GFX7-ALIGNED-NEXT: flat_load_ubyte v2, v[2:3] +; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v3 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v4 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: global_load_2xi16_align1: diff --git a/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir new file mode 100644 index 0000000000000..458bdcef1a584 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold_16bit_imm.mir @@ -0,0 +1,257 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: fold_simm_16_sub_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG [[COPY]] + %0:sreg_32 = S_MOV_B32 2048 + %1:sgpr_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... 
+ +--- +name: fold_simm_16_sub_to_sub +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:sreg_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: $sgpr0 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16 + %0:sreg_32 = S_MOV_B32 2048 + $sgpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $sgpr0_lo16 + +... + +--- +name: fold_aimm_16_sub_to_sub_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:agpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_0 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 0 + %1.lo16:agpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16 + %0:sreg_32 = S_MOV_B32 0 + $agpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $agpr0_lo16 + +... + +--- +name: fold_vimm_16_sub_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG [[COPY]] + %0:sreg_32 = S_MOV_B32 2048 + %1:vgpr_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_vimm_16_sub_to_sub +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_sub + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:vgpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_vimm_16_sub_to_phys +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_sub_to_phys + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16 + %0:sreg_32 = S_MOV_B32 2048 + $vgpr0_lo16 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG $vgpr0_lo16 + +... + +--- +name: fold_vimm_16_lo_to_hi +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_lo_to_hi + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.hi16:vgpr_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... 
+ +--- +name: fold_vimm_16_hi_to_lo +body: | + bb.0: + + ; GCN-LABEL: name: fold_vimm_16_hi_to_lo + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:vgpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_lo_to_hi +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 2048 + %1.hi16:sreg_32 = COPY killed %0.lo16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_hi_to_lo_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:sreg_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]] + %0:sreg_32 = S_MOV_B32 134217728 + %1.lo16:sreg_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 2048 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536 + ; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec + ; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]] + %0:sreg_32 = S_MOV_B32 65536 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... + +--- +name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 +body: | + bb.0: + + ; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728 + ; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16 + ; GCN: SI_RETURN_TO_EPILOG %1 + %0:sreg_32 = S_MOV_B32 134217728 + %1.lo16:agpr_32 = COPY killed %0.hi16 + SI_RETURN_TO_EPILOG %1 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll index a54b2f59e475d..38b8bc37f5f27 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -1248,58 +1248,43 @@ define amdgpu_kernel void @idot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s5, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s7, s[6:7], 0x0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_ashr_i64 s[8:9], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s9, s5, 4 -; GFX7-NEXT: s_ashr_i64 s[14:15], s[8:9], 60 -; GFX7-NEXT: s_lshl_b32 s9, s5, 16 -; GFX7-NEXT: s_ashr_i64 s[16:17], s[8:9], 60 -; GFX7-NEXT: s_lshl_b32 s9, s5, 20 -; GFX7-NEXT: s_lshl_b32 s11, s5, 8 -; GFX7-NEXT: s_lshl_b32 s13, s5, 12 -; GFX7-NEXT: s_ashr_i64 s[18:19], s[8:9], 60 -; GFX7-NEXT: s_lshl_b32 s9, s5, 24 -; GFX7-NEXT: s_lshl_b32 s5, s5, 28 -; GFX7-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 4 -; GFX7-NEXT: s_ashr_i64 s[24:25], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 8 -; GFX7-NEXT: s_ashr_i64 s[26:27], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 12 -; GFX7-NEXT: s_ashr_i64 s[28:29], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 16 -; GFX7-NEXT: s_ashr_i64 s[30:31], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 20 -; GFX7-NEXT: s_ashr_i64 s[34:35], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 24 -; GFX7-NEXT: s_ashr_i64 s[36:37], s[4:5], 60 -; GFX7-NEXT: s_lshl_b32 s5, s7, 28 -; GFX7-NEXT: s_ashr_i64 s[22:23], s[6:7], 60 -; GFX7-NEXT: s_ashr_i64 s[6:7], s[4:5], 60 -; GFX7-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX7-NEXT: v_mov_b32_e32 v0, s6 -; GFX7-NEXT: s_ashr_i64 s[20:21], s[8:9], 60 -; GFX7-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX7-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s20, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_ashr_i32 s6, s4, 28 +; GFX7-NEXT: s_ashr_i32 s13, s5, 28 +; GFX7-NEXT: s_bfe_i32 s14, s5, 0x40018 +; GFX7-NEXT: s_bfe_i32 s15, s5, 0x40014 +; GFX7-NEXT: s_bfe_i32 s16, s5, 0x40010 +; GFX7-NEXT: s_bfe_i32 s17, s5, 0x4000c +; GFX7-NEXT: s_bfe_i32 s18, s5, 0x40008 +; GFX7-NEXT: s_bfe_i32 s19, s5, 0x40004 +; GFX7-NEXT: s_bfe_i32 s5, s5, 0x40000 +; GFX7-NEXT: s_bfe_i32 s7, s4, 0x40018 +; GFX7-NEXT: s_bfe_i32 s8, s4, 0x40014 +; GFX7-NEXT: s_bfe_i32 s9, s4, 0x40010 +; GFX7-NEXT: s_bfe_i32 s10, s4, 0x4000c +; GFX7-NEXT: s_bfe_i32 s11, s4, 0x40008 +; GFX7-NEXT: s_bfe_i32 s12, s4, 0x40004 +; GFX7-NEXT: s_bfe_i32 s4, s4, 0x40000 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 +; GFX7-NEXT: v_mov_b32_e32 v1, s20 ; GFX7-NEXT: v_mad_i32_i24 v0, s4, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s36 -; GFX7-NEXT: v_mad_i32_i24 v0, s20, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s34 -; GFX7-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s30 -; GFX7-NEXT: v_mad_i32_i24 v0, s16, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s28 +; GFX7-NEXT: v_mov_b32_e32 v1, s19 ; GFX7-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s26 +; GFX7-NEXT: v_mov_b32_e32 v1, s18 +; GFX7-NEXT: v_mad_i32_i24 v0, s11, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s17 ; GFX7-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s24 -; GFX7-NEXT: v_mad_i32_i24 v0, s14, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s22 +; GFX7-NEXT: v_mov_b32_e32 v1, s16 +; GFX7-NEXT: v_mad_i32_i24 v0, s9, v1, v0 +; GFX7-NEXT: 
v_mov_b32_e32 v1, s15 ; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s14 +; GFX7-NEXT: v_mad_i32_i24 v0, s7, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s13 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1308,58 +1293,43 @@ define amdgpu_kernel void @idot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_ashr_i64 s[6:7], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s7, s3, 4 -; GFX8-NEXT: s_ashr_i64 s[14:15], s[6:7], 60 -; GFX8-NEXT: s_lshl_b32 s7, s3, 20 -; GFX8-NEXT: s_lshl_b32 s9, s3, 8 -; GFX8-NEXT: s_lshl_b32 s11, s3, 12 -; GFX8-NEXT: s_lshl_b32 s13, s3, 16 -; GFX8-NEXT: s_ashr_i64 s[16:17], s[6:7], 60 -; GFX8-NEXT: s_lshl_b32 s7, s3, 24 -; GFX8-NEXT: s_lshl_b32 s3, s3, 28 -; GFX8-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 4 -; GFX8-NEXT: s_ashr_i64 s[22:23], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 8 -; GFX8-NEXT: s_ashr_i64 s[24:25], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 12 -; GFX8-NEXT: s_ashr_i64 s[26:27], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 16 -; GFX8-NEXT: s_ashr_i64 s[28:29], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 20 -; GFX8-NEXT: s_ashr_i64 s[30:31], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 24 -; GFX8-NEXT: s_ashr_i64 s[34:35], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s3, s5, 28 -; GFX8-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 -; GFX8-NEXT: s_ashr_i64 s[4:5], s[2:3], 60 -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x0 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: s_ashr_i64 s[18:19], s[6:7], 60 -; GFX8-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX8-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s18, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_ashr_i32 s4, s2, 28 +; GFX8-NEXT: s_ashr_i32 s11, s3, 28 +; GFX8-NEXT: s_bfe_i32 s12, s3, 0x40018 +; GFX8-NEXT: s_bfe_i32 s13, s3, 0x40014 +; GFX8-NEXT: s_bfe_i32 s14, s3, 0x40010 +; GFX8-NEXT: s_bfe_i32 s15, s3, 0x4000c +; GFX8-NEXT: s_bfe_i32 s16, s3, 0x40008 +; GFX8-NEXT: s_bfe_i32 s17, s3, 0x40004 +; GFX8-NEXT: s_bfe_i32 s3, s3, 0x40000 +; GFX8-NEXT: s_bfe_i32 s5, s2, 0x40018 +; GFX8-NEXT: s_bfe_i32 s6, s2, 0x40014 +; GFX8-NEXT: s_bfe_i32 s7, s2, 0x40010 +; GFX8-NEXT: s_bfe_i32 s8, s2, 0x4000c +; GFX8-NEXT: s_bfe_i32 s9, s2, 0x40008 +; GFX8-NEXT: s_bfe_i32 s10, s2, 0x40004 +; GFX8-NEXT: s_bfe_i32 s2, s2, 0x40000 +; GFX8-NEXT: v_mov_b32_e32 v0, s3 +; GFX8-NEXT: v_mov_b32_e32 v1, s18 ; GFX8-NEXT: v_mad_i32_i24 v0, s2, v0, v1 -; GFX8-NEXT: v_mov_b32_e32 v1, s34 -; GFX8-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s30 -; GFX8-NEXT: v_mad_i32_i24 v0, s16, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s28 -; GFX8-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s26 +; GFX8-NEXT: v_mov_b32_e32 v1, s17 ; GFX8-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX8-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX8-NEXT: v_mov_b32_e32 v1, s24 +; GFX8-NEXT: v_mov_b32_e32 v1, s16 +; GFX8-NEXT: v_mad_i32_i24 v0, s9, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s15 ; GFX8-NEXT: v_mad_i32_i24 v0, s8, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s22 -; GFX8-NEXT: v_mad_i32_i24 v0, s14, v1, v0 -; 
GFX8-NEXT: v_mov_b32_e32 v1, s20 -; GFX8-NEXT: v_mad_i32_i24 v2, s6, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s14 +; GFX8-NEXT: v_mad_i32_i24 v0, s7, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s13 +; GFX8-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s12 +; GFX8-NEXT: v_mad_i32_i24 v0, s5, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v1, s11 +; GFX8-NEXT: v_mad_i32_i24 v2, s4, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1370,58 +1340,43 @@ define amdgpu_kernel void @idot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_ashr_i64 s[6:7], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s7, s3, 4 -; GFX9-NEXT: s_ashr_i64 s[14:15], s[6:7], 60 -; GFX9-NEXT: s_lshl_b32 s7, s3, 20 -; GFX9-NEXT: s_lshl_b32 s9, s3, 8 -; GFX9-NEXT: s_lshl_b32 s11, s3, 12 -; GFX9-NEXT: s_lshl_b32 s13, s3, 16 -; GFX9-NEXT: s_ashr_i64 s[16:17], s[6:7], 60 -; GFX9-NEXT: s_lshl_b32 s7, s3, 24 -; GFX9-NEXT: s_lshl_b32 s3, s3, 28 -; GFX9-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 4 -; GFX9-NEXT: s_ashr_i64 s[22:23], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 8 -; GFX9-NEXT: s_ashr_i64 s[24:25], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 12 -; GFX9-NEXT: s_ashr_i64 s[26:27], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 16 -; GFX9-NEXT: s_ashr_i64 s[28:29], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 20 -; GFX9-NEXT: s_ashr_i64 s[30:31], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 24 -; GFX9-NEXT: s_ashr_i64 s[34:35], s[2:3], 60 -; GFX9-NEXT: s_lshl_b32 s3, s5, 28 -; GFX9-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 -; GFX9-NEXT: s_ashr_i64 s[4:5], s[2:3], 60 -; GFX9-NEXT: s_load_dword s3, s[0:1], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: s_ashr_i64 s[18:19], s[6:7], 60 -; GFX9-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX9-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s18, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: s_ashr_i32 s4, s2, 28 +; GFX9-NEXT: s_ashr_i32 s11, s3, 28 +; GFX9-NEXT: s_bfe_i32 s12, s3, 0x40018 +; GFX9-NEXT: s_bfe_i32 s13, s3, 0x40014 +; GFX9-NEXT: s_bfe_i32 s14, s3, 0x40010 +; GFX9-NEXT: s_bfe_i32 s15, s3, 0x4000c +; GFX9-NEXT: s_bfe_i32 s16, s3, 0x40008 +; GFX9-NEXT: s_bfe_i32 s17, s3, 0x40004 +; GFX9-NEXT: s_bfe_i32 s3, s3, 0x40000 +; GFX9-NEXT: s_bfe_i32 s5, s2, 0x40018 +; GFX9-NEXT: s_bfe_i32 s6, s2, 0x40014 +; GFX9-NEXT: s_bfe_i32 s7, s2, 0x40010 +; GFX9-NEXT: s_bfe_i32 s8, s2, 0x4000c +; GFX9-NEXT: s_bfe_i32 s9, s2, 0x40008 +; GFX9-NEXT: s_bfe_i32 s10, s2, 0x40004 +; GFX9-NEXT: s_bfe_i32 s2, s2, 0x40000 +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s18 ; GFX9-NEXT: v_mad_i32_i24 v0, s2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s30 -; GFX9-NEXT: v_mad_i32_i24 v0, s16, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s28 -; GFX9-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s26 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 ; GFX9-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX9-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX9-NEXT: v_mov_b32_e32 v1, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s16 +; 
GFX9-NEXT: v_mad_i32_i24 v0, s9, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s15 ; GFX9-NEXT: v_mad_i32_i24 v0, s8, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s22 -; GFX9-NEXT: v_mad_i32_i24 v0, s14, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s20 -; GFX9-NEXT: v_mad_i32_i24 v2, s6, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s14 +; GFX9-NEXT: v_mad_i32_i24 v0, s7, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s12 +; GFX9-NEXT: v_mad_i32_i24 v0, s5, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, s11 +; GFX9-NEXT: v_mad_i32_i24 v2, s4, v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1432,58 +1387,13 @@ define amdgpu_kernel void @idot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_ashr_i64 s[6:7], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s7, s3, 4 -; GFX9-DL-NEXT: s_ashr_i64 s[14:15], s[6:7], 60 -; GFX9-DL-NEXT: s_lshl_b32 s7, s3, 20 -; GFX9-DL-NEXT: s_lshl_b32 s9, s3, 8 -; GFX9-DL-NEXT: s_lshl_b32 s11, s3, 12 -; GFX9-DL-NEXT: s_lshl_b32 s13, s3, 16 -; GFX9-DL-NEXT: s_ashr_i64 s[16:17], s[6:7], 60 -; GFX9-DL-NEXT: s_lshl_b32 s7, s3, 24 -; GFX9-DL-NEXT: s_lshl_b32 s3, s3, 28 -; GFX9-DL-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 4 -; GFX9-DL-NEXT: s_ashr_i64 s[22:23], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 8 -; GFX9-DL-NEXT: s_ashr_i64 s[24:25], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 12 -; GFX9-DL-NEXT: s_ashr_i64 s[26:27], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 16 -; GFX9-DL-NEXT: s_ashr_i64 s[28:29], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 20 -; GFX9-DL-NEXT: s_ashr_i64 s[30:31], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 24 -; GFX9-DL-NEXT: s_ashr_i64 s[34:35], s[2:3], 60 -; GFX9-DL-NEXT: s_lshl_b32 s3, s5, 28 -; GFX9-DL-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 -; GFX9-DL-NEXT: s_ashr_i64 s[4:5], s[2:3], 60 +; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 ; GFX9-DL-NEXT: s_load_dword s3, s[0:1], 0x0 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-DL-NEXT: s_ashr_i64 s[18:19], s[6:7], 60 -; GFX9-DL-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX9-DL-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX9-DL-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s2, v0, v1 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s30 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s16, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s28 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s26 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX9-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s24 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s8, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s22 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s14, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s20 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s6, v1, v0 +; GFX9-DL-NEXT: v_dot8_i32_i4 v2, s4, v0, v1 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off @@ -1491,55 +1401,18 @@ define amdgpu_kernel 
void @idot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; ; GFX10-DL-LABEL: idot8_acc32_vecMul: ; GFX10-DL: ; %bb.0: ; %entry -; GFX10-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX10-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX10-DL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34 +; GFX10-DL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX10-DL-NEXT: ; implicit-def: $vcc_hi ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX10-DL-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX10-DL-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-DL-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX10-DL-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX10-DL-NEXT: s_load_dword s1, s[2:3], 0x0 ; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-DL-NEXT: s_lshl_b32 s7, s3, 28 -; GFX10-DL-NEXT: s_lshl_b32 s9, s5, 28 -; GFX10-DL-NEXT: s_lshl_b32 s11, s3, 24 -; GFX10-DL-NEXT: s_lshl_b32 s13, s5, 24 -; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-DL-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX10-DL-NEXT: s_lshl_b32 s7, s3, 20 -; GFX10-DL-NEXT: s_lshl_b32 s9, s5, 20 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s6, s8, v0 -; GFX10-DL-NEXT: s_lshl_b32 s11, s3, 16 -; GFX10-DL-NEXT: s_lshl_b32 s13, s5, 16 -; GFX10-DL-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s10, s12, v0 -; GFX10-DL-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX10-DL-NEXT: s_lshl_b32 s7, s3, 12 -; GFX10-DL-NEXT: s_lshl_b32 s9, s5, 12 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s6, s8, v0 -; GFX10-DL-NEXT: s_lshl_b32 s11, s3, 8 -; GFX10-DL-NEXT: s_lshl_b32 s13, s5, 8 -; GFX10-DL-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s10, s12, v0 -; GFX10-DL-NEXT: s_lshl_b32 s7, s3, 4 -; GFX10-DL-NEXT: s_lshl_b32 s9, s5, 4 -; GFX10-DL-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s6, s8, v0 -; GFX10-DL-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX10-DL-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s10, s12, v0 -; GFX10-DL-NEXT: v_mad_i32_i24 v0, s6, s8, v0 -; GFX10-DL-NEXT: v_mad_i32_i24 v2, s2, s4, v0 -; GFX10-DL-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-DL-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-DL-NEXT: v_dot8_i32_i4 v2, s0, s1, v0 +; GFX10-DL-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-DL-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, @@ -1642,60 +1515,46 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshl_b32 s27, s3, 28 -; GFX8-NEXT: s_ashr_i64 s[16:17], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s19, s3, 8 -; GFX8-NEXT: 
s_lshl_b32 s21, s3, 12 -; GFX8-NEXT: s_lshl_b32 s15, s1, 28 -; GFX8-NEXT: s_lshl_b32 s23, s3, 16 -; GFX8-NEXT: s_lshl_b32 s25, s3, 24 -; GFX8-NEXT: s_lshl_b32 s17, s3, 4 -; GFX8-NEXT: s_lshl_b32 s3, s3, 20 -; GFX8-NEXT: s_ashr_i64 s[4:5], s[0:1], 60 -; GFX8-NEXT: s_ashr_i64 s[26:27], s[26:27], 60 -; GFX8-NEXT: s_lshl_b32 s7, s1, 8 -; GFX8-NEXT: s_lshl_b32 s9, s1, 12 -; GFX8-NEXT: s_lshl_b32 s11, s1, 16 -; GFX8-NEXT: s_lshl_b32 s13, s1, 24 -; GFX8-NEXT: s_lshl_b32 s5, s1, 4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 20 -; GFX8-NEXT: s_ashr_i64 s[24:25], s[24:25], 60 -; GFX8-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX8-NEXT: s_ashr_i64 s[14:15], s[14:15], 60 -; GFX8-NEXT: v_mov_b32_e32 v4, s26 -; GFX8-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX8-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 -; GFX8-NEXT: v_mov_b32_e32 v5, s24 -; GFX8-NEXT: s_ashr_i64 s[22:23], s[22:23], 60 +; GFX8-NEXT: s_bfe_i32 s8, s0, 0x40000 +; GFX8-NEXT: s_bfe_i32 s15, s1, 0x40000 +; GFX8-NEXT: s_bfe_i32 s10, s1, 0x40018 +; GFX8-NEXT: s_bfe_i32 s11, s1, 0x40014 +; GFX8-NEXT: s_bfe_i32 s12, s1, 0x40010 +; GFX8-NEXT: s_bfe_i32 s13, s1, 0x4000c +; GFX8-NEXT: s_bfe_i32 s14, s1, 0x40004 +; GFX8-NEXT: s_ashr_i32 s9, s1, 28 +; GFX8-NEXT: s_bfe_i32 s1, s1, 0x40008 +; GFX8-NEXT: v_mov_b32_e32 v4, s15 +; GFX8-NEXT: s_ashr_i32 s2, s0, 28 +; GFX8-NEXT: s_bfe_i32 s3, s0, 0x40018 +; GFX8-NEXT: s_bfe_i32 s4, s0, 0x40014 +; GFX8-NEXT: s_bfe_i32 s5, s0, 0x40010 +; GFX8-NEXT: s_bfe_i32 s6, s0, 0x4000c +; GFX8-NEXT: s_bfe_i32 s7, s0, 0x40004 +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x40008 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mov_b32_e32 v5, s14 ; GFX8-NEXT: v_mul_i32_i24_e32 v3, s0, v3 -; GFX8-NEXT: s_ashr_i64 s[20:21], s[20:21], 60 -; GFX8-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 -; GFX8-NEXT: v_mov_b32_e32 v6, s22 -; GFX8-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 -; GFX8-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX8-NEXT: v_mov_b32_e32 v7, s20 -; GFX8-NEXT: s_ashr_i64 s[30:31], s[16:17], 60 -; GFX8-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX8-NEXT: v_mov_b32_e32 v8, s18 -; GFX8-NEXT: s_ashr_i64 s[28:29], s[4:5], 60 -; GFX8-NEXT: v_mov_b32_e32 v9, s30 +; GFX8-NEXT: v_mov_b32_e32 v6, s13 +; GFX8-NEXT: v_mov_b32_e32 v7, s12 +; GFX8-NEXT: v_mov_b32_e32 v8, s11 +; GFX8-NEXT: v_mov_b32_e32 v9, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_i32_i24 v2, s14, v4, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s12, v5, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s8, v4, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s7, v5, v2 ; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 -; GFX8-NEXT: v_mad_i32_i24 v2, s10, v6, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s8, v7, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s6, v8, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s28, v9, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s16 -; GFX8-NEXT: v_mad_i32_i24 v2, s4, v3, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s6, v6, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s5, v7, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s4, v8, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s3, v9, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 +; GFX8-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -2021,83 +1880,69 @@ define amdgpu_kernel void @idot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s33, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; 
GFX8-NEXT: flat_load_ubyte v2, v[0:1] ; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX8-NEXT: s_mov_b32 s0, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshl_b32 s11, s1, 24 -; GFX8-NEXT: s_lshl_b32 s15, s1, 16 -; GFX8-NEXT: s_ashr_i64 s[20:21], s[2:3], 60 -; GFX8-NEXT: s_lshl_b32 s23, s3, 24 -; GFX8-NEXT: s_lshl_b32 s25, s3, 28 -; GFX8-NEXT: s_lshl_b32 s27, s3, 16 -; GFX8-NEXT: s_ashr_i64 s[8:9], s[0:1], 60 -; GFX8-NEXT: s_lshl_b32 s13, s1, 28 -; GFX8-NEXT: s_lshl_b32 s17, s3, 8 -; GFX8-NEXT: s_lshl_b32 s19, s3, 12 -; GFX8-NEXT: s_lshl_b32 s21, s3, 4 -; GFX8-NEXT: s_lshl_b32 s3, s3, 20 -; GFX8-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 -; GFX8-NEXT: s_ashr_i64 s[14:15], s[14:15], 60 -; GFX8-NEXT: s_ashr_i64 s[22:23], s[22:23], 60 -; GFX8-NEXT: s_ashr_i64 s[24:25], s[24:25], 60 -; GFX8-NEXT: s_ashr_i64 s[26:27], s[26:27], 60 -; GFX8-NEXT: s_lshl_b32 s5, s1, 8 -; GFX8-NEXT: s_lshl_b32 s7, s1, 12 -; GFX8-NEXT: s_lshl_b32 s9, s1, 4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 20 -; GFX8-NEXT: s_ashr_i64 s[2:3], s[2:3], 60 -; GFX8-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX8-NEXT: v_mov_b32_e32 v6, s26 +; GFX8-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX8-NEXT: s_bfe_i32 s9, s1, 0x4000c +; GFX8-NEXT: s_bfe_i32 s14, s2, 0x40004 +; GFX8-NEXT: s_bfe_i32 s15, s2, 0x40000 +; GFX8-NEXT: s_bfe_i32 s16, s2, 0x4000c +; GFX8-NEXT: s_bfe_i32 s3, s1, 0x40014 +; GFX8-NEXT: s_ashr_i32 s5, s1, 28 +; GFX8-NEXT: s_bfe_i32 s10, s2, 0x40014 +; GFX8-NEXT: s_bfe_i32 s11, s2, 0x40010 +; GFX8-NEXT: s_ashr_i32 s12, s2, 28 +; GFX8-NEXT: s_bfe_i32 s13, s2, 0x40018 +; GFX8-NEXT: s_bfe_i32 s2, s2, 0x40008 +; GFX8-NEXT: s_bfe_i32 s8, s1, 0x40000 +; GFX8-NEXT: v_mov_b32_e32 v4, s16 +; GFX8-NEXT: v_mov_b32_e32 v5, s9 +; GFX8-NEXT: v_mov_b32_e32 v6, s15 ; GFX8-NEXT: v_mov_b32_e32 v7, s14 -; GFX8-NEXT: v_mov_b32_e32 v8, s24 -; GFX8-NEXT: v_mov_b32_e32 v9, s22 -; GFX8-NEXT: v_mov_b32_e32 v10, s10 -; GFX8-NEXT: v_mul_i32_i24_sdwa v6, v7, v6 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_mul_i32_i24_e32 v7, s12, v8 -; GFX8-NEXT: v_mul_i32_i24_sdwa v8, v10, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 -; GFX8-NEXT: v_mov_b32_e32 v5, s2 -; GFX8-NEXT: v_mul_i32_i24_e32 v5, s0, v5 -; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 -; GFX8-NEXT: s_ashr_i64 s[16:17], s[16:17], 60 -; GFX8-NEXT: v_or_b32_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v6, s33, v7 -; GFX8-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 -; GFX8-NEXT: v_mov_b32_e32 v3, s20 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 -; GFX8-NEXT: s_ashr_i64 s[30:31], s[20:21], 60 -; GFX8-NEXT: v_mul_i32_i24_sdwa v3, v4, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v5, v6, v5 -; GFX8-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 -; GFX8-NEXT: v_mov_b32_e32 v4, s18 -; GFX8-NEXT: v_mov_b32_e32 v12, s16 -; GFX8-NEXT: v_mov_b32_e32 v13, s4 -; GFX8-NEXT: s_ashr_i64 s[28:29], s[8:9], 60 -; GFX8-NEXT: v_mov_b32_e32 v11, s30 -; GFX8-NEXT: v_mul_i32_i24_e32 v4, s6, v4 -; GFX8-NEXT: v_mul_i32_i24_sdwa v10, v13, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v5 -; GFX8-NEXT: v_or_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 
src1_sel:DWORD -; GFX8-NEXT: v_mul_i32_i24_e32 v9, s28, v11 -; GFX8-NEXT: v_or_b32_sdwa v3, v9, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v4, s33, v4 -; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v3 +; GFX8-NEXT: v_mov_b32_e32 v8, s7 +; GFX8-NEXT: v_mul_i32_i24_sdwa v4, v5, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mul_i32_i24_e32 v5, s8, v6 +; GFX8-NEXT: v_mul_i32_i24_sdwa v6, v8, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: s_bfe_i32 s4, s1, 0x40010 +; GFX8-NEXT: s_bfe_i32 s6, s1, 0x40018 +; GFX8-NEXT: v_mov_b32_e32 v9, s13 +; GFX8-NEXT: s_bfe_i32 s1, s1, 0x40008 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mov_b32_e32 v10, s12 +; GFX8-NEXT: v_mov_b32_e32 v11, s5 +; GFX8-NEXT: v_mov_b32_e32 v12, s11 +; GFX8-NEXT: v_mov_b32_e32 v13, s10 +; GFX8-NEXT: v_mov_b32_e32 v14, s3 +; GFX8-NEXT: v_mul_i32_i24_e32 v3, s1, v3 +; GFX8-NEXT: v_or_b32_sdwa v5, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_mul_i32_i24_e32 v7, s6, v9 +; GFX8-NEXT: v_mul_i32_i24_sdwa v8, v11, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mul_i32_i24_e32 v9, s4, v12 +; GFX8-NEXT: v_mul_i32_i24_sdwa v10, v14, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v9, v9, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v4, s0, v9 +; GFX8-NEXT: v_or_b32_e32 v3, v5, v3 +; GFX8-NEXT: v_or_b32_e32 v6, v4, v7 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v6 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 -; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 -; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 +; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll index d4084e40fc8fc..a0a78e96b920a 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll @@ -3,7 +3,7 @@ ; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}} define void @csr() #0 { - call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0 + call void asm sideeffect "", "~{v0},~{v44},~{v45}"() #0 ret void } @@ -24,13 +24,13 @@ define void @nothing() #0 { ret void } -; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}} +; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 $m0_hi16 $m0_lo16 {{$}} define void @special_regs() #0 { call void asm sideeffect "", "~{m0},~{scc}"() #0 ret void } -; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}} +; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo $vcc_hi_hi16 $vcc_hi_lo16 $vcc_lo_hi16 $vcc_lo_lo16 {{$}} define void @vcc() #0 { call void asm sideeffect "", "~{vcc}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll index 192dd233eb201..8a358ee59c963 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll @@ -19,7 +19,6 @@ define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX81-NEXT: s_mov_b64 s[12:13], exec ; GFX81-NEXT: s_wqm_b64 exec, exec ; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16 ; GFX81-NEXT: s_waitcnt vmcnt(0) ; GFX81-NEXT: ; return to shader part epilog @@ -73,12 +72,9 @@ define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> ; GFX81-NEXT: v_mov_b32_e32 v5, s13 ; GFX81-NEXT: v_mov_b32_e32 v3, v2 ; GFX81-NEXT: s_and_b64 exec, exec, s[14:15] -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16 ; GFX81-NEXT: s_waitcnt vmcnt(0) ; GFX81-NEXT: v_mov_b32_e32 v0, v2 -; GFX81-NEXT: s_nop 0 -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: flat_store_dword v[4:5], v3 ; GFX81-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX81-NEXT: ; return to shader part epilog @@ -173,8 +169,6 @@ define amdgpu_ps <2 x float> @image_sample_c_d_1d_v2f16_tfe(<8 x i32> inreg %rsr ; GFX81: ; %bb.0: ; %main_body ; GFX81-NEXT: v_mov_b32_e32 v4, 0 ; GFX81-NEXT: v_mov_b32_e32 v5, v4 -; GFX81-NEXT: s_nop 0 -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: image_sample_c_d v[4:5], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16 ; GFX81-NEXT: s_waitcnt vmcnt(0) ; GFX81-NEXT: v_mov_b32_e32 v0, v4 @@ -231,7 +225,6 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 ; GFX81-NEXT: s_mov_b64 s[12:13], exec ; GFX81-NEXT: s_wqm_b64 exec, exec ; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf d16 ; GFX81-NEXT: s_waitcnt vmcnt(0) ; GFX81-NEXT: ; return to shader part epilog @@ -288,7 +281,6 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v4f16_tfe(<8 x i32> inreg %rsrc, ; GFX81-NEXT: v_mov_b32_e32 v4, v3 ; GFX81-NEXT: v_mov_b32_e32 v5, v3 ; GFX81-NEXT: s_and_b64 exec, exec, s[12:13] -; GFX81-NEXT: s_nop 0 ; GFX81-NEXT: image_sample_b v[3:5], v[0:2], s[0:7], 
s[8:11] dmask:0xf tfe d16 ; GFX81-NEXT: s_waitcnt vmcnt(0) ; GFX81-NEXT: v_mov_b32_e32 v0, v3 diff --git a/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir new file mode 100644 index 0000000000000..074f5de9224e6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir @@ -0,0 +1,36 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: lo16_to_v32 +# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec +name: lo16_to_v32 +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0 + S_ENDPGM 0 +... + +# GCN-LABEL: name: v32_to_lo16 +# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec +name: v32_to_lo16 +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: name: samereg +# GCN: $vgpr0 = IMPLICIT_DEF +# GCN-NEXT: BUNDLE +# GCN-NEXT: S_ENDPGM +name: samereg +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr0 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir index 40bfd60e8ccf5..7ca9106ae4777 100644 --- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir @@ -29,3 +29,27 @@ body: | $vgpr1_hi16 = COPY killed $sgpr0_lo16 S_ENDPGM 0 ... + +# GCN-LABEL: {{^}}lo_to_lo_illegal_agpr_to_sgpr: +# GCN: ; illegal copy a0.l to s1.l +# ERR: error: :0:0: in function lo_to_lo_illegal_agpr_to_sgpr void (): illegal SGPR to VGPR copy +name: lo_to_lo_illegal_agpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_vgpr_to_agpr: +# GCN: ; illegal copy v0.h to a1.l +# ERR: error: :0:0: in function lo_to_hi_vgpr_to_agpr void (): Cannot use hi16 subreg with an AGPR! +name: lo_to_hi_vgpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY killed $vgpr0_hi16 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-agpr.mir new file mode 100644 index 0000000000000..3e2cf90d26993 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-agpr.mir @@ -0,0 +1,52 @@ +# RUN: llc -march=amdgcn -mcpu=gfx908 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: {{^}}lo_to_lo_agpr_to_agpr: +# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a0 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]] +name: lo_to_lo_agpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_samereg: +# GCN: s_waitcnt +# GCN-NEXT: s_endpgm +name: lo_to_lo_samereg +tracksRegLiveness: true +body: | + bb.0: + $agpr0 = IMPLICIT_DEF + $agpr0_lo16 = COPY $agpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_undef_agpr_to_agpr: +# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a1 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a2, [[TMP]] +name: lo_to_lo_undef_agpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $agpr1_lo16 = COPY undef $agpr0_lo16 + $agpr2 = COPY killed $agpr1 + S_ENDPGM 0 +... 
+ +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_agpr: +# GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], s0 +# GCN-NEXT: s_nop 1 +# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]] +name: lo_to_lo_sgpr_to_agpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $agpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/max.i16.ll b/llvm/test/CodeGen/AMDGPU/max.i16.ll index 13ade66bf8ff9..7f7f2bae57aea 100644 --- a/llvm/test/CodeGen/AMDGPU/max.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/max.i16.ll @@ -137,21 +137,21 @@ define amdgpu_kernel void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, < ; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc ; VI-NEXT: v_add_u32_e32 v6, vcc, 4, v0 ; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc -; VI-NEXT: flat_load_ushort v8, v[6:7] -; VI-NEXT: flat_load_dword v9, v[0:1] +; VI-NEXT: flat_load_ushort v6, v[6:7] +; VI-NEXT: flat_load_dword v7, v[0:1] ; VI-NEXT: v_add_u32_e32 v0, vcc, 4, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc ; VI-NEXT: flat_load_ushort v0, v[0:1] -; VI-NEXT: flat_load_dword v1, v[2:3] -; VI-NEXT: v_add_u32_e32 v6, vcc, 4, v4 -; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc +; VI-NEXT: flat_load_dword v8, v[2:3] +; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v4 +; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_max_i16_e32 v0, v8, v0 +; VI-NEXT: v_max_i16_e32 v0, v6, v0 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_max_i16_e32 v2, v9, v1 -; VI-NEXT: v_max_i16_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-NEXT: flat_store_short v[6:7], v0 +; VI-NEXT: v_max_i16_e32 v1, v7, v8 +; VI-NEXT: v_max_i16_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v1, v1, v7 +; VI-NEXT: flat_store_short v[2:3], v0 ; VI-NEXT: flat_store_dword v[4:5], v1 ; VI-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll index 15a8e9283e388..5a435f01925c6 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -8,16 +8,13 @@ define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocaptu ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c ; GCN-NEXT: v_mov_b32_e32 v17, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v16, 4, v0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_dwordx4 v[0:3], v[16:17], s[2:3] ; GCN-NEXT: global_load_dwordx4 v[4:7], v[16:17], s[2:3] offset:16 ; GCN-NEXT: global_load_dwordx4 v[8:11], v[16:17], s[2:3] offset:32 ; GCN-NEXT: global_load_dwordx4 v[12:15], v[16:17], s[2:3] offset:48 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(3) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dwordx4 v[16:17], v[0:3], s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(3) ; GCN-NEXT: global_store_dwordx4 v[16:17], v[4:7], s[4:5] offset:16 @@ -74,8 +71,6 @@ define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocaptu ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: v_mov_b32_e32 v6, s6 ; GCN-NEXT: v_mov_b32_e32 v7, s7 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off ; GCN-NEXT: global_store_dwordx4 v[12:13], v[4:7], off offset:16 ; GCN-NEXT: v_mov_b32_e32 v0, s12 @@ -85,8 +80,6 @@ define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocaptu ; GCN-NEXT: v_mov_b32_e32 v1, s13 ; GCN-NEXT: v_mov_b32_e32 v2, s14 ; GCN-NEXT: v_mov_b32_e32 v3, 
s15 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dwordx4 v[12:13], v[8:11], off offset:32 ; GCN-NEXT: global_store_dwordx4 v[12:13], v[0:3], off offset:48 ; GCN-NEXT: s_endpgm @@ -116,8 +109,6 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar ; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v2 ; GCN-NEXT: v_add_u32_e32 v0, v0, v2 ; GCN-NEXT: v_add_u32_e32 v1, v1, v2 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:20 ; GCN-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:24 ; GCN-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:28 @@ -134,11 +125,9 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar ; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 ; GCN-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 ; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:16 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(4) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(4) ; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 @@ -192,20 +181,15 @@ define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias noca ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_dwordx2 v[8:9], v[0:1], s[2:3] ; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_dwordx4 v[0:3], v[8:9], off ; GCN-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16 ; GCN-NEXT: v_mov_b32_e32 v9, s5 ; GCN-NEXT: v_mov_b32_e32 v8, s4 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dwordx4 v[8:9], v[0:3], off ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: global_store_dwordx4 v[8:9], v[4:7], off offset:16 @@ -230,15 +214,11 @@ define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrs ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v5, v2 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_short_d16_hi v5, v[0:1], off ; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:64 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dword v[3:4], v5, off ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: global_store_dword v[3:4], v2, off offset:128 @@ -263,15 +243,11 @@ define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrs ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v5, v2 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_short_d16 v5, v[0:1], off ; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_load_short_d16 v2, v[0:1], off offset:64 ; GCN-NEXT: s_nop 0 ; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: s_nop 0 ; GCN-NEXT: global_store_dword v[3:4], v5, off ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: global_store_dword v[3:4], v2, off offset:128 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 64206d4522803..5cbcba17931bf 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -16,7 +16,7 
@@ ; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W64: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] ; W64: s_and_saveexec_b64 [[CMP]], [[CMP]] -; W64: s_waitcnt vmcnt(0) +; W64: s_nop 0 ; W64: buffer_load_format_x [[RES:v[0-9]+]], v4, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W64: s_xor_b64 exec, exec, [[CMP]] ; W64: s_cbranch_execnz [[LOOPBB]] @@ -34,7 +34,7 @@ ; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W32: s_and_b32 [[CMP:s[0-9]+]], vcc_lo, [[CMP0]] ; W32: s_and_saveexec_b32 [[CMP]], [[CMP]] -; W32: s_waitcnt vmcnt(0) +; W32: s_nop 0 ; W32: buffer_load_format_x [[RES:v[0-9]+]], v4, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W32: s_xor_b32 exec_lo, exec_lo, [[CMP]] ; W32: s_cbranch_execnz [[LOOPBB]] @@ -59,7 +59,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W64: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] ; W64: s_and_saveexec_b64 [[CMP]], [[CMP]] -; W64: s_waitcnt vmcnt(0) +; W64: s_nop 0 ; W64: buffer_load_format_x [[RES0:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W64: s_xor_b64 exec, exec, [[CMP]] ; W64: s_cbranch_execnz [[LOOPBB0]] @@ -77,7 +77,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[6:7] ; W64: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] ; W64: s_and_saveexec_b64 [[CMP]], [[CMP]] -; W64: s_waitcnt vmcnt(0) +; W64: s_nop 0 ; W64: buffer_load_format_x [[RES1:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W64: s_xor_b64 exec, exec, [[CMP]] ; W64: s_cbranch_execnz [[LOOPBB1]] @@ -99,7 +99,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W32: s_and_b32 [[CMP:s[0-9]+]], vcc_lo, [[CMP0]] ; W32: s_and_saveexec_b32 [[CMP]], [[CMP]] -; W32: s_waitcnt vmcnt(0) +; W32: s_nop 0 ; W32: buffer_load_format_x [[RES0:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W32: s_xor_b32 exec_lo, exec_lo, [[CMP]] ; W32: s_cbranch_execnz [[LOOPBB0]] @@ -117,7 +117,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[6:7] ; W32: s_and_b32 [[CMP:s[0-9]+]], vcc_lo, [[CMP0]] ; W32: s_and_saveexec_b32 [[CMP]], [[CMP]] -; W32: s_waitcnt vmcnt(0) +; W32: s_nop 0 ; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W32: s_xor_b32 exec_lo, exec_lo, [[CMP]] ; W32: s_cbranch_execnz [[LOOPBB1]] @@ -150,7 +150,7 @@ entry: ; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W64: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] ; W64: s_and_saveexec_b64 [[CMP]], [[CMP]] -; W64: s_waitcnt vmcnt(0) +; W64: s_nop 0 ; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W64: s_xor_b64 exec, exec, [[CMP]] ; W64: s_cbranch_execnz [[LOOPBB0]] @@ -171,7 +171,7 @@ entry: ; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[6:7] ; W64: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]] ; W64: s_and_saveexec_b64 [[CMP]], [[CMP]] -; W64: s_waitcnt vmcnt(0) +; W64: s_nop 0 ; W64: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W64: s_xor_b64 exec, exec, [[CMP]] ; W64: 
s_cbranch_execnz [[LOOPBB1]] @@ -196,7 +196,7 @@ entry: ; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[2:3] ; W32: s_and_b32 [[CMP:s[0-9]+]], vcc_lo, [[CMP0]] ; W32: s_and_saveexec_b32 [[CMP]], [[CMP]] -; W32: s_waitcnt vmcnt(0) +; W32: s_nop 0 ; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W32: s_xor_b32 exec_lo, exec_lo, [[CMP]] ; W32: s_cbranch_execnz [[LOOPBB0]] @@ -217,7 +217,7 @@ entry: ; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v[6:7] ; W32: s_and_b32 [[CMP:s[0-9]+]], vcc_lo, [[CMP0]] ; W32: s_and_saveexec_b32 [[CMP]], [[CMP]] -; W32: s_waitcnt vmcnt(0) +; W32: s_nop 0 ; W32: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen ; W32: s_xor_b32 exec_lo, exec_lo, [[CMP]] ; W32: s_cbranch_execnz [[LOOPBB1]] @@ -240,11 +240,8 @@ entry: ; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], {{.*}} ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] @@ -278,11 +275,8 @@ entry: ; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], {{.*}} ; 4-byte Folded Reload -; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], {{.*}} ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index ffba105f793bd..7328529596a22 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -187,44 +187,44 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v35, s33, 4 +; GFX9-NEXT: v_writelane_b32 v43, s33, 4 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_add_u32 s32, s32, 0x800 -; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: v_writelane_b32 v35, s34, 0 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v43, s34, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+4 -; GFX9-NEXT: 
v_writelane_b32 v35, s35, 1 +; GFX9-NEXT: v_writelane_b32 v43, s35, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v32, v1 -; GFX9-NEXT: v_mov_b32_e32 v33, v0 -; GFX9-NEXT: v_writelane_b32 v35, s30, 2 -; GFX9-NEXT: v_mul_u32_u24_e32 v0, v33, v32 -; GFX9-NEXT: v_writelane_b32 v35, s31, 3 -; GFX9-NEXT: v_and_b32_e32 v34, 0xffffff, v32 +; GFX9-NEXT: v_mov_b32_e32 v40, v1 +; GFX9-NEXT: v_mov_b32_e32 v41, v0 +; GFX9-NEXT: v_writelane_b32 v43, s30, 2 +; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40 +; GFX9-NEXT: v_writelane_b32 v43, s31, 3 +; GFX9-NEXT: v_and_b32_e32 v42, 0xffffff, v40 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_mad_u32_u24 v32, v33, v32, v34 -; GFX9-NEXT: v_mov_b32_e32 v0, v32 +; GFX9-NEXT: v_mad_u32_u24 v40, v41, v40, v42 +; GFX9-NEXT: v_mov_b32_e32 v0, v40 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_add_u32_e32 v0, v32, v34 +; GFX9-NEXT: v_add_u32_e32 v0, v40, v42 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s4, v35, 2 -; GFX9-NEXT: v_readlane_b32 s5, v35, 3 -; GFX9-NEXT: v_readlane_b32 s35, v35, 1 -; GFX9-NEXT: v_readlane_b32 s34, v35, 0 -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s4, v43, 2 +; GFX9-NEXT: v_readlane_b32 s5, v43, 3 +; GFX9-NEXT: v_readlane_b32 s35, v43, 1 +; GFX9-NEXT: v_readlane_b32 s34, v43, 0 +; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: s_sub_u32 s32, s32, 0x800 -; GFX9-NEXT: v_readlane_b32 s33, v35, 4 +; GFX9-NEXT: v_readlane_b32 s33, v43, 4 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 3e94a8e2f9523..406aa2e916516 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -12,23 +12,23 @@ declare void @external_void_func_i32(i32) #0 ; Spill CSR VGPR used for SGPR spilling ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-DAG: v_writelane_b32 v32, s33, 2 +; GCN-DAG: v_writelane_b32 v40, s33, 2 ; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400 -; GCN-DAG: v_writelane_b32 v32, s30, 0 -; GCN-DAG: v_writelane_b32 v32, s31, 1 +; GCN-DAG: v_writelane_b32 v40, s30, 0 +; GCN-DAG: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v32, 0 -; GCN: v_readlane_b32 s5, v32, 1 +; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s33, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; GCN-NEXT: 
buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index 9aebce434faed..77913a52aaa44 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -17,11 +17,11 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)* %buffer) { ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} @@ -220,16 +220,16 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 ; ; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 @@ -299,8 +299,8 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; @@ -345,10 +345,10 @@ define amdgpu_kernel void @p32Offset64(i8 
addrspace(1)* %buffer) { ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; ; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048 @@ -456,8 +456,8 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) { ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir index 6a7402574dab2..0020e17a0b6fe 100644 --- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir @@ -254,7 +254,7 @@ body: | ... # GCN-LABEL: csr{{$}} -# GCN: V_AND_B32_e32 $vgpr4, $vgpr0, +# GCN: V_AND_B32_e32 $vgpr37, $vgpr0, --- name: csr tracksRegLiveness: true @@ -364,3 +364,133 @@ body: | DS_WRITE2_B32_gfx9 %2, %1.sub14, %1.sub15, 14, 15, 0, implicit $exec S_ENDPGM 0 ... + +# GCN-LABEL: vgpr_lo16_sub{{$}} +# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec +# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16 +--- +name: vgpr_lo16_sub +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' } + - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_lo16 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = V_AND_B32_e32 %1, %0, implicit $exec + %3 = COPY %2.lo16 + $vgpr1_lo16 = COPY %3 + SI_RETURN_TO_EPILOG $vgpr1_lo16 +... + +# GCN-LABEL: vgpr_lo16{{$}} +# GCN: $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16 +--- +name: vgpr_lo16 +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_lo16, preferred-register: '$vgpr4_lo16' } +body: | + bb.0: + liveins: $vgpr0_lo16 + + %0 = COPY $vgpr0_lo16 + $vgpr1_lo16 = COPY %0 + SI_RETURN_TO_EPILOG $vgpr1_lo16 +... + +# GCN-LABEL: vgpr_hi16_sub{{$}} +# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec +# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16 +--- +name: vgpr_hi16_sub +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' } + - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_hi16 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = V_AND_B32_e32 %1, %0, implicit $exec + %3 = COPY %2.hi16 + $vgpr1_hi16 = COPY %3 + SI_RETURN_TO_EPILOG $vgpr1_hi16 +... 
+
+# GCN-LABEL: vgpr_hi16{{$}}
+# GCN: $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
+---
+name: vgpr_hi16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_hi16, preferred-register: '$vgpr4_hi16' }
+body: |
+  bb.0:
+    liveins: $vgpr0_hi16
+
+    %0 = COPY $vgpr0_hi16
+    $vgpr1_hi16 = COPY %0
+    SI_RETURN_TO_EPILOG $vgpr1_hi16
+...
+
+# GCN-LABEL: sgpr_lo16_sub{{$}}
+# GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
+# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
+---
+name: sgpr_lo16_sub
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_32, preferred-register: '$sgpr16' }
+  - { id: 1, class: sgpr_32 }
+  - { id: 2, class: sgpr_lo16 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    %1 = S_AND_B32 %0, $sgpr0, implicit-def $scc
+    %2 = COPY %1.lo16
+    $sgpr1_lo16 = COPY %2
+    SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# GCN-LABEL: sgpr_lo16{{$}}
+# GCN: $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
+---
+name: sgpr_lo16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_lo16, preferred-register: '$sgpr4_lo16' }
+body: |
+  bb.0:
+    liveins: $sgpr0_lo16
+
+    %0 = COPY $sgpr0_lo16
+    $sgpr1_lo16 = COPY %0
+    SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# Check that we do not use VGPR3 which we would use otherwise.
+# We cannot use it because of interference with VGPR3_LO16.
+# GCN-LABEL: v1_vs_v5_src_interference{{$}}
+# GCN: V_AND_B32_e32 killed $vgpr7, killed $vgpr1,
+---
+name: v1_vs_v5_src_interference
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 2, class: vgpr_32 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    $vgpr3_lo16 = IMPLICIT_DEF
+    %2 = V_AND_B32_e32 %1, %0, implicit $exec
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index 443999bdea5f9..79af30b8d59ca 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -33,7 +33,7 @@ body: | ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:VGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index 3398e0f10b361..d0eea78bd2353 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -36,18 +36,18 @@ body: | ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead %11 ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %15, 851978 /* regdef:VGPR_LO16 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %21, 851978 /* regdef:VGPR_LO16 */, def %22 ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead 
[[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VGPR_LO16 */, %15, 851977 /* reguse:VGPR_LO16 */, %16, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index edbade80ad318..9ad18f4127082 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -69,7 +69,7 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v7, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v3, v3, v5 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 @@ -422,15 +422,15 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-IR-NEXT: v_mov_b32_e32 v1, v5 ; GCN-IR-NEXT: v_cndmask_b32_e64 v7, v10, 0, s[6:7] ; GCN-IR-NEXT: s_and_b64 s[4:5], s[8:9], s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v17, v18 +; GCN-IR-NEXT: v_mov_b32_e32 v15, v18 ; GCN-IR-NEXT: v_cndmask_b32_e64 v0, v9, 0, s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB1_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v15, vcc, 1, v11 -; GCN-IR-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc +; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, 1, v11 +; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, 0, v12, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 63, v11 -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[15:16], v[11:12] +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[16:17], v[11:12] ; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 ; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[9:10], v0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 @@ -440,39 +440,39 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_execz BB1_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, -1, v2 -; GCN-IR-NEXT: v_lshr_b64 v[15:16], v[9:10], v15 +; GCN-IR-NEXT: v_lshr_b64 v[16:17], v[9:10], v16 ; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, -1, v3, vcc ; GCN-IR-NEXT: v_not_b32_e32 v10, v13 ; GCN-IR-NEXT: v_not_b32_e32 v11, v18 ; GCN-IR-NEXT: v_add_i32_e32 v13, vcc, v10, v14 -; GCN-IR-NEXT: 
v_addc_u32_e32 v14, vcc, v11, v17, vcc -; GCN-IR-NEXT: v_mov_b32_e32 v17, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v18, 0 +; GCN-IR-NEXT: v_addc_u32_e32 v14, vcc, v11, v15, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v19, 0 ; GCN-IR-NEXT: BB1_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[15:16], v[15:16], 1 +; GCN-IR-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v10, 31, v8 -; GCN-IR-NEXT: v_or_b32_e32 v10, v15, v10 +; GCN-IR-NEXT: v_or_b32_e32 v10, v16, v10 ; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[7:8], 1 ; GCN-IR-NEXT: v_sub_i32_e32 v11, vcc, v0, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v11, vcc, v9, v16, vcc -; GCN-IR-NEXT: v_or_b32_e32 v7, v17, v7 -; GCN-IR-NEXT: v_add_i32_e32 v17, vcc, 1, v13 +; GCN-IR-NEXT: v_subb_u32_e32 v11, vcc, v9, v17, vcc +; GCN-IR-NEXT: v_or_b32_e32 v7, v18, v7 +; GCN-IR-NEXT: v_add_i32_e32 v18, vcc, 1, v13 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v15, 31, v11 -; GCN-IR-NEXT: v_or_b32_e32 v8, v18, v8 -; GCN-IR-NEXT: v_addc_u32_e32 v18, vcc, 0, v14, vcc -; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[17:18], v[13:14] -; GCN-IR-NEXT: v_mov_b32_e32 v13, v17 +; GCN-IR-NEXT: v_or_b32_e32 v8, v19, v8 +; GCN-IR-NEXT: v_addc_u32_e32 v19, vcc, 0, v14, vcc +; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[18:19], v[13:14] +; GCN-IR-NEXT: v_mov_b32_e32 v13, v18 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 ; GCN-IR-NEXT: v_and_b32_e32 v11, 1, v15 -; GCN-IR-NEXT: v_and_b32_e32 v19, v15, v3 +; GCN-IR-NEXT: v_and_b32_e32 v20, v15, v3 ; GCN-IR-NEXT: v_and_b32_e32 v15, v15, v2 -; GCN-IR-NEXT: v_sub_i32_e64 v15, s[4:5], v10, v15 -; GCN-IR-NEXT: v_mov_b32_e32 v14, v18 -; GCN-IR-NEXT: v_mov_b32_e32 v18, v12 -; GCN-IR-NEXT: v_subb_u32_e64 v16, s[4:5], v16, v19, s[4:5] +; GCN-IR-NEXT: v_sub_i32_e64 v16, s[4:5], v10, v15 +; GCN-IR-NEXT: v_mov_b32_e32 v14, v19 +; GCN-IR-NEXT: v_mov_b32_e32 v19, v12 +; GCN-IR-NEXT: v_subb_u32_e64 v17, s[4:5], v17, v20, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v17, v11 +; GCN-IR-NEXT: v_mov_b32_e32 v18, v11 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB1_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1193,7 +1193,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -1499,21 +1499,21 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[4:5] +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[8:9], v[4:5] ; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v4 -; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], 24, v4 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB11_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; 
GCN-IR-NEXT: v_lshr_b64 v[12:13], 24, v6 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_lshr_b64 v[12:13], 24, v8 +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v10, vcc, 58, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v14, 0 ; GCN-IR-NEXT: v_subb_u32_e32 v11, vcc, 0, v11, vcc @@ -1521,28 +1521,28 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB11_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[12:13], v[12:13], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v8, 31, v5 -; GCN-IR-NEXT: v_or_b32_e32 v12, v12, v8 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 +; GCN-IR-NEXT: v_or_b32_e32 v12, v12, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, v6, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v8, vcc, v7, v13, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v8, v12 +; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v9, v13, vcc ; GCN-IR-NEXT: v_or_b32_e32 v4, v14, v4 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v14, 31, v8 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v14, 31, v6 ; GCN-IR-NEXT: v_and_b32_e32 v17, v14, v0 -; GCN-IR-NEXT: v_and_b32_e32 v8, 1, v14 +; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v14 ; GCN-IR-NEXT: v_and_b32_e32 v16, v14, v1 ; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v10 ; GCN-IR-NEXT: v_or_b32_e32 v5, v15, v5 ; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[14:15], v[10:11] ; GCN-IR-NEXT: v_mov_b32_e32 v10, v14 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v12, s[4:5], v12, v17 ; GCN-IR-NEXT: v_mov_b32_e32 v11, v15 -; GCN-IR-NEXT: v_mov_b32_e32 v15, v9 +; GCN-IR-NEXT: v_mov_b32_e32 v15, v7 ; GCN-IR-NEXT: v_subb_u32_e64 v13, s[4:5], v13, v16, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v14, v8 +; GCN-IR-NEXT: v_mov_b32_e32 v14, v6 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB11_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1550,8 +1550,8 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB11_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], 1 -; GCN-IR-NEXT: v_or_b32_e32 v7, v9, v1 -; GCN-IR-NEXT: v_or_b32_e32 v6, v8, v0 +; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v1 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v0 ; GCN-IR-NEXT: BB11_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_xor_b32_e32 v0, v6, v2 @@ -1715,23 +1715,23 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[4:5] +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[8:9], v[4:5] ; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v4 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], s[8:9], v4 -; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: 
s_mov_b32 s4, 0x8000 -; GCN-IR-NEXT: v_lshr_b64 v[12:13], s[4:5], v6 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_lshr_b64 v[12:13], s[4:5], v8 +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v10, vcc, 47, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v14, 0 ; GCN-IR-NEXT: v_subb_u32_e32 v11, vcc, 0, v11, vcc @@ -1739,28 +1739,28 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB12_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[12:13], v[12:13], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v8, 31, v5 -; GCN-IR-NEXT: v_or_b32_e32 v12, v12, v8 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 +; GCN-IR-NEXT: v_or_b32_e32 v12, v12, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, v6, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v8, vcc, v7, v13, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v8, v12 +; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v9, v13, vcc ; GCN-IR-NEXT: v_or_b32_e32 v4, v14, v4 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v14, 31, v8 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v14, 31, v6 ; GCN-IR-NEXT: v_and_b32_e32 v17, v14, v0 -; GCN-IR-NEXT: v_and_b32_e32 v8, 1, v14 +; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v14 ; GCN-IR-NEXT: v_and_b32_e32 v16, v14, v1 ; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v10 ; GCN-IR-NEXT: v_or_b32_e32 v5, v15, v5 ; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[14:15], v[10:11] ; GCN-IR-NEXT: v_mov_b32_e32 v10, v14 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v12, s[4:5], v12, v17 ; GCN-IR-NEXT: v_mov_b32_e32 v11, v15 -; GCN-IR-NEXT: v_mov_b32_e32 v15, v9 +; GCN-IR-NEXT: v_mov_b32_e32 v15, v7 ; GCN-IR-NEXT: v_subb_u32_e64 v13, s[4:5], v13, v16, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v14, v8 +; GCN-IR-NEXT: v_mov_b32_e32 v14, v6 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB12_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1768,8 +1768,8 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB12_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], 1 -; GCN-IR-NEXT: v_or_b32_e32 v7, v9, v1 -; GCN-IR-NEXT: v_or_b32_e32 v6, v8, v0 +; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v1 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v0 ; GCN-IR-NEXT: BB12_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_xor_b32_e32 v0, v6, v2 diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll index c96ff256c8c63..da15cff4c91a1 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -742,8 +742,8 @@ define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add ; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 ; GCN-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 ; GCN-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_load_dwordx4 v[11:14], off, s[4:7], 0 offset:48 +; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_lshl_b64 v[2:3], v[2:3], v10 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshl_b64 v[6:7], v[6:7], v13 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 0b9eec73e191f..ebcae6774c606 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ 
b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -152,9 +152,6 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; FIXME: Why load and store same location for stack args? ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_a32i32: -; GCN-DAG: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill - ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s32{{$}} ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s32 offset:4 @@ -163,9 +160,6 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN-DAG: buffer_store_dword [[LOAD_0]], off, s[0:3], s32{{$}} ; GCN-DAG: buffer_store_dword [[LOAD_1]], off, s[0:3], s32 offset:4 -; GCN-DAG: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload - ; GCN-NOT: s32 ; GCN: s_setpc_b64 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { @@ -176,7 +170,7 @@ entry: ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_a32i32_stack_object: ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 -; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:40 +; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:28 ; GCN: s_setpc_b64 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 { entry: @@ -203,15 +197,15 @@ entry: ; Have another non-tail in the function ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call: ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 -; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec ; GCN: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400 -; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 v34, s34, 0 -; GCN-DAG: v_writelane_b32 v34, s35, 1 +; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 v42, s34, 0 +; GCN-DAG: v_writelane_b32 v42, s35, 1 ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 @@ -220,11 +214,11 @@ entry: ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s34, v34, 0 -; GCN-DAG: v_readlane_b32 s35, v34, 1 +; GCN-DAG: v_readlane_b32 s34, v42, 0 +; GCN-DAG: v_readlane_b32 s35, v42, 1 -; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 @@ -233,7 +227,7 @@ entry: ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: v_readlane_b32 s33, ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_setpc_b64 s[4:5] define fastcc i32 
@sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { @@ -248,11 +242,11 @@ entry: ; GCN-LABEL: {{^}}sibling_call_stack_objecti32_fastcc_i32_i32_a32i32: ; GCN-NOT: s33 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset: +; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset: ; GCN-NOT: s33 -; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset: +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset: ; GCN: s_setpc_b64 s[4:5] define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll index c98e344b50095..086cbf974383b 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -2,17 +2,17 @@ ; GCN-LABEL: {{^}}spill_csr_s5_copy: ; GCN: s_or_saveexec_b64 -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 v32, s33, 2 +; GCN: v_writelane_b32 v40, s33, 2 ; GCN: s_swappc_b64 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; GCN: v_readlane_b32 s33, v32, 2 +; GCN: v_readlane_b32 s33, v40, 2 ; GCN: s_or_saveexec_b64 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_mov_b64 exec ; GCN: s_setpc_b64 define void @spill_csr_s5_copy() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index dea0242b05db3..a0e16ae0cef6a 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -63,7 +63,7 @@ define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -948,7 +948,7 @@ define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v7, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v3, v3, v5 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 @@ -1379,7 +1379,7 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -1676,21 +1676,21 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; GCN-IR-NEXT: v_addc_u32_e32 
v6, vcc, 0, v4, vcc +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v3 -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[3:4] -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[3:4] +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], 24, v2 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB11_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, -1, v0 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], 24, v5 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_lshr_b64 v[10:11], 24, v6 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, 58, v8 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 ; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc @@ -1698,28 +1698,28 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB11_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 +; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v4, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v5, v11, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v6, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v7, v11, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v12, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v4 ; GCN-IR-NEXT: v_and_b32_e32 v15, v12, v0 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v12 ; GCN-IR-NEXT: v_and_b32_e32 v14, v12, v1 ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; GCN-IR-NEXT: v_or_b32_e32 v3, v13, v3 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[12:13], v[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v8, v12 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v15 ; GCN-IR-NEXT: v_mov_b32_e32 v9, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v5 ; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v14, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 +; GCN-IR-NEXT: v_mov_b32_e32 v12, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB11_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1727,8 +1727,8 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB11_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v5, v7, v3 -; GCN-IR-NEXT: v_or_b32_e32 v2, v6, v2 +; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 +; GCN-IR-NEXT: v_or_b32_e32 v2, v4, v2 ; GCN-IR-NEXT: BB11_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_mul_lo_u32 v3, v0, v5 @@ -1890,23 +1890,23 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, 
v[4:5], v[2:3] +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[2:3] ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v4 -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, 47, v8 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 ; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc @@ -1914,28 +1914,28 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB12_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 +; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v4, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v5, v11, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v6, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v7, v11, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v12, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v4 ; GCN-IR-NEXT: v_and_b32_e32 v15, v12, v0 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v12 ; GCN-IR-NEXT: v_and_b32_e32 v14, v12, v1 ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; GCN-IR-NEXT: v_or_b32_e32 v3, v13, v3 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[12:13], v[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v8, v12 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v15 ; GCN-IR-NEXT: v_mov_b32_e32 v9, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v5 ; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v14, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 +; GCN-IR-NEXT: v_mov_b32_e32 v12, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB12_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1943,8 +1943,8 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB12_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v5, v7, v3 -; GCN-IR-NEXT: v_or_b32_e32 v4, v6, v2 +; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 +; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 ; GCN-IR-NEXT: BB12_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_mul_lo_u32 v2, v0, v5 diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index 497d35ea3d710..c8fb1ec72d505 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ 
b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -29,7 +29,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, < ; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] ; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] ; GCN-NEXT: s_mov_b32 s32, 0xc0000 -; GCN-NEXT: v_add_nc_u32_e64 v32, 4, 0x4000 +; GCN-NEXT: v_add_nc_u32_e64 v40, 4, 0x4000 ; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4 @@ -41,8 +41,8 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, < ; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GCN-NEXT: s_cbranch_execz BB0_2 ; GCN-NEXT: ; %bb.1: ; %if.then4.i -; GCN-NEXT: buffer_load_dword v0, v32, s[36:39], s32 offen -; GCN-NEXT: buffer_load_dword v1, v32, s[36:39], s32 offen offset:4 +; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], s32 offen +; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], s32 offen offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v0 ; GCN-NEXT: v_mul_lo_u32 v0, 0x41c64e6d, v0 diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir index 488bbfeefa10f..ee47432238476 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -25,9 +25,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 @@ -63,9 +63,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1, 851978 /* 
regdef:VGPR_LO16 */, def undef %0.sub0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index ef0c50c80ded1..6b29f29620436 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -62,7 +62,7 @@ define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -768,7 +768,7 @@ define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 ; GCN-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; GCN-NEXT: v_addc_u32_e32 v12, vcc, v9, v13, vcc ; GCN-NEXT: v_mul_lo_u32 v3, v3, v5 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v11, v6 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v12, v10, vcc ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 @@ -997,7 +997,7 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -1267,52 +1267,52 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[10:11], v[4:5] +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB9_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, -1, v1, vcc ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 +; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, 47, v8 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v10 ; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 ; GCN-IR-NEXT: BB9_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 +; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; 
GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v4, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v5, v11, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v6, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v7, v11, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v12, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v4 ; GCN-IR-NEXT: v_and_b32_e32 v15, v12, v0 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v12 ; GCN-IR-NEXT: v_and_b32_e32 v14, v12, v1 ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; GCN-IR-NEXT: v_or_b32_e32 v3, v13, v3 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[12:13], v[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v8, v12 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v15 ; GCN-IR-NEXT: v_mov_b32_e32 v9, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v5 ; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v14, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 +; GCN-IR-NEXT: v_mov_b32_e32 v12, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB9_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1320,8 +1320,8 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB9_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v3, v7, v1 -; GCN-IR-NEXT: v_or_b32_e32 v2, v6, v0 +; GCN-IR-NEXT: v_or_b32_e32 v3, v5, v1 +; GCN-IR-NEXT: v_or_b32_e32 v2, v4, v0 ; GCN-IR-NEXT: BB9_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_mov_b32_e32 v0, v2 @@ -1477,7 +1477,7 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_lo_u32 v10, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v2, v6 ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v9, v6, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v11, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -1665,7 +1665,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_mul_lo_u32 v12, v4, v8 ; GCN-NEXT: v_mul_hi_u32 v8, v4, v8 ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v12, v7 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v12 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v8, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v13, v9, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index a652fb17f959f..0aac641dae974 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -63,7 +63,7 @@ define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 % ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -805,7 +805,7 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GCN-NEXT: v_addc_u32_e32 v11, vcc, v2, v12, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v4, v6 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; GCN-NEXT: 
v_addc_u32_e32 v7, vcc, v11, v9, vcc ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v1, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v7, v4 @@ -1007,7 +1007,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) ; GCN-NEXT: v_mul_lo_u32 v10, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v2, v6 ; GCN-NEXT: v_mul_lo_u32 v2, v2, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v10 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v9, v6, vcc ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v11, v7, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v5, v2 @@ -1280,23 +1280,23 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB8_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc -; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[4:5], v[2:3] +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[2:3] ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB8_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v4 -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, 47, v8 ; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 ; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc @@ -1304,28 +1304,28 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB8_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 -; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 +; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v4, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v5, v11, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v6, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v7, v11, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v12, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v4 ; GCN-IR-NEXT: v_and_b32_e32 v15, v12, v0 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v12 ; GCN-IR-NEXT: v_and_b32_e32 v14, v12, v1 ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; GCN-IR-NEXT: v_or_b32_e32 v3, v13, v3 ; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[12:13], v[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v8, v12 -; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v15 ; GCN-IR-NEXT: v_mov_b32_e32 v9, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v5 ; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v14, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 +; GCN-IR-NEXT: 
v_mov_b32_e32 v12, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz BB8_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow @@ -1333,8 +1333,8 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: BB8_5: ; %Flow3 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v5, v7, v3 -; GCN-IR-NEXT: v_or_b32_e32 v4, v6, v2 +; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 +; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 ; GCN-IR-NEXT: BB8_6: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN-IR-NEXT: v_mul_lo_u32 v2, v0, v5 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll new file mode 100644 index 0000000000000..c6f2ec5f3e343 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -0,0 +1,170 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s + +declare void @extern_func() + +define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +; The vgpr tuple8 operand in the image_gather4_c_b_cl instruction need not be +; preserved across the call and should get 8 scratch registers. + +; GFX9-LABEL: non_preserved_vgpr_tuple8: +; GFX9: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill + +; GFX9: v_mov_b32_e32 v37, v11 +; GFX9-NEXT: v_mov_b32_e32 v38, v10 +; GFX9-NEXT: v_mov_b32_e32 v49, v9 +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9-NEXT: v_mov_b32_e32 v36, v16 +; GFX9-NEXT: v_mov_b32_e32 v35, v15 +; GFX9-NEXT: v_mov_b32_e32 v34, v14 +; GFX9-NEXT: v_mov_b32_e32 v33, v13 +; GFX9-NEXT: v_mov_b32_e32 v32, v12 + +; GFX9: ;;#ASMSTART +; GFX9-NEXT: ;;#ASMEND + +; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[12:15] dmask:0x1 +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+4 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] + +; GFX9: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9: buffer_load_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9: s_setpc_b64 s[4:5] +; +; GFX10-LABEL: non_preserved_vgpr_tuple8: +; GFX10: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX10: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill + +; GFX10: v_mov_b32_e32 v36,
v16 +; GFX10-NEXT: v_mov_b32_e32 v35, v15 +; GFX10-NEXT: v_mov_b32_e32 v34, v14 +; GFX10-NEXT: v_mov_b32_e32 v33, v13 +; GFX10-NEXT: v_mov_b32_e32 v32, v12 + +; GFX10: ;;#ASMSTART +; GFX10-NEXT: ;;#ASMEND + +; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[12:15] dmask:0x1 +; GFX10-NEXT: v_nop +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+4 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] + +; GFX10: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload + +; GFX10: buffer_load_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX10: s_setpc_b64 s[4:5] +main_body: + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 + call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 + call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0 + call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0 + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + call void @extern_func() + ret <4 x float> %v +} + +define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +; The vgpr tuple8 operand in the image_gather4_c_b_cl instruction needs to be preserved +; across the call and should be allocated to 8 CSRs. +; Only the lower 5 sub-registers of the tuple are preserved. +; The upper 3 sub-registers are unused.
+ +; GFX9-LABEL: call_preserved_vgpr_tuple8: +; GFX9: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill + +; GFX9: v_mov_b32_e32 v44, v16 +; GFX9-NEXT: v_mov_b32_e32 v43, v15 +; GFX9-NEXT: v_mov_b32_e32 v42, v14 +; GFX9-NEXT: v_mov_b32_e32 v41, v13 +; GFX9-NEXT: v_mov_b32_e32 v40, v12 + +; GFX9: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+4 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[44:47] dmask:0x1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[44:47] dmask:0x1 + +; GFX9: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload + +; GFX9: buffer_load_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9: s_setpc_b64 s[4:5] +; +; GFX10-LABEL: call_preserved_vgpr_tuple8: +; GFX10: buffer_store_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX10: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill + +; GFX10: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+4 +; GFX10-NEXT: v_mov_b32_e32 v40, v16 +; GFX10-NEXT: v_mov_b32_e32 v41, v15 +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:19], s[36:43], s[44:47] dmask:0x1 +; GFX10-NEXT: v_mov_b32_e32 v42, v14 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v43, v13 +; GFX10-NEXT: v_mov_b32_e32 v44, v12 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[36:43], s[44:47] dmask:0x1 + +; GFX10: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload 
+; GFX10: buffer_load_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX10: s_setpc_b64 s[4:5] +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + call void @extern_func() + %v1 = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + ret <4 x float> %v1 +} + +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +attributes #0 = { nounwind writeonly } +attributes #1 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir index 98849ba3cbc43..d1766d1c0f9de 100644 --- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir @@ -29,7 +29,7 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: undef_identity_copy - ; CHECK: renamable $vgpr32_vgpr33_vgpr34_vgpr35 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) + ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 ; CHECK: $sgpr4 = COPY $sgpr95 @@ -38,9 +38,9 @@ body: | ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 ; CHECK: $sgpr4 = COPY $sgpr95 - ; CHECK: $vgpr0 = COPY renamable $vgpr32 - ; CHECK: $vgpr1 = COPY renamable $vgpr33 - ; CHECK: $vgpr2 = COPY renamable $vgpr34 + ; CHECK: $vgpr0 = COPY renamable $vgpr40 + ; CHECK: $vgpr1 = COPY renamable $vgpr41 + ; CHECK: $vgpr2 = COPY renamable $vgpr42 ; CHECK: $vgpr3 = KILL undef renamable $vgpr3 ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll index dcc7bfa2c01e0..8d6864f967e99 100644 --- a/llvm/test/CodeGen/AMDGPU/wait.ll +++ b/llvm/test/CodeGen/AMDGPU/wait.ll @@ -9,7 +9,6 @@ ; DEFAULT: s_load_dwordx4 ; DEFAULT: s_waitcnt lgkmcnt(0) ; DEFAULT: buffer_load_format_xyzw -; DEFAULT: s_waitcnt vmcnt(0) ; DEFAULT: buffer_load_format_xyzw ; DEFAULT: s_waitcnt vmcnt(0) ; DEFAULT: exp diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir new file mode 100644 index 0000000000000..90009b6084289 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s + +# Two buffer loads with overlapping outputs. No waitcnt required. +--- +name: buffer_buffer +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + ; GFX9-LABEL: name: buffer_buffer + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + ; GFX9: S_WAITCNT 0 + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec + ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, 0, 0, 0, 0, 0, implicit $exec +... + +# Two tbuffer loads with overlapping outputs. No waitcnt required. +--- +name: tbuffer_tbuffer +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + ; GFX9-LABEL: name: tbuffer_tbuffer + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + ; GFX9: S_WAITCNT 0 + ; GFX9: $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec + ; GFX9: $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec + $vgpr0_vgpr1_vgpr2 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = TBUFFER_LOAD_FORMAT_X_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec +... + +# Two gathers with overlapping outputs. (Note gathers can't be trimmed because +# dmask means something different.) No waitcnt required. +--- +name: gather_gather +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + ; GFX9-LABEL: name: gather_gather + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + ; GFX9: S_WAITCNT 0 + ; GFX9: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + ; GFX9: $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec +... + +# Image load vs image sample. Waitcnt required because they are not guaranteed +# to write their results in order, despite both using the s_waitcnt vmcnt +# counter. 
+--- +name: nosampler_sampler +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-LABEL: name: nosampler_sampler + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: S_WAITCNT 0 + ; GFX9: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + ; GFX9: S_WAITCNT 3952 + ; GFX9: $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +... diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index f9d4e3a5abadd..2bfe82d426faa 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1058,30 +1058,30 @@ declare void @external_void_func_void() #1 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}} -; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s33, 2 +; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN: s_mov_b32 s33, s32 ; GFX1064: s_add_u32 s32, s32, 0x400 ; GFX1032: s_add_u32 s32, s32, 0x200 -; GCN-DAG: v_writelane_b32 v32, s30, 0 -; GCN-DAG: v_writelane_b32 v32, s31, 1 +; GCN-DAG: v_writelane_b32 v40, s30, 0 +; GCN-DAG: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s4, v32, 0 -; GCN-DAG: v_readlane_b32 s5, v32, 1 +; GCN-DAG: v_readlane_b32 s4, v40, 0 +; GCN-DAG: v_readlane_b32 s5, v40, 1 ; GFX1064: s_sub_u32 s32, s32, 0x400 ; GFX1032: s_sub_u32 s32, s32, 0x200 -; GCN: v_readlane_b32 s33, v32, 2 +; GCN: v_readlane_b32 s33, v40, 2 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]] diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 60e9932e95f66..1f5e22ee48f4a 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: Remove unreachable blocks from the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant Hoisting @@ -70,6 +71,7 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; 
CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis diff --git a/llvm/test/CodeGen/ARM/emit-big-cst.ll b/llvm/test/CodeGen/ARM/emit-big-cst.ll index 7453e8caa9465..e0c6d4e893e1f 100644 --- a/llvm/test/CodeGen/ARM/emit-big-cst.ll +++ b/llvm/test/CodeGen/ARM/emit-big-cst.ll @@ -4,7 +4,9 @@ ; CHECK: bigCst: ; CHECK-NEXT: .long 1694510592 ; CHECK-NEXT: .long 2960197 -; CHECK-NEXT: .long 26220 +; CHECK-NEXT: .short 26220 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .size bigCst, 12 @bigCst = internal constant i82 483673642326615442599424 diff --git a/llvm/test/CodeGen/ARM/tail-call-results.ll b/llvm/test/CodeGen/ARM/tail-call-results.ll new file mode 100644 index 0000000000000..8b90e18f8010a --- /dev/null +++ b/llvm/test/CodeGen/ARM/tail-call-results.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-gnueabi %s -o - | FileCheck -check-prefix=SOFTFLOAT %s +; RUN: llc -mtriple=thumbv7-linux-gnueabihf -disable-tail-calls %s -o - | FileCheck -check-prefix=HF-NOTAIL %s + +; On hard-float targets, the register used to store a float return value +; changes if the call signature is varargs. The HF-NOTAIL lines make it easy +; to see when this happens. + +declare float @callee_float() +declare i32 @callee_int() +declare float @callee_float_vararg(i32, ...) +declare i32 @callee_int_vararg(i32, ...) + +define float @caller_float__callee_float() { +; CHECK-LABEL: caller_float__callee_float: +; CHECK: @ %bb.0: +; CHECK-NEXT: b callee_float +; +; SOFTFLOAT-LABEL: caller_float__callee_float: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: b callee_float +; +; HF-NOTAIL-LABEL: caller_float__callee_float: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: bl callee_float +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call float @callee_float() + ret float %r +} + +define float @caller_float__callee_float_vararg() { +; CHECK-LABEL: caller_float__callee_float_vararg: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bl callee_float_vararg +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: pop {r7, pc} +; +; SOFTFLOAT-LABEL: caller_float__callee_float_vararg: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: movs r0, #0 +; SOFTFLOAT-NEXT: b callee_float_vararg +; +; HF-NOTAIL-LABEL: caller_float__callee_float_vararg: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: movs r0, #0 +; HF-NOTAIL-NEXT: bl callee_float_vararg +; HF-NOTAIL-NEXT: vmov s0, r0 +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call float (i32, ...) @callee_float_vararg(i32 0) + ret float %r +} + +define float @caller_float_vararg__callee_float(i32, ...)
{ +; CHECK-LABEL: caller_float_vararg__callee_float: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: bl callee_float +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: pop {r7, pc} +; +; SOFTFLOAT-LABEL: caller_float_vararg__callee_float: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: b callee_float +; +; HF-NOTAIL-LABEL: caller_float_vararg__callee_float: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: bl callee_float +; HF-NOTAIL-NEXT: vmov r0, s0 +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call float @callee_float() + ret float %r +} + +define float @caller_float_vararg__callee_float_vararg(i32, ...) { +; CHECK-LABEL: caller_float_vararg__callee_float_vararg: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b callee_float_vararg +; +; SOFTFLOAT-LABEL: caller_float_vararg__callee_float_vararg: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: movs r0, #0 +; SOFTFLOAT-NEXT: b callee_float_vararg +; +; HF-NOTAIL-LABEL: caller_float_vararg__callee_float_vararg: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: movs r0, #0 +; HF-NOTAIL-NEXT: bl callee_float_vararg +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call float (i32, ...) @callee_float_vararg(i32 0) + ret float %r +} + +define i32 @caller_int__callee_int() { +; CHECK-LABEL: caller_int__callee_int: +; CHECK: @ %bb.0: +; CHECK-NEXT: b callee_int +; +; SOFTFLOAT-LABEL: caller_int__callee_int: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: b callee_int +; +; HF-NOTAIL-LABEL: caller_int__callee_int: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: bl callee_int +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call i32 @callee_int() + ret i32 %r +} + +define i32 @caller_int__callee_int_vararg() { +; CHECK-LABEL: caller_int__callee_int_vararg: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b callee_int_vararg +; +; SOFTFLOAT-LABEL: caller_int__callee_int_vararg: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: movs r0, #0 +; SOFTFLOAT-NEXT: b callee_int_vararg +; +; HF-NOTAIL-LABEL: caller_int__callee_int_vararg: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: movs r0, #0 +; HF-NOTAIL-NEXT: bl callee_int_vararg +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call i32 (i32, ...) @callee_int_vararg(i32 0) + ret i32 %r +} + +define i32 @caller_int_vararg__callee_int(i32, ...) { +; CHECK-LABEL: caller_int_vararg__callee_int: +; CHECK: @ %bb.0: +; CHECK-NEXT: b callee_int +; +; SOFTFLOAT-LABEL: caller_int_vararg__callee_int: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: b callee_int +; +; HF-NOTAIL-LABEL: caller_int_vararg__callee_int: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: bl callee_int +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call i32 @callee_int() + ret i32 %r +} + +define i32 @caller_int_vararg__callee_int_vararg(i32, ...) 
{ +; CHECK-LABEL: caller_int_vararg__callee_int_vararg: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b callee_int_vararg +; +; SOFTFLOAT-LABEL: caller_int_vararg__callee_int_vararg: +; SOFTFLOAT: @ %bb.0: +; SOFTFLOAT-NEXT: movs r0, #0 +; SOFTFLOAT-NEXT: b callee_int_vararg +; +; HF-NOTAIL-LABEL: caller_int_vararg__callee_int_vararg: +; HF-NOTAIL: @ %bb.0: +; HF-NOTAIL-NEXT: .save {r7, lr} +; HF-NOTAIL-NEXT: push {r7, lr} +; HF-NOTAIL-NEXT: movs r0, #0 +; HF-NOTAIL-NEXT: bl callee_int_vararg +; HF-NOTAIL-NEXT: pop {r7, pc} + %r = tail call i32 (i32, ...) @callee_int_vararg(i32 0) + ret i32 %r +} diff --git a/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll new file mode 100644 index 0000000000000..aedbd6101c4f9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK: .LBB0_1: +; CHECK: [[R1:r[0-9]+]] = memw_locked(r0) +; CHECK-DAG: [[R2:r[0-9]+]] = and([[R1]], +; CHECK-DAG: [[R3:r[0-9]+]] = add([[R1]], +; CHECK: [[R2]] |= and([[R3]], +; CHECK: memw_locked(r0,[[P0:p[0-3]]]) = [[R2]] +; CHECK: if (![[P0]]) jump:nt .LBB0_1 + + +%struct.a = type { i8 } + +define void @b() #0 { + %d = alloca %struct.a + %c = getelementptr %struct.a, %struct.a* %d, i32 0, i32 0 + atomicrmw add i8* %c, i8 2 monotonic + ret void +} + +attributes #0 = { "target-cpu"="hexagonv66" } + diff --git a/llvm/test/CodeGen/Hexagon/check-subregister-for-latency.ll b/llvm/test/CodeGen/Hexagon/check-subregister-for-latency.ll new file mode 100644 index 0000000000000..8290f768585dd --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/check-subregister-for-latency.ll @@ -0,0 +1,66 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv67t < %s +; REQUIRES: asserts + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +%s.0 = type { double, double, double, double, double, double, i32, double, double, double, double, i8*, i8, [9 x i8], double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, [200 x i8*], [32 x i8*], [32 x i8], i32 } + +define hidden fastcc void @f0() unnamed_addr #0 { +b0: + %v0 = getelementptr inbounds %s.0, %s.0* null, i32 0, i32 33 + %v1 = getelementptr inbounds %s.0, %s.0* null, i32 0, i32 34 + br label %b1 + +b1: ; preds = %b1, %b0 + %v2 = phi i32* [ undef, %b0 ], [ %v27, %b1 ] + %v3 = load i32, i32* %v2, align 1, !tbaa !1 + %v4 = getelementptr inbounds [0 x %s.0*], [0 x %s.0*]* null, i32 0, i32 %v3 + %v5 = load %s.0*, %s.0** %v4, align 1, !tbaa !5 + %v6 = load double, double* undef, align 1, !tbaa !7 + %v7 = fdiv double 1.000000e+00, %v6 + %v8 = fmul double %v7, 0.000000e+00 + %v9 = fmul double %v7, 0.000000e+00 + %v10 = fmul double %v8, -4.800000e+01 + %v11 = fmul double %v9, 1.680000e+02 + %v12 = fmul double %v7, 0.000000e+00 + %v13 = load double, double* null, align 1, !tbaa !7 + %v14 = fmul double %v7, %v13 + %v15 = fmul double %v12, 0.000000e+00 + %v16 = getelementptr inbounds %s.0, %s.0* %v5, i32 0, i32 30 + %v17 = fsub double 0.000000e+00, %v15 + store double %v17, double* %v16, align 8, !tbaa !9 + %v18 = fmul double %v14, 0.000000e+00 + %v19 = getelementptr inbounds %s.0, %s.0* %v5, i32 0, i32 32 + %v20 = load double, double* %v19, align 8, !tbaa !11 + %v21 = fsub double %v20, %v18 + store 
double %v21, double* %v19, align 8, !tbaa !11 + %v22 = fmul double %v10, 0.000000e+00 + %v23 = fadd double 0.000000e+00, %v22 + %v24 = fmul double 0.000000e+00, %v11 + %v25 = fadd double %v23, %v24 + %v26 = fsub double 0.000000e+00, %v25 + store double %v26, double* %v0, align 8, !tbaa !12 + store double 0.000000e+00, double* %v1, align 8, !tbaa !13 + %v27 = getelementptr i32, i32* %v2, i32 1 + br label %b1 +} + +attributes #0 = { "use-soft-float"="false" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !3, i64 0} +!7 = !{!8, !8, i64 0} +!8 = !{!"double", !3, i64 0} +!9 = !{!10, !8, i64 232} +!10 = !{!"", !8, i64 0, !8, i64 8, !8, i64 16, !8, i64 24, !8, i64 32, !8, i64 40, !2, i64 48, !8, i64 56, !8, i64 64, !8, i64 72, !8, i64 80, !6, i64 88, !3, i64 92, !3, i64 93, !8, i64 104, !8, i64 112, !8, i64 120, !8, i64 128, !8, i64 136, !8, i64 144, !8, i64 152, !8, i64 160, !8, i64 168, !8, i64 176, !8, i64 184, !8, i64 192, !8, i64 200, !8, i64 208, !8, i64 216, !8, i64 224, !8, i64 232, !8, i64 240, !8, i64 248, !8, i64 256, !8, i64 264, !8, i64 272, !8, i64 280, !8, i64 288, !8, i64 296, !3, i64 304, !3, i64 1104, !3, i64 1232, !2, i64 1264} +!11 = !{!10, !8, i64 248} +!12 = !{!10, !8, i64 256} +!13 = !{!10, !8, i64 264} diff --git a/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll new file mode 100644 index 0000000000000..3a52d8546b85a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: danny +; CHECK: memw_locked +define i8 @danny(i8* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i8* %a0, i8 0, i8 1 seq_cst seq_cst + %v1 = extractvalue { i8, i1 } %v0, 0 + ret i8 %v1 +} + +; CHECK-LABEL: sammy +; CHECK: memw_locked +define i16 @sammy(i16* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i16* %a0, i16 0, i16 1 seq_cst seq_cst + %v1 = extractvalue { i16, i1 } %v0, 0 + ret i16 %v1 +} + +; CHECK-LABEL: kirby +; CHECK: memw_locked +define i32 @kirby(i32* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i32* %a0, i32 0, i32 1 seq_cst seq_cst + %v1 = extractvalue { i32, i1 } %v0, 0 + ret i32 %v1 +} diff --git a/llvm/test/CodeGen/Hexagon/regalloc-coal-extend-short-subrange.mir b/llvm/test/CodeGen/Hexagon/regalloc-coal-extend-short-subrange.mir new file mode 100644 index 0000000000000..c60a93f8e33e0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/regalloc-coal-extend-short-subrange.mir @@ -0,0 +1,45 @@ +# RUN: llc -march=hexagon -run-pass simple-register-coalescing -verify-coalescing %s -o - | FileCheck %s +# +# Check that this doesn't crash. 
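+# A hedged note on the scenario, inferred from the test name and the IR:
+# %0:hvxwr is round-tripped through %8 inside the hardware loop while only
+# single lanes (%0.vsub_lo, %6.vsub_hi) are read, so the coalescer keeps a
+# per-lane subrange that is shorter than the register's main live range.
+# Merging the COPYs requires extending that short subrange across the loop;
+# getting this wrong leaves inconsistent subregister liveness, which is what
+# -verify-coalescing guards against here.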
+# CHECK: ENDLOOP + +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $v0, $v1, $v2, $v3 + + %0:hvxwr = COPY $v0 + %1:intregs = COPY $r0 + %2:predregs = C2_cmpgtui %1, 2 + %3:intregs = COPY $r1 + %4:intregs = COPY $r2 + %5:hvxvr = COPY $v1 + %6:hvxwr = V6_vcombine $v1, $v0 + %7:hvxqr = IMPLICIT_DEF + J2_loop0r %bb.1, %3, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + J2_jumpf %2, %bb.2, implicit-def $pc + J2_jump %bb.1, implicit-def $pc + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %8:hvxwr = COPY %0 + %9:hvxvr = V6_vL32b_ai %4, 0 + %10:hvxvr = V6_vL32b_ai %4, 128 + %5:hvxvr = V6_vaddwq %7, %5, %9 + %11:hvxqr = V6_vgtuw %10, %0.vsub_lo + %12:hvxvr = V6_vL32b_ai %4, 256 + %13:hvxvr = V6_vL32b_ai %4, 384 + %5:hvxvr = V6_vaddwq %11, %5, %13 + %7:hvxqr = V6_vgtuw %6.vsub_hi, %13 + %0:hvxwr = COPY %8 + %0:hvxwr = V6_vmpyhv_acc %0, %12, %12 + %6:hvxwr = COPY %8 + ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.2, implicit-def $pc + + bb.2: +... diff --git a/llvm/test/CodeGen/Mips/emit-big-cst.ll b/llvm/test/CodeGen/Mips/emit-big-cst.ll index 9bc96c89307d7..67c2f107db19a 100644 --- a/llvm/test/CodeGen/Mips/emit-big-cst.ll +++ b/llvm/test/CodeGen/Mips/emit-big-cst.ll @@ -1,10 +1,32 @@ -; RUN: llc -march=mips < %s | FileCheck %s +; RUN: llc -march=mips < %s | FileCheck %s --check-prefix=BE +; RUN: llc -march=mipsel < %s | FileCheck %s --check-prefix=LE ; Check assembly printing of odd constants. -; CHECK: bigCst: -; CHECK-NEXT: .8byte 1845068520838224192 -; CHECK-NEXT: .8byte 11776 -; CHECK-NEXT: .size bigCst, 16 +; BE-LABEL: bigCst: +; BE-NEXT: .8byte 28829195638097253 +; BE-NEXT: .2byte 46 +; BE-NEXT: .byte 0 +; BE-NEXT: .space 5 +; BE-NEXT: .size bigCst, 16 + +; LE-LABEL: bigCst: +; LE-NEXT: .8byte 12713950999227904 +; LE-NEXT: .2byte 26220 +; LE-NEXT: .byte 0 +; LE-NEXT: .space 5 +; LE-NEXT: .size bigCst, 16 + +; BE-LABEL: smallCst: +; BE-NEXT: .2byte 4386 +; BE-NEXT: .byte 51 +; BE-NEXT: .space 1 +; BE-NEXT: .size smallCst, 4 + +; LE-LABEL: smallCst: +; LE-NEXT: .2byte 8755 +; LE-NEXT: .byte 17 +; LE-NEXT: .space 1 +; LE-NEXT: .size smallCst, 4 @bigCst = internal constant i82 483673642326615442599424 @@ -15,3 +37,5 @@ define void @accessBig(i64* %storage) { store i82 %tmp, i82* %addr ret void } + +@smallCst = internal constant i24 1122867 diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll new file mode 100644 index 0000000000000..e86cec37d5100 --- /dev/null +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips64-unknown-freebsd -O0 -o - %s | FileCheck %s + +; Previously SelectionDAGBuilder would always set alignment to 1 for hidden sret +; parameters so we were generating ldl+ldr/lwl+lwr to load those values even +; though we know that they are aligned (since we allocated an aligned frame index) + +declare dso_local void @use_sret(i32, i128, i64) unnamed_addr +declare dso_local { i32, i128, i64 } @implicit_sret_decl() unnamed_addr + +define internal void @test() unnamed_addr nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: daddiu $sp, $sp, -48 +; CHECK-NEXT: sd $ra, 40($sp) # 8-byte Folded Spill +; CHECK-NEXT: daddiu $4, $sp, 8 +; CHECK-NEXT: jal implicit_sret_decl +; CHECK-NEXT: nop +; CHECK-NEXT: ld $6, 24($sp) +; CHECK-NEXT: ld $5, 16($sp) +; 
CHECK-NEXT: ld $7, 32($sp) +; CHECK-NEXT: lw $1, 8($sp) +; CHECK-NEXT: # implicit-def: $v0_64 +; CHECK-NEXT: move $2, $1 +; CHECK-NEXT: move $4, $2 +; CHECK-NEXT: jal use_sret +; CHECK-NEXT: nop +; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload +; CHECK-NEXT: daddiu $sp, $sp, 48 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop +start: + %0 = call { i32, i128, i64 } @implicit_sret_decl() + %1 = extractvalue { i32, i128, i64 } %0, 0 + %2 = extractvalue { i32, i128, i64 } %0, 1 + %3 = extractvalue { i32, i128, i64 } %0, 2 + call void @use_sret(i32 %1, i128 %2, i64 %3) + ret void +} + +define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { +; CHECK-LABEL: implicit_sret_impl: +; CHECK: # %bb.0: +; CHECK-NEXT: move $1, $4 +; CHECK-NEXT: daddiu $2, $zero, 20 +; CHECK-NEXT: sd $2, 16($4) +; CHECK-NEXT: daddiu $2, $zero, 0 +; CHECK-NEXT: sd $zero, 8($4) +; CHECK-NEXT: daddiu $3, $zero, 30 +; CHECK-NEXT: sd $3, 24($4) +; CHECK-NEXT: addiu $5, $zero, 10 +; CHECK-NEXT: sw $5, 0($4) +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop + ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } +} + +declare dso_local { i32, i32, i32, i32, i32, i32 } @implicit_sret_decl2() unnamed_addr +declare dso_local void @use_sret2(i32, i32, i32) unnamed_addr +define internal void @test2() unnamed_addr nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: daddiu $sp, $sp, -32 +; CHECK-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill +; CHECK-NEXT: daddiu $4, $sp, 0 +; CHECK-NEXT: jal implicit_sret_decl2 +; CHECK-NEXT: nop +; CHECK-NEXT: lw $1, 20($sp) +; CHECK-NEXT: lw $2, 12($sp) +; CHECK-NEXT: lw $3, 4($sp) +; CHECK-NEXT: # implicit-def: $a0_64 +; CHECK-NEXT: move $4, $3 +; CHECK-NEXT: # implicit-def: $a1_64 +; CHECK-NEXT: move $5, $2 +; CHECK-NEXT: # implicit-def: $a2_64 +; CHECK-NEXT: move $6, $1 +; CHECK-NEXT: jal use_sret2 +; CHECK-NEXT: nop +; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload +; CHECK-NEXT: daddiu $sp, $sp, 32 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop +start: + %0 = call { i32, i32, i32, i32, i32, i32 } @implicit_sret_decl2() + %1 = extractvalue { i32, i32, i32, i32, i32, i32 } %0, 1 + %2 = extractvalue { i32, i32, i32, i32, i32, i32 } %0, 3 + %3 = extractvalue { i32, i32, i32, i32, i32, i32 } %0, 5 + call void @use_sret2(i32 %1, i32 %2, i32 %3) + ret void +} + + +define internal { i32, i32, i32, i32, i32, i32 } @implicit_sret_impl2() unnamed_addr nounwind { +; CHECK-LABEL: implicit_sret_impl2: +; CHECK: # %bb.0: +; CHECK-NEXT: move $1, $4 +; CHECK-NEXT: addiu $2, $zero, 6 +; CHECK-NEXT: sw $2, 20($4) +; CHECK-NEXT: addiu $2, $zero, 5 +; CHECK-NEXT: sw $2, 16($4) +; CHECK-NEXT: addiu $2, $zero, 4 +; CHECK-NEXT: sw $2, 12($4) +; CHECK-NEXT: addiu $2, $zero, 3 +; CHECK-NEXT: sw $2, 8($4) +; CHECK-NEXT: addiu $2, $zero, 2 +; CHECK-NEXT: sw $2, 4($4) +; CHECK-NEXT: addiu $2, $zero, 1 +; CHECK-NEXT: sw $2, 0($4) +; CHECK-NEXT: jr $ra +; CHECK-NEXT: nop + ret { i32, i32, i32, i32, i32, i32 } { i32 1, i32 2, i32 3, i32 4, i32 5, i32 6 } +} diff --git a/llvm/test/CodeGen/PowerPC/aix-LinkOnceAnyLinkage.ll b/llvm/test/CodeGen/PowerPC/aix-LinkOnceAnyLinkage.ll new file mode 100644 index 0000000000000..6672976d310bb --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-LinkOnceAnyLinkage.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s + +define linkonce void @_Z3fooIiEvT_() { +entry: + ret void +} + +; CHECK: .weak _Z3fooIiEvT_[DS] +; CHECK: .weak ._Z3fooIiEvT_ diff 
--git a/llvm/test/CodeGen/PowerPC/aix-LinkOnceODRLinkage.ll b/llvm/test/CodeGen/PowerPC/aix-LinkOnceODRLinkage.ll index 7c9c80b9cc48d..c605e2c0f952b 100644 --- a/llvm/test/CodeGen/PowerPC/aix-LinkOnceODRLinkage.ll +++ b/llvm/test/CodeGen/PowerPC/aix-LinkOnceODRLinkage.ll @@ -9,5 +9,5 @@ entry: ret void } -; CHECK: .weak _Z3fooIiEvT_ +; CHECK: .weak _Z3fooIiEvT_[DS] ; CHECK: .weak ._Z3fooIiEvT_ diff --git a/llvm/test/CodeGen/PowerPC/aix-WeakODRLinkage.ll b/llvm/test/CodeGen/PowerPC/aix-WeakODRLinkage.ll new file mode 100644 index 0000000000000..b3fa229b0089f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-WeakODRLinkage.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s + +define weak_odr void @_Z3fooIiEvT_() { +entry: + ret void +} + +; CHECK: .weak _Z3fooIiEvT_[DS] +; CHECK: .weak ._Z3fooIiEvT_ diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 97303d25ea0c1..36f070e7f162d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -2333,7 +2333,7 @@ define void @caller_mix() { ; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 156(1) ; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 160(1) ; ASM32PWR4-DAG: stw [[REG:[0-9]+]], 164(1) -; ASM32PWR4-DAG: bl .mix_floats +; ASM32PWR4: bl .mix_floats ; ASM64PWR4: mflr 0 ; ASM64PWR4-DAG: std 0, 16(1) @@ -2352,9 +2352,4 @@ define void @caller_mix() { ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 200(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 208(1) ; ASM64PWR4-DAG: std [[REG:[0-9]+]], 216(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 224(1) -; ASM64PWR4-DAG: std [[REG:[0-9]+]], 232(1) -; ASM64PWR4-DAG: bl .mix_floats -; ASM64PWR4-DAG: ld [[REGF1:[0-9]+]], 232(1) -; ASM64PWR4-DAG: ld [[REGF2:[0-9]+]], 224(1) -; ASM64PWR4-DAG: blr +; ASM64PWR4: bl .mix_floats diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll deleted file mode 100644 index eca919bee34bb..0000000000000 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s - -%struct.S = type { [65 x i8] } - -define void @foo(%struct.S* byval(%struct.S) align 1 %s) { -entry: - ret void -} - -; CHECK: LLVM ERROR: Passing ByVals split between registers and stack not yet implemented. diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll index a87a5899b2935..3f24a43ad6b2c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -176,8 +176,6 @@ entry: ret void } -declare void @test_byval_mem3(i32, float, %struct_S57* byval(%struct_S57) align 1) - ; CHECK-LABEL: name: call_test_byval_mem3 ; Confirm the expected memcpy call is independent of the call to test_byval_mem3. 
@@ -236,6 +234,53 @@ declare void @test_byval_mem3(i32, float, %struct_S57* byval(%struct_S57) align ; ASM64BIT: bl .test_byval_mem3 ; ASM64BIT: addi 1, 1, 128 +define void @test_byval_mem3(i32, float, %struct_S57* byval(%struct_S57) align 1 %s) { +entry: + ret void +} + + +;CHECK-LABEL: name: test_byval_mem3 + +; 32BIT: fixedStack: +; 32BIT-NEXT: - { id: 0, type: default, offset: 32, size: 60, alignment: 16, stack-id: default, + +; 32BIT: bb.0.entry: +; 32BIT-NEXT: liveins: $r5, $r6, $r7, $r8, $r9, $r10 + +; 32BIT-DAG: %2:gprc = COPY $r5 +; 32BIT-DAG: %3:gprc = COPY $r6 +; 32BIT-DAG: %4:gprc = COPY $r7 +; 32BIT-DAG: %5:gprc = COPY $r8 +; 32BIT-DAG: %6:gprc = COPY $r9 +; 32BIT-DAG: %7:gprc = COPY $r10 +; 32BIT-NEXT: STW %2, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 +; 32BIT-DAG: STW %3, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 +; 32BIT-DAG: STW %4, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 +; 32BIT-DAG: STW %5, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 +; 32BIT-DAG: STW %6, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 +; 32BIT-DAG: STW %7, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 +; 32BIT-NEXT: BLR implicit $lr, implicit $rm + +; 64BIT: fixedStack: +; 64BIT-NEXT: - { id: 0, type: default, offset: 64, size: 64, alignment: 16, stack-id: default, + +; 64BIT: bb.0.entry +; 64BIT-NEXT: liveins: $x5, $x6, $x7, $x8, $x9, $x10 + +; 64BIT-DAG: %2:g8rc = COPY $x5 +; 64BIT-DAG: %3:g8rc = COPY $x6 +; 64BIT-DAG: %4:g8rc = COPY $x7 +; 64BIT-DAG: %5:g8rc = COPY $x8 +; 64BIT-DAG: %6:g8rc = COPY $x9 +; 64BIT-DAG: %7:g8rc = COPY $x10 +; 64BIT-NEXT: STD %2, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) +; 64BIT-DAG: STD %3, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) +; 64BIT-DAG: STD %4, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16, align 16) +; 64BIT-DAG: STD %5, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24) +; 64BIT-DAG: STD %6, 32, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 32, align 16) +; 64BIT-DAG: STD %7, 40, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 40) +; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm %struct_S31 = type { [31 x i8] } @@ -247,7 +292,6 @@ entry: ret void } -declare void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1) ; CHECK-LABEL: name: call_test_byval_mem4 @@ -340,3 +384,58 @@ declare void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %str ; ASM64BIT-DAG: ld 10, 16([[REG1]]) ; ASM64BIT: bl .test_byval_mem4 ; ASM64BIT: addi 1, 1, 352 + +define void @test_byval_mem4(i32, %struct_S31* byval(%struct_S31) align 1, %struct_S256* byval(%struct_S256) align 1 %s) { +entry: + ret void +} + +; CHECK-LABEL: name: test_byval_mem4 + +; 32BIT: fixedStack: +; 32BIT: - { id: 0, type: default, offset: 60, size: 256, alignment: 4, stack-id: default, +; 32BIT: - { id: 1, type: default, offset: 28, size: 32, alignment: 4, stack-id: default, +; 32BIT: stack: [] + +; 32BIT: bb.0.entry: +; 32BIT-NEXT: liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; 32BIT-DAG: %1:gprc = COPY $r4 +; 32BIT-DAG: %2:gprc = COPY $r5 +; 32BIT-DAG: %3:gprc = COPY $r6 +; 32BIT-DAG: %4:gprc = COPY $r7 +; 32BIT-DAG: %5:gprc = COPY $r8 +; 32BIT-DAG: %6:gprc = COPY $r9 +; 32BIT-DAG: %7:gprc = COPY $r10 +; 32BIT-NEXT: STW %1, 0, %fixed-stack.1 :: (store 4 into %fixed-stack.1 +; 32BIT-DAG: STW %2, 4, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 4 +; 32BIT-DAG: STW %3, 8, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 8 +; 
32BIT-DAG: STW %4, 12, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 12 +; 32BIT-DAG: STW %5, 16, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 16 +; 32BIT-DAG: STW %6, 20, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 20 +; 32BIT-DAG: STW %7, 24, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 24 +; 32BIT-NEXT: BLR implicit $lr, implicit $rm + +; 64BIT: fixedStack: +; 64BIT: - { id: 0, type: default, offset: 88, size: 256, alignment: 8, stack-id: default, +; 64BIT: - { id: 1, type: default, offset: 56, size: 32, alignment: 8, stack-id: default, +; 64BIT: stack: [] + +; 64BIT: bb.0.entry: +; 64BIT-NEXT: liveins: $x4, $x5, $x6, $x7, $x8, $x9, $x10 + +; 64BIT-DAG: %1:g8rc = COPY $x4 +; 64BIT-DAG: %2:g8rc = COPY $x5 +; 64BIT-DAG: %3:g8rc = COPY $x6 +; 64BIT-DAG: %4:g8rc = COPY $x7 +; 64BIT-DAG: %5:g8rc = COPY $x8 +; 64BIT-DAG: %6:g8rc = COPY $x9 +; 64BIT-DAG: %7:g8rc = COPY $x10 +; 64BIT-NEXT: STD %1, 0, %fixed-stack.1 :: (store 8 into %fixed-stack.1 +; 64BIT-DAG: STD %2, 8, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 8 +; 64BIT-DAG: STD %3, 16, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 16 +; 64BIT-DAG: STD %4, 24, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 24 +; 64BIT-DAG: STD %5, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 +; 64BIT-DAG: STD %6, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 +; 64BIT-DAG: STD %7, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 +; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll index 535998af9190f..e9155f236c9e1 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll @@ -1,10 +1,10 @@ -; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \ -; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \ +; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | \ +; RUN: FileCheck --check-prefix=CHECK32 %s -; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \ -; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s - -; CHECK: LLVM ERROR: Passing ByVals split between registers and stack not yet implemented. 
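+; The 96-byte byval struct no longer triggers a fatal error. The first eight
+; words arrive in r3-r10 (x3-x10 in 64-bit mode) and are stored into the one
+; fixed stack object covering the whole struct, while elements past the
+; register portion (offsets 80/84) are loaded straight from the
+; caller-initialized part of that object, as the checks below verify.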
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \ +; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | \ +; RUN: FileCheck --check-prefix=CHECK64 %s %struct.Spill = type { [12 x i64 ] } @GS = external global %struct.Spill, align 4 @@ -18,3 +18,64 @@ entry: %add = add i64 %a, %b ret i64 %add } + +; CHECK32: name: test +; CHECK32: liveins: +; CHECK32: - { reg: '$r3', virtual-reg: '' } +; CHECK32: - { reg: '$r4', virtual-reg: '' } +; CHECK32: - { reg: '$r5', virtual-reg: '' } +; CHECK32: - { reg: '$r6', virtual-reg: '' } +; CHECK32: - { reg: '$r7', virtual-reg: '' } +; CHECK32: - { reg: '$r8', virtual-reg: '' } +; CHECK32: - { reg: '$r9', virtual-reg: '' } +; CHECK32: - { reg: '$r10', virtual-reg: '' } +; CHECK32: fixedStack: +; CHECK32: - { id: 0, type: default, offset: 24, size: 96, alignment: 8, stack-id: default, +; CHECK32: stack: [] + +; CHECK32: bb.0.entry: +; CHECK32-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; CHECK32-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 +; CHECK32-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 +; CHECK32-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 +; CHECK32-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 +; CHECK32-DAG: STW renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 +; CHECK32-DAG: STW renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 +; CHECK32-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24 +; CHECK32-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28 +; CHECK32: renamable $r[[REG1:[0-9]+]] = LWZ 84, %fixed-stack.0 +; CHECK32: renamable $r[[REG2:[0-9]+]] = LWZ 80, %fixed-stack.0 +; CHECK32: renamable $r4 = ADDC killed renamable $r8, killed renamable $r[[REG1]], implicit-def $carry +; CHECK32: renamable $r3 = ADDE killed renamable $r7, killed renamable $r[[REG2]], implicit-def dead $carry, implicit killed $carry +; CHECK32: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 + + +; CHECK64: name: test +; CHECK64: liveins: +; CHECK64: - { reg: '$x3', virtual-reg: '' } +; CHECK64: - { reg: '$x4', virtual-reg: '' } +; CHECK64: - { reg: '$x5', virtual-reg: '' } +; CHECK64: - { reg: '$x6', virtual-reg: '' } +; CHECK64: - { reg: '$x7', virtual-reg: '' } +; CHECK64: - { reg: '$x8', virtual-reg: '' } +; CHECK64: - { reg: '$x9', virtual-reg: '' } +; CHECK64: - { reg: '$x10', virtual-reg: '' } +; CHECK64: fixedStack: +; CHECK64: - { id: 0, type: default, offset: 48, size: 96, alignment: 16, stack-id: default, +; CHECK64: stack: [] + +; CHECK64: bb.0.entry: +; CHECK64: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + +; CHECK64: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 +; CHECK64: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 +; CHECK64: STD renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 +; CHECK64: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24 +; CHECK64: STD killed renamable $x7, 32, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 32 +; CHECK64: STD killed renamable $x8, 40, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 40 +; CHECK64: STD killed renamable $x9, 48, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 48 +; CHECK64: STD killed renamable $x10, 56, %fixed-stack.0 :: (store 8 into 
%fixed-stack.0 + 56 +; CHECK64: renamable $x[[REG1:[0-9]+]] = LD 80, %fixed-stack.0 +; CHECK64: renamable $x3 = ADD8 killed renamable $x5, killed renamable $x[[REG1]] +; CHECK64: BLR8 implicit $lr8, implicit $rm, implicit $x3 diff --git a/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll b/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll new file mode 100644 index 0000000000000..e2e6ff18aefa6 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-extern-weak.ll @@ -0,0 +1,308 @@ +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT32 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT64 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck --check-prefix=CHECKSYM %s + +; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o 2>&1 < %s | FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + + +@foo_ext_weak_p = global void (...)* bitcast (void ()* @foo_ext_weak_ref to void (...)*) +@b_w = extern_weak global i32 + +declare extern_weak void @foo_ext_weak_ref() + +define i32 @main() { +entry: + %0 = load void (...)*, void (...)** @foo_ext_weak_p + %callee.knr.cast = bitcast void (...)* %0 to void ()* + call void %callee.knr.cast() + call void @foo_ext_weak(i32* @b_w) + ret i32 0 +} + +declare extern_weak void @foo_ext_weak(i32*) + +; COMMON: .globl main[DS] # -- Begin function main +; COMMON-NEXT: .globl .main +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect main[DS] +; BIT32-NEXT: .long .main # @main +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .main # @main +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .main: + +; COMMON: .csect .data[RW] +; COMMON: .globl foo_ext_weak_p +; BIT32-NEXT: .align 2 +; BIT64-NEXT: .align 3 +; COMMON-NEXT: foo_ext_weak_p: +; BIT32-NEXT: .long foo_ext_weak_ref[DS] +; BIT64-NEXT: .llong foo_ext_weak_ref[DS] +; COMMON-NEXT: .weak b_w[UA] +; COMMON-NEXT: .weak foo_ext_weak_ref[DS] +; COMMON-NEXT: .weak .foo_ext_weak +; COMMON-NEXT: .weak foo_ext_weak[DS] +; COMMON-NEXT: .toc +; COMMON-NEXT: LC0: +; COMMON-NEXT: .tc foo_ext_weak_p[TC],foo_ext_weak_p +; COMMON-NEXT: LC1: +; COMMON-NEXT: .tc b_w[TC],b_w[UA] + +; CHECK-NOT: .weak .foo_ext_weak_ref + +; CHECKSYM: Symbols [ +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index:]] +; CHECKSYM-NEXT: Name: .foo_ext_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+1]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { 
+; CHECKSYM-NEXT: Index: [[#Index+2]] +; CHECKSYM-NEXT: Name: foo_ext_weak_ref +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+3]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+4]] +; CHECKSYM-NEXT: Name: b_w +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+5]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_UA (0x4) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+6]] +; CHECKSYM-NEXT: Name: foo_ext_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+7]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+8]] +; CHECKSYM-NEXT: Name: .text +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+9]] +; CHECKSYM-NEXT: SectionLen: 80 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 4 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+10]] +; CHECKSYM-NEXT: Name: .main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: 
StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+12]] +; CHECKSYM-NEXT: Name: .data +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x50 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+13]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+14]] +; CHECKSYM-NEXT: Name: foo_ext_weak_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x50 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+15]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+12]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+16]] +; CHECKSYM-NEXT: Name: main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x54 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+17]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+18]] +; CHECKSYM-NEXT: Name: TOC +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x60 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+19]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC0 (0xF) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+20]] +; CHECKSYM-NEXT: Name: foo_ext_weak_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x60 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+21]] +; CHECKSYM-NEXT: SectionLen: 4 +; 
CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+22]] +; CHECKSYM-NEXT: Name: b_w +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x64 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+23]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: ] diff --git a/llvm/test/CodeGen/PowerPC/aix-extern.ll b/llvm/test/CodeGen/PowerPC/aix-extern.ll new file mode 100644 index 0000000000000..e5ec5db235a6e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-extern.ll @@ -0,0 +1,386 @@ +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT32 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT64 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck --check-prefix=CHECKSYM %s + +; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o 2>&1 < %s | FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + +@bar_p = global i32 (...)* @bar_ref, align 4 +@b_e = external global i32, align 4 + +; Function Attrs: noinline nounwind optnone +define void @foo() { +entry: + ret void +} + +declare i32 @bar_ref(...) 
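+; main references bar_ref both directly and through the bar_p pointer, and
+; calls the undefined bar_extern, so the AIX assembly must emit .extern for
+; the entry points (.bar_ref, .bar_extern) and for their function
+; descriptors (bar_ref[DS], bar_extern[DS]); b_e is only addressed through
+; the TOC, so it is declared as .extern b_e[UA].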
+ +; Function Attrs: noinline nounwind optnone +define i32 @main() { +entry: + %call = call i32 @bar_extern(i32* @b_e) + call void @foo() + %0 = load i32 (...)*, i32 (...)** @bar_p, align 4 + %callee.knr.cast = bitcast i32 (...)* %0 to i32 ()* + %call1 = call i32 %callee.knr.cast() + %call2 = call i32 bitcast (i32 (...)* @bar_ref to i32 ()*)() + ret i32 0 +} + +declare i32 @bar_extern(i32*) + + +; COMMON: .globl foo[DS] # -- Begin function foo +; COMMON-NEXT: .globl .foo +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect foo[DS] +; BIT32-NEXT: .long .foo # @foo +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .foo # @foo +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .foo: + +; COMMON: .globl main[DS] # -- Begin function main +; COMMON-NEXT: .globl .main +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect main[DS] +; BIT32-NEXT: .long .main # @main +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .main # @main +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .main: + +; COMMON: .csect .data[RW] +; COMMON-NEXT: .globl bar_p +; BIT32-NEXT: .align 2 +; BIT64-NEXT: .align 3 +; COMMON-NEXT: bar_p: +; BIT32-NEXT: .long bar_ref[DS] +; BIT64-NEXT: .llong bar_ref[DS] +; COMMON-NEXT: .extern b_e[UA] +; COMMON-NEXT: .extern .bar_ref +; COMMON-NEXT: .extern bar_ref[DS] +; COMMON-NEXT: .extern .bar_extern +; COMMON-NEXT: .extern bar_extern[DS] +; COMMON-NEXT: .toc +; COMMON-NEXT: LC0: +; COMMON-NEXT: .tc b_e[TC],b_e[UA] +; COMMON-NEXT: LC1: +; COMMON-NEXT: .tc bar_p[TC],bar_p + +; CHECKSYM: Symbols [ +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index:]] +; CHECKSYM-NEXT: Name: .bar_extern +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+1]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+2]] +; CHECKSYM-NEXT: Name: .bar_ref +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+3]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+4]] +; CHECKSYM-NEXT: Name: bar_ref +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+5]] +; CHECKSYM-NEXT: SectionLen: 0 +; 
CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+6]] +; CHECKSYM-NEXT: Name: b_e +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+7]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_UA (0x4) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+8]] +; CHECKSYM-NEXT: Name: bar_extern +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+9]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+10]] +; CHECKSYM-NEXT: Name: .text +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: SectionLen: 112 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 4 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+12]] +; CHECKSYM-NEXT: Name: .foo +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+13]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+10]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+14]] +; CHECKSYM-NEXT: Name: .main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x10 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; 
CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+15]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+10]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+16]] +; CHECKSYM-NEXT: Name: .data +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x70 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+17]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+18]] +; CHECKSYM-NEXT: Name: bar_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x70 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+19]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+16]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+20]] +; CHECKSYM-NEXT: Name: foo +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x74 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+21]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+22]] +; CHECKSYM-NEXT: Name: main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x80 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+23]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+24]] +; CHECKSYM-NEXT: 
Name: TOC +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x8C +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+25]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC0 (0xF) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+26]] +; CHECKSYM-NEXT: Name: b_e +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x8C +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+27]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+28]] +; CHECKSYM-NEXT: Name: bar_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x90 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+29]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: ] diff --git a/llvm/test/CodeGen/PowerPC/aix-reference-func-addr-const.ll b/llvm/test/CodeGen/PowerPC/aix-reference-func-addr-const.ll index 88d5f3d1427dc..80bd75b206f27 100644 --- a/llvm/test/CodeGen/PowerPC/aix-reference-func-addr-const.ll +++ b/llvm/test/CodeGen/PowerPC/aix-reference-func-addr-const.ll @@ -20,6 +20,7 @@ entry: ;CHECK-NEXT: .align 2 ;CHECK-NEXT: bar_ptr1: ;CHECK-NEXT: .long bar[DS] +;CHECK-NEXT: .extern foo[DS] ;CHECK64: .csect .data[RW] ;CHECK64-NEXT: .globl foo_ptr @@ -30,3 +31,4 @@ entry: ;CHECK64-NEXT: .align 3 ;CHECK64-NEXT: bar_ptr1: ;CHECK64-NEXT: .llong bar[DS] +;CHECK64-NEXT: .extern foo[DS] diff --git a/llvm/test/CodeGen/PowerPC/aix-weak.ll b/llvm/test/CodeGen/PowerPC/aix-weak.ll new file mode 100644 index 0000000000000..3a7f050049071 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-weak.ll @@ -0,0 +1,362 @@ +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT32 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec < %s | FileCheck --check-prefixes=COMMON,BIT64 %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck --check-prefix=CHECKSYM %s + +; RUN: not --crash llc -verify-machineinstrs 
-mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o 2>&1 < %s | FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + +@foo_weak_p = global void (...)* bitcast (void ()* @foo_ref_weak to void (...)*), align 4 +@b = weak global i32 0, align 4 + +define weak void @foo_weak(i32* %p) { +entry: + %p.addr = alloca i32*, align 4 + store i32* %p, i32** %p.addr, align 4 + %0 = load i32*, i32** %p.addr, align 4 + %1 = load i32, i32* %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* %0, align 4 + ret void +} + +define weak void @foo_ref_weak() { +entry: + ret void +} + +define i32 @main() { +entry: + %0 = load void (...)*, void (...)** @foo_weak_p, align 4 + %callee.knr.cast = bitcast void (...)* %0 to void ()* + call void %callee.knr.cast() + call void @foo_weak(i32* @b) + call void @foo_ref_weak() + ret i32 0 +} + +; COMMON: .weak foo_weak[DS] # -- Begin function foo_weak +; COMMON-NEXT: .weak .foo_weak +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect foo_weak[DS] +; BIT32-NEXT: .long .foo_weak # @foo_weak +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .foo_weak # @foo_weak +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .foo_weak: + +; COMMON: .weak foo_ref_weak[DS] # -- Begin function foo_ref_weak +; COMMON-NEXT: .weak .foo_ref_weak +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect foo_ref_weak[DS] +; BIT32-NEXT: .long .foo_ref_weak # @foo_ref_weak +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .foo_ref_weak # @foo_ref_weak +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .foo_ref_weak: + +; COMMON: .globl main[DS] # -- Begin function main +; COMMON-NEXT: .globl .main +; COMMON-NEXT: .align 4 +; COMMON-NEXT: .csect main[DS] +; BIT32-NEXT: .long .main # @main +; BIT32-NEXT: .long TOC[TC0] +; BIT32-NEXT: .long 0 +; BIT64-NEXT: .llong .main # @main +; BIT64-NEXT: .llong TOC[TC0] +; BIT64-NEXT: .llong 0 +; COMMON-NEXT: .csect .text[PR] +; COMMON-NEXT: .main: + +; COMMON: .csect .data[RW] +; COMMON-NEXT: .globl foo_weak_p +; BIT32-NEXT: .align 2 +; BIT64-NEXT: .align 3 +; COMMON-NEXT: foo_weak_p: +; BIT32-NEXT: .long foo_ref_weak[DS] +; BIT64-NEXT: .llong foo_ref_weak[DS] +; COMMON-NEXT: .weak b +; COMMON-NEXT: .align 2 +; COMMON-NEXT: b: +; COMMON-NEXT: .long 0 # 0x0 +; COMMON-NEXT: .toc +; COMMON-NEXT: LC0: +; COMMON-NEXT: .tc foo_weak_p[TC],foo_weak_p +; COMMON-NEXT: LC1: +; COMMON-NEXT: .tc b[TC],b + + +; CHECKSYM: Symbols [ +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index:]] +; CHECKSYM-NEXT: Name: .text +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+1]] +; CHECKSYM-NEXT: SectionLen: 136 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 4 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+2]] +; CHECKSYM-NEXT: Name: .foo_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: .text +; 
CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+3]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+4]] +; CHECKSYM-NEXT: Name: .foo_ref_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x20 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+5]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+6]] +; CHECKSYM-NEXT: Name: .main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x30 +; CHECKSYM-NEXT: Section: .text +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+7]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+8]] +; CHECKSYM-NEXT: Name: .data +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x88 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+9]] +; CHECKSYM-NEXT: SectionLen: 8 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+10]] +; CHECKSYM-NEXT: Name: foo_weak_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x88 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+11]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: 
StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+12]] +; CHECKSYM-NEXT: Name: b +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x8C +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+13]] +; CHECKSYM-NEXT: ContainingCsectSymbolIndex: [[#Index+8]] +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_LD (0x2) +; CHECKSYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+14]] +; CHECKSYM-NEXT: Name: foo_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x90 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+15]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+16]] +; CHECKSYM-NEXT: Name: foo_ref_weak +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x9C +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_WEAKEXT (0x6F) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+17]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+18]] +; CHECKSYM-NEXT: Name: main +; CHECKSYM-NEXT: Value (RelocatableAddress): 0xA8 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+19]] +; CHECKSYM-NEXT: SectionLen: 12 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+20]] +; CHECKSYM-NEXT: Name: TOC +; CHECKSYM-NEXT: Value (RelocatableAddress): 0xB4 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+21]] +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: 
SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC0 (0xF) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+22]] +; CHECKSYM-NEXT: Name: foo_weak_p +; CHECKSYM-NEXT: Value (RelocatableAddress): 0xB4 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+23]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: Symbol { +; CHECKSYM-NEXT: Index: [[#Index+24]] +; CHECKSYM-NEXT: Name: b +; CHECKSYM-NEXT: Value (RelocatableAddress): 0xB8 +; CHECKSYM-NEXT: Section: .data +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: [[#Index+25]] +; CHECKSYM-NEXT: SectionLen: 4 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 2 +; CHECKSYM-NEXT: SymbolType: XTY_SD (0x1) +; CHECKSYM-NEXT: StorageMappingClass: XMC_TC (0x3) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: ] diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-const.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-const.ll index 5c1f55f4aac44..f987a20ccbab9 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-const.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-const.ll @@ -25,15 +25,15 @@ entry: ;CHECK: .csect .rodata[RO] ;CHECK-NEXT: .align 4 ;CHECK-NEXT: .L__const.main.cnst32: -;CHECK-NEXT: .llong 4611686018427387954 # 0x4000000000000032 +;CHECK-NEXT: .llong 4611686018427387954 ;CHECK-NEXT: .long 0 # 0x0 ;CHECK-NEXT: .space 4 -;CHECK-NEXT: .llong 0 # 0x0 +;CHECK-NEXT: .llong 0 ;CHECK-NEXT: .long 0 # 0x0 ;CHECK-NEXT: .space 4 ;CHECK-NEXT: .align 3 ;CHECK-NEXT: .L__const.main.cnst16: -;CHECK-NEXT: .llong 4611686018427387926 # 0x4000000000000016 +;CHECK-NEXT: .llong 4611686018427387926 ;CHECK-NEXT: .long 0 # 0x0 ;CHECK-NEXT: .space 4 ;CHECK-NEXT: .align 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll index 04ce0f9307e4d..3aa1f8636854c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll @@ -36,7 +36,7 @@ declare i32 @bar(i32) ; OBJ-NEXT: NumberOfSections: 2 ; OBJ-NEXT: TimeStamp: None (0x0) ; OBJ-NEXT: SymbolTableOffset: 0x13C -; OBJ-NEXT: SymbolTableEntries: 24 +; OBJ-NEXT: SymbolTableEntries: 26 ; OBJ-NEXT: OptionalHeaderSize: 0x0 ; OBJ-NEXT: Flags: 0x0 ; OBJ-NEXT: } @@ -86,7 +86,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x1A -; RELOC-NEXT: Symbol: globalA (20) +; RELOC-NEXT: Symbol: globalA (22) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 @@ -94,7 +94,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x1E -; 
RELOC-NEXT: Symbol: globalB (22) +; RELOC-NEXT: Symbol: globalB (24) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 16 @@ -104,7 +104,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: Section (index: 2) .data { ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x70 -; RELOC-NEXT: Symbol: arr (12) +; RELOC-NEXT: Symbol: arr (14) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 @@ -112,7 +112,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x74 -; RELOC-NEXT: Symbol: .foo (4) +; RELOC-NEXT: Symbol: .foo (6) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 @@ -120,7 +120,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x78 -; RELOC-NEXT: Symbol: TOC (18) +; RELOC-NEXT: Symbol: TOC (20) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 @@ -128,7 +128,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x80 -; RELOC-NEXT: Symbol: globalA (8) +; RELOC-NEXT: Symbol: globalA (10) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 @@ -136,7 +136,7 @@ declare i32 @bar(i32) ; RELOC-NEXT: } ; RELOC-NEXT: Relocation { ; RELOC-NEXT: Virtual Address: 0x84 -; RELOC-NEXT: Symbol: globalB (10) +; RELOC-NEXT: Symbol: globalB (12) ; RELOC-NEXT: IsSigned: No ; RELOC-NEXT: FixupBitValue: 0 ; RELOC-NEXT: Length: 32 @@ -168,6 +168,26 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: Symbol { ; SYM-NEXT: Index: 2 +; SYM-NEXT: Name: bar +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: N_UNDEF +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: 3 +; SYM-NEXT: SectionLen: 0 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 0 +; SYM-NEXT: SymbolType: XTY_ER (0x0) +; SYM-NEXT: StorageMappingClass: XMC_DS (0xA) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: 4 ; SYM-NEXT: Name: .text ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: Section: .text @@ -175,7 +195,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 3 +; SYM-NEXT: Index: 5 ; SYM-NEXT: SectionLen: 64 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -187,7 +207,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 4 +; SYM-NEXT: Index: 6 ; SYM-NEXT: Name: .foo ; SYM-NEXT: Value (RelocatableAddress): 0x0 ; SYM-NEXT: Section: .text @@ -195,8 +215,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 5 -; SYM-NEXT: ContainingCsectSymbolIndex: 2 +; SYM-NEXT: Index: 7 +; SYM-NEXT: ContainingCsectSymbolIndex: 4 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -207,7 +227,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 6 +; SYM-NEXT: Index: 8 ; SYM-NEXT: Name: .data ; SYM-NEXT: Value (RelocatableAddress): 0x40 ; SYM-NEXT: Section: .data @@ -215,7 +235,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) 
; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 7 +; SYM-NEXT: Index: 9 ; SYM-NEXT: SectionLen: 52 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -227,7 +247,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 8 +; SYM-NEXT: Index: 10 ; SYM-NEXT: Name: globalA ; SYM-NEXT: Value (RelocatableAddress): 0x40 ; SYM-NEXT: Section: .data @@ -235,8 +255,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 9 -; SYM-NEXT: ContainingCsectSymbolIndex: 6 +; SYM-NEXT: Index: 11 +; SYM-NEXT: ContainingCsectSymbolIndex: 8 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -247,7 +267,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 10 +; SYM-NEXT: Index: 12 ; SYM-NEXT: Name: globalB ; SYM-NEXT: Value (RelocatableAddress): 0x44 ; SYM-NEXT: Section: .data @@ -255,8 +275,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 11 -; SYM-NEXT: ContainingCsectSymbolIndex: 6 +; SYM-NEXT: Index: 13 +; SYM-NEXT: ContainingCsectSymbolIndex: 8 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -267,7 +287,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 12 +; SYM-NEXT: Index: 14 ; SYM-NEXT: Name: arr ; SYM-NEXT: Value (RelocatableAddress): 0x48 ; SYM-NEXT: Section: .data @@ -275,8 +295,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 13 -; SYM-NEXT: ContainingCsectSymbolIndex: 6 +; SYM-NEXT: Index: 15 +; SYM-NEXT: ContainingCsectSymbolIndex: 8 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -287,7 +307,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 14 +; SYM-NEXT: Index: 16 ; SYM-NEXT: Name: p ; SYM-NEXT: Value (RelocatableAddress): 0x70 ; SYM-NEXT: Section: .data @@ -295,8 +315,8 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 15 -; SYM-NEXT: ContainingCsectSymbolIndex: 6 +; SYM-NEXT: Index: 17 +; SYM-NEXT: ContainingCsectSymbolIndex: 8 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 ; SYM-NEXT: SymbolAlignmentLog2: 0 @@ -307,7 +327,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 16 +; SYM-NEXT: Index: 18 ; SYM-NEXT: Name: foo ; SYM-NEXT: Value (RelocatableAddress): 0x74 ; SYM-NEXT: Section: .data @@ -315,7 +335,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_EXT (0x2) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 17 +; SYM-NEXT: Index: 19 ; SYM-NEXT: SectionLen: 12 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -327,7 +347,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 18 +; SYM-NEXT: Index: 20 ; SYM-NEXT: Name: TOC ; SYM-NEXT: Value (RelocatableAddress): 0x80 ; SYM-NEXT: Section: .data @@ -335,7 +355,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; 
SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 19 +; SYM-NEXT: Index: 21 ; SYM-NEXT: SectionLen: 0 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -347,7 +367,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 20 +; SYM-NEXT: Index: 22 ; SYM-NEXT: Name: globalA ; SYM-NEXT: Value (RelocatableAddress): 0x80 ; SYM-NEXT: Section: .data @@ -355,7 +375,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 21 +; SYM-NEXT: Index: 23 ; SYM-NEXT: SectionLen: 4 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 @@ -367,7 +387,7 @@ declare i32 @bar(i32) ; SYM-NEXT: } ; SYM-NEXT: } ; SYM-NEXT: Symbol { -; SYM-NEXT: Index: 22 +; SYM-NEXT: Index: 24 ; SYM-NEXT: Name: globalB ; SYM-NEXT: Value (RelocatableAddress): 0x84 ; SYM-NEXT: Section: .data @@ -375,7 +395,7 @@ declare i32 @bar(i32) ; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) ; SYM-NEXT: NumberOfAuxEntries: 1 ; SYM-NEXT: CSECT Auxiliary Entry { -; SYM-NEXT: Index: 23 +; SYM-NEXT: Index: 25 ; SYM-NEXT: SectionLen: 4 ; SYM-NEXT: ParameterHashIndex: 0x0 ; SYM-NEXT: TypeChkSectNum: 0x0 diff --git a/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir b/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir index 861b385deb24d..262e71d48fc09 100644 --- a/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir +++ b/llvm/test/CodeGen/PowerPC/expand-isel-liveness.mir @@ -38,14 +38,14 @@ body: | ; CHECK-LABEL: name: expand_isel_liveness1 ; CHECK: bb.1: - ; CHECK: liveins: $x7 + ; CHECK: liveins: $x3, $x4, $x7 ; CHECK: renamable $x5 = ORI8 killed renamable $x7, 0 ; CHECK: B %bb.3 ; CHECK: bb.2: - ; CHECK: liveins: $zero8 + ; CHECK: liveins: $x3, $x4 ; CHECK: renamable $x5 = ADDI8 $zero8, 0 ; CHECK: bb.3: - ; CHECK: liveins: $x3, $x4, $x5, $x6, $cr1lt, $cr1gt, $x3, $cr6lt, $cr0eq, $r3, $cr5un, $cr1eq, $cr1un, $cr6un, $cr0lt, $cr0gt, $cr6gt, $cr0un, $cr1, $cr6, $cr5eq, $x8, $r8, $cr6eq, $x4, $r4, $cr0, $cr5gt, $cr5, $cr5lt, $x7, $r7, $x5, $r5, $x5, $zero8, $x7, $cr5lt + ; CHECK: liveins: $x3, $x4, $x5 ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5 ... @@ -74,7 +74,7 @@ body: | ; CHECK: $r3 = ORI killed $r0, 0 ; CHECK: B %bb.3 ; CHECK: bb.2.entry: - ; CHECK: liveins: $zero + ; CHECK-NOT: liveins: $zero ; CHECK: $r3 = ADDI $zero, 0 ... 
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll index 47df8921fb6fa..cc8a2ec5de720 100644 --- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll +++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR -; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR +; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -14,9 +14,9 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) { ; CHECK: fadds 1, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd float %x0, %x1 - %t1 = fadd float %t0, %x2 - %t2 = fadd float %t1, %x3 + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 ret float %t2 } @@ -28,9 +28,9 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) { ; CHECK: fadds 1, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd float %x0, %x1 - %t1 = fadd float %x2, %t0 - %t2 = fadd float %t1, %x3 + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %t1, %x3 ret float %t2 } @@ -42,9 +42,9 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) { ; CHECK: fadds 1, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd float %x0, %x1 - %t1 = fadd float %t0, %x2 - %t2 = fadd float %x3, %t1 + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %x3, %t1 ret float %t2 } @@ -56,9 +56,9 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) { ; CHECK: fadds 1, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd float %x0, %x1 - %t1 = fadd float %x2, %t0 - %t2 = fadd float %x3, %t1 + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %x3, %t1 ret float %t2 } @@ -77,13 +77,13 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa ; CHECK: fadds 1, [[REG2]], 8 ; CHECK-NEXT: blr - %t0 = fadd float %x0, %x1 - %t1 = fadd float %t0, %x2 - %t2 = fadd float %t1, %x3 - %t3 = fadd float %t2, %x4 - %t4 = fadd float %t3, %x5 - %t5 = fadd float %t4, %x6 - %t6 = fadd float %t5, %x7 + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 + %t3 = fadd reassoc nsz float %t2, %x4 + %t4 = fadd reassoc nsz float %t3, %x5 + %t5 = fadd reassoc nsz float %t4, %x6 + %t6 = fadd reassoc nsz float %t5, %x7 ret float %t6 } @@ -100,9 +100,9 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, < ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd <4 x float> %x0, %x1 - %t1 = fadd <4 x float> %t0, %x2 - %t2 = fadd <4 x float> %t1, %x3 + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 ret <4 x float> %t2 } @@ -117,9 +117,9 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, < ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] ; 
CHECK-NEXT: blr - %t0 = fadd <4 x float> %x0, %x1 - %t1 = fadd <4 x float> %x2, %t0 - %t2 = fadd <4 x float> %t1, %x3 + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 ret <4 x float> %t2 } @@ -134,9 +134,9 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, < ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd <4 x float> %x0, %x1 - %t1 = fadd <4 x float> %t0, %x2 - %t2 = fadd <4 x float> %x3, %t1 + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 ret <4 x float> %t2 } @@ -151,9 +151,9 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, < ; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]] ; CHECK-NEXT: blr - %t0 = fadd <4 x float> %x0, %x1 - %t1 = fadd <4 x float> %x2, %t0 - %t2 = fadd <4 x float> %x3, %t1 + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 ret <4 x float> %t2 } diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll index c145b5c4378d1..957a2d5e48f24 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll @@ -7,13 +7,11 @@ ; CHECK-S-LABEL: caller -; CHECK-S: bl callee@notoc -; CHECK-S: blr +; CHECK-S: b callee@notoc ; CHECK-O-LABEL: caller -; CHECK-O: bl +; CHECK-O: b ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee -; CHECK-O: blr define dso_local signext i32 @caller() local_unnamed_addr { entry: %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)() @@ -25,13 +23,11 @@ declare signext i32 @callee(...) local_unnamed_addr ; Some calls can be considered External Symbols.
; CHECK-S-LABEL: ExternalSymbol -; CHECK-S: bl memcpy@notoc -; CHECK-S: blr +; CHECK-S: b memcpy@notoc ; CHECK-O-LABEL: ExternalSymbol -; CHECK-O: bl +; CHECK-O: b ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy -; CHECK-O: blr define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr { entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false) diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll index 010704f546d0a..bb6b5052ee588 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -193,19 +193,10 @@ define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr ; CHECK-ALL-LABEL: TailCallLocal1: ; CHECK-S: .localentry TailCallLocal1 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 +; CHECK-S: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b localCall@notoc entry: %0 = load i32, i32* @globalVar, align 4 %add = add nsw i32 %0, %a @@ -217,20 +208,11 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr ; CHECK-ALL-LABEL: TailCallLocal2: ; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b localCall@notoc entry: %0 = load i32, i32* @externGlobalVar, align 4 %add = add nsw i32 %0, %a @@ -243,16 +225,7 @@ define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unname ; CHECK-S: .localentry TailCallLocalNoGlobal, 1 ; CHECK-P9: .localentry TailCallLocalNoGlobal, .Lfunc_lep9-.Lfunc_gep9 ; CHECK-ALL: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S: b localCall@notoc entry: %call = tail call signext i32 @localCall(i32 signext %a) ret i32 %call @@ -262,19 +235,10 @@ define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr ; CHECK-ALL-LABEL: TailCallExtern1: ; CHECK-S: .localentry TailCallExtern1 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 +; CHECK-S: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 
-; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc entry: %0 = load i32, i32* @globalVar, align 4 %add = add nsw i32 %0, %a @@ -286,20 +250,11 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr ; CHECK-ALL-LABEL: TailCallExtern2: ; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc entry: %0 = load i32, i32* @externGlobalVar, align 4 %add = add nsw i32 %0, %a @@ -311,16 +266,8 @@ define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnam ; CHECK-ALL-LABEL: TailCallExternNoGlobal: ; CHECK-S: .localentry TailCallExternNoGlobal, 1 ; CHECK-S-NEXT: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc +; CHECK-S-NEXT: #TC_RETURNd8 externCall@notoc entry: %call = tail call signext i32 @externCall(i32 signext %a) ret i32 %call @@ -443,18 +390,10 @@ entry: define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCallOnly: ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: mtctr r4 ; CHECK-S-NEXT: mr r12, r4 -; CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: bctr +; CHECK-S-NEXT: #TC_RETURNr8 ctr entry: %call = tail call signext i32 %call_param(i32 signext %a) ret i32 %call diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll index a3404a8951a19..7f7659b356ee7 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -215,20 +215,13 @@ entry: define dso_local void @ReadFuncPtr() local_unnamed_addr { ; CHECK-LABEL: ReadFuncPtr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK: .localentry ReadFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 -; CHECK-NEXT: bctrl -; CHECK-NEXT: addi r1, r1, 32 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: blr +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 entry: %0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8 tail call void %0() diff --git 
a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll index 7806d691c5309..d7df6f10a6be1 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll @@ -9,20 +9,10 @@ define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) { ; CHECK-LABEL: IndirectCallExternFuncPtr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -32(r1) - -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mr r12, r3 -; CHECK-NEXT: bctrl - -; CHECK-NEXT: addi r1, r1, 32 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: blr +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr entry: tail call void %ptrfunc() ret void diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll new file mode 100644 index 0000000000000..78b01601f1faa --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; These tests check the behaviour of PC-Relative tail calls. With +; PC-Relative addressing we can tail call more often than before, +; because we no longer need to restore the TOC pointer into R2 after +; most calls. + +@Func = external local_unnamed_addr global i32 (...)*, align 8 +@FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8 + +; No calls in this function, but we assign the function pointers. +define dso_local void @AssignFuncPtr() local_unnamed_addr { +; CHECK-LABEL: AssignFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: pld r4, Function@got@pcrel(0), 1 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: pstd r4, FuncLocal@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i32 (...)* @Function, i32 (...)** @Func, align 8 + store i32 (...)* @Function, i32 (...)** @FuncLocal, align 8 + ret void +} + +declare signext i32 @Function(...)
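+
+; As a rough illustration of the difference (a hand-written sketch, not
+; output checked by this test): in the old TOC-based sequence the caller
+; must restore r2 after the call, which blocks tail calling:
+;   bl Function          # call; may clobber the TOC pointer in r2
+;   ld r2, 24(r1)        # reload r2 from the ELFv2 TOC save slot
+; With PC-Relative addressing (e.g. -mcpu=future above) no restore is
+; needed, so the same call can be emitted as a bare branch and the
+; callee returns directly to our caller:
+;   b Function@notoc     # tail call; nothing to clean up afterwards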
+ +define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr { +; CHECK-LABEL: TailCallLocalFuncPtr: +; CHECK: .localentry TailCallLocalFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r12, FuncLocal@PCREL(0), 1 +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @FuncLocal to i32 ()**), align 8 + %call = tail call signext i32 %0() + ret void +} + +define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr { +; CHECK-LABEL: TailCallExtrnFuncPtr: +; CHECK: .localentry TailCallExtrnFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @Func to i32 ()**), align 8 + %call = tail call signext i32 %0() + ret void +} + +define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr { +; CHECK-LABEL: TailCallParamFuncPtr: +; CHECK: .localentry TailCallParamFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()* + %call = tail call signext i32 %callee.knr.cast() + ret i32 %call +} + +define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailIndirectCall: +; CHECK: .localentry NoTailIndirectCall, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: bctrl +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()* + %call = tail call signext i32 %callee.knr.cast() + %add = add nsw i32 %call, %a + ret i32 %add +} + +define dso_local signext i32 @TailCallDirect() local_unnamed_addr { +; CHECK-LABEL: TailCallDirect: +; CHECK: .localentry TailCallDirect, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: b Function@notoc +; CHECK-NEXT: #TC_RETURNd8 Function@notoc 0 +entry: + %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallDirect: +; CHECK: .localentry NoTailCallDirect, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl Function@notoc +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)() + %add = add nsw i32 %call, 
%a + ret i32 %add +} + +define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr { +; CHECK-LABEL: TailCallDirectLocal: +; CHECK: .localentry TailCallDirectLocal, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: b LocalFunction@notoc +; CHECK-NEXT: #TC_RETURNd8 LocalFunction@notoc 0 +entry: + %call = tail call fastcc signext i32 @LocalFunction() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallDirectLocal: +; CHECK: .localentry NoTailCallDirectLocal, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl LocalFunction@notoc +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call fastcc signext i32 @LocalFunction() + %add = add nsw i32 %call, %a + ret i32 %add +} + +define dso_local signext i32 @TailCallAbs() local_unnamed_addr { +; CHECK-LABEL: TailCallAbs: +; CHECK: .localentry TailCallAbs, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r12, 400 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallAbs: +; CHECK: .localentry NoTailCallAbs, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r12, 400 +; CHECK-NEXT: bctrl +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)() + %add = add nsw i32 %call, %a + ret i32 %add +} + +; Function Attrs: noinline +; This function should be tail called and not inlined. +define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 { +; CHECK-LABEL: LocalFunction: +; CHECK: .localentry LocalFunction, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: li r3, 42 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 asm "li $0, 42", "=&r"() + ret i32 %0 +} + +attributes #0 = { noinline } + diff --git a/llvm/test/CodeGen/PowerPC/ppc64-toc.ll b/llvm/test/CodeGen/PowerPC/ppc64-toc.ll index 728f5c7765f86..121aa09750230 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-toc.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-toc.ll @@ -1,4 +1,6 @@ ; RUN: llc -verify-machineinstrs -code-model=small < %s | FileCheck %s +; Test with null streamer. 
+; RUN: llc -O0 -filetype=null < %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/PowerPC/qpx-qvfmadd.ll b/llvm/test/CodeGen/PowerPC/qpx-qvfmadd.ll new file mode 100644 index 0000000000000..4c86a876a4795 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-qvfmadd.ll @@ -0,0 +1,79 @@ +; RUN: llc -verify-machineinstrs -stop-after=finalize-isel < %s -mcpu=a2q | FileCheck %s +target triple = "powerpc64-bgq-linux" + +define <2 x double> @test_qvfmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) { +; CHECK: test_qvfmadd +; CHECK: QVFMADD %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <2 x double> %2, %1 + %5 = fadd reassoc nsz <2 x double> %4, %0 + ret <2 x double> %5 +} + +define <4 x float> @test_qvfmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) { +; CHECK: test_qvfmadds +; CHECK: QVFMADDSs %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <4 x float> %2, %1 + %5 = fadd reassoc nsz <4 x float> %4, %0 + ret <4 x float> %5 +} + +define <2 x double> @test_qvfnmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) { +; CHECK: test_qvfnmadd +; CHECK: QVFNMADD %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <2 x double> %2, %1 + %5 = fadd reassoc nsz <2 x double> %4, %0 + %6 = fneg reassoc nsz <2 x double> %5 + ret <2 x double> %6 +} + +define <4 x float> @test_qvfnmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) { +; CHECK: test_qvfnmadds +; CHECK: QVFNMADDSs %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <4 x float> %2, %1 + %5 = fadd reassoc nsz <4 x float> %4, %0 + %6 = fneg reassoc nsz <4 x float> %5 + ret <4 x float> %6 +} + +define <2 x double> @test_qvfmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) { +; CHECK: test_qvfmsub +; CHECK: QVFMSUB %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <2 x double> %2, %1 + %5 = fsub reassoc nsz <2 x double> %4, %0 + ret <2 x double> %5 +} + +define <4 x float> @test_qvfmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) { +; CHECK: test_qvfmsubs +; CHECK: QVFMSUBSs %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <4 x float> %2, %1 + %5 = fsub reassoc nsz <4 x float> %4, %0 + ret <4 x float> %5 +} + +define <2 x double> @test_qvfnmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) { +; CHECK: test_qvfnmsub +; CHECK: QVFNMSUB %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <2 x double> %2, %1 + %5 = fsub reassoc nsz <2 x double> %4, %0 + %6 = fneg reassoc nsz <2 x double> %5 + ret <2 x double> %6 +} + +define <4 x float> @test_qvfnmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) { +; CHECK: test_qvfnmsubs +; CHECK: QVFNMSUBSs %2, %0, %1, implicit $rm +; + %4 = fmul reassoc nsz <4 x float> %2, %1 + %5 = fsub reassoc nsz <4 x float> %4, %0 + %6 = fneg reassoc nsz <4 x float> %5 + ret <4 x float> %6 +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll index 2fdaa1fec8217..4f3abd2f60d68 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -65,8 +65,8 @@ define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind { ; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 ; CHECK-NEXT: qvfmuls 4, 3, 3 -; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 -; CHECK-NEXT: qvfmadds 0, 2, 4, 0 +; CHECK-NEXT: qvfmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmul 1, 1, 0 ; CHECK-NEXT: blr @@ -182,8 
+182,8 @@ define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind { ; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l ; CHECK-NEXT: qvlfsx 0, 0, 3 ; CHECK-NEXT: qvfmuls 4, 3, 3 -; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2 -; CHECK-NEXT: qvfmadds 0, 2, 4, 0 +; CHECK-NEXT: qvfmsubs 2, 2, 0, 2 +; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0 ; CHECK-NEXT: qvfmuls 0, 3, 0 ; CHECK-NEXT: qvfmuls 1, 1, 0 ; CHECK-NEXT: blr @@ -408,8 +408,8 @@ define <4 x float> @goo3_fmf_denorm_on(<4 x float> %a) #0 { ; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l ; CHECK-NEXT: qvfmuls 4, 2, 2 -; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 -; CHECK-NEXT: qvfmadds 0, 3, 4, 0 +; CHECK-NEXT: qvfmsubs 3, 1, 0, 1 +; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l @@ -435,8 +435,8 @@ define <4 x float> @goo3_fmf_denorm_off(<4 x float> %a) #1 { ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l ; CHECK-NEXT: qvfmuls 4, 2, 2 -; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1 -; CHECK-NEXT: qvfmadds 0, 3, 4, 0 +; CHECK-NEXT: qvfmsubs 3, 1, 0, 1 +; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0 ; CHECK-NEXT: qvlfsx 3, 0, 3 ; CHECK-NEXT: qvfmuls 0, 2, 0 ; CHECK-NEXT: qvfmuls 0, 0, 1 diff --git a/llvm/test/CodeGen/PowerPC/sms-remark.ll b/llvm/test/CodeGen/PowerPC/sms-remark.ll new file mode 100644 index 0000000000000..647b56fa7fcd3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sms-remark.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr9 --ppc-enable-pipeliner \ +; RUN: -pass-remarks-analysis=pipeliner -pass-remarks=pipeliner -o /dev/null 2>&1 \ +; RUN: | FileCheck %s + +@x = dso_local local_unnamed_addr global <{ i32, i32, i32, i32, [1020 x i32] }> <{ i32 1, i32 2, i32 3, i32 4, [1020 x i32] zeroinitializer }>, align 4 +@y = dso_local global [1024 x i32] zeroinitializer, align 4 + +define dso_local i32* @foo() local_unnamed_addr { +;CHECK: Schedule found with Initiation Interval +;CHECK: Pipelined succesfully! 
+entry: + %.pre = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0), align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0) + +for.body: ; preds = %for.body, %entry + %0 = phi i32 [ %.pre, %entry ], [ %add.2, %for.body ] + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next.2, %for.body ] + %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %1 + %add = add nsw i32 %mul, %0 + %arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv + store i32 %add, i32* %arrayidx6, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx2.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next + %2 = load i32, i32* %arrayidx2.1, align 4 + %mul.1 = mul nsw i32 %2, %2 + %add.1 = add nsw i32 %mul.1, %add + %arrayidx6.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next + store i32 %add.1, i32* %arrayidx6.1, align 4 + %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2 + %arrayidx2.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next.1 + %3 = load i32, i32* %arrayidx2.2, align 4 + %mul.2 = mul nsw i32 %3, %3 + %add.2 = add nsw i32 %mul.2, %add.1 + %arrayidx6.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next.1 + store i32 %add.2, i32* %arrayidx6.2, align 4 + %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv, 3 + %exitcond.2 = icmp eq i64 %indvars.iv.next.2, 1024 + br i1 %exitcond.2, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/CodeGen/RISCV/double-isnan.ll b/llvm/test/CodeGen/RISCV/double-isnan.ll new file mode 100644 index 0000000000000..0258ab53d6051 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-isnan.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi ilp32d -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV32IFD %s +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi lp64d -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV64IFD %s + +define zeroext i1 @double_is_nan(double %a) nounwind { +; RV32IFD-LABEL: double_is_nan: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: seqz a0, a0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: double_is_nan: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: seqz a0, a0 +; RV64IFD-NEXT: ret + %1 = fcmp uno double %a, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @double_not_nan(double %a) nounwind { +; RV32IFD-LABEL: double_not_nan: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: feq.d a0, fa0, fa0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: double_not_nan: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: feq.d a0, fa0, fa0 +; RV64IFD-NEXT: ret + %1 = fcmp ord double %a, 0.000000e+00 + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/float-isnan.ll b/llvm/test/CodeGen/RISCV/float-isnan.ll new file mode 100644 index 0000000000000..211783d7fb6dc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-isnan.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs \ +; RUN: < %s | FileCheck -check-prefix=RV64IF %s + +define zeroext i1 @float_is_nan(float %a) nounwind { +; RV32IF-LABEL: float_is_nan: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: seqz a0, a0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: float_is_nan: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: seqz a0, a0 +; RV64IF-NEXT: ret + %1 = fcmp uno float %a, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @float_not_nan(float %a) nounwind { +; RV32IF-LABEL: float_not_nan: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: float_not_nan: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: ret + %1 = fcmp ord float %a, 0.000000e+00 + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll new file mode 100644 index 0000000000000..4fd00b53c119e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/select-const.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IFD %s + +;; This tests how good we are at materialising constants using `select`. The aim +;; is that we do so without a branch if possible (at the moment our lowering of +;; select always introduces a branch). +;; +;; Currently the hook `convertSelectOfConstantsToMath` is only useful when the +;; constants are either 1 away from each other, or one is a power of two and +;; the other is zero.
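+;;
+;; As a sketch, the branch-free lowerings that hook enables look like
+;; (illustrative only, mirroring the cases below):
+;;   select i1 %a, i32 3, i32 4  ->  4 - zext(%a)     ; constants 1 apart
+;;   select i1 %a, i32 4, i32 0  ->  zext(%a) << 2    ; power of two vs zero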
+ +define signext i32 @select_const_int_easy(i1 zeroext %a) nounwind { +; RV32I-LABEL: select_const_int_easy: +; RV32I: # %bb.0: +; RV32I-NEXT: ret +; +; RV32IF-LABEL: select_const_int_easy: +; RV32IF: # %bb.0: +; RV32IF-NEXT: ret +; +; RV64I-LABEL: select_const_int_easy: +; RV64I: # %bb.0: +; RV64I-NEXT: ret +; +; RV64IFD-LABEL: select_const_int_easy: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: ret + %1 = select i1 %a, i32 1, i32 0 + ret i32 %1 +} + +define signext i32 @select_const_int_one_away(i1 zeroext %a) nounwind { +; RV32I-LABEL: select_const_int_one_away: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a1, zero, 4 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32IF-LABEL: select_const_int_one_away: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a1, zero, 4 +; RV32IF-NEXT: sub a0, a1, a0 +; RV32IF-NEXT: ret +; +; RV64I-LABEL: select_const_int_one_away: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a1, zero, 4 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64IFD-LABEL: select_const_int_one_away: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi a1, zero, 4 +; RV64IFD-NEXT: sub a0, a1, a0 +; RV64IFD-NEXT: ret + %1 = select i1 %a, i32 3, i32 4 + ret i32 %1 +} + +define signext i32 @select_const_int_pow2_zero(i1 zeroext %a) nounwind { +; RV32I-LABEL: select_const_int_pow2_zero: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: ret +; +; RV32IF-LABEL: select_const_int_pow2_zero: +; RV32IF: # %bb.0: +; RV32IF-NEXT: slli a0, a0, 2 +; RV32IF-NEXT: ret +; +; RV64I-LABEL: select_const_int_pow2_zero: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: ret +; +; RV64IFD-LABEL: select_const_int_pow2_zero: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: slli a0, a0, 2 +; RV64IFD-NEXT: ret + %1 = select i1 %a, i32 4, i32 0 + ret i32 %1 +} + +define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind { +; RV32I-LABEL: select_const_int_harder: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: addi a0, zero, 6 +; RV32I-NEXT: bnez a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: addi a0, zero, 38 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: ret +; +; RV32IF-LABEL: select_const_int_harder: +; RV32IF: # %bb.0: +; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: addi a0, zero, 6 +; RV32IF-NEXT: bnez a1, .LBB3_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: addi a0, zero, 38 +; RV32IF-NEXT: .LBB3_2: +; RV32IF-NEXT: ret +; +; RV64I-LABEL: select_const_int_harder: +; RV64I: # %bb.0: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: addi a0, zero, 6 +; RV64I-NEXT: bnez a1, .LBB3_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: addi a0, zero, 38 +; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: ret +; +; RV64IFD-LABEL: select_const_int_harder: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: mv a1, a0 +; RV64IFD-NEXT: addi a0, zero, 6 +; RV64IFD-NEXT: bnez a1, .LBB3_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: addi a0, zero, 38 +; RV64IFD-NEXT: .LBB3_2: +; RV64IFD-NEXT: ret + %1 = select i1 %a, i32 6, i32 38 + ret i32 %1 +} + +define float @select_const_fp(i1 zeroext %a) nounwind { +; RV32I-LABEL: select_const_fp: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lui a0, 263168 +; RV32I-NEXT: bnez a1, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: lui a0, 264192 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: ret +; +; RV32IF-LABEL: select_const_fp: +; RV32IF: # %bb.0: +; RV32IF-NEXT: bnez a0, .LBB4_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: lui a0, %hi(.LCPI4_0) +; RV32IF-NEXT: addi a0, a0, %lo(.LCPI4_0) +; RV32IF-NEXT: j .LBB4_3 +; RV32IF-NEXT: .LBB4_2: +; RV32IF-NEXT: lui a0, %hi(.LCPI4_1) +; RV32IF-NEXT: addi a0, a0, %lo(.LCPI4_1) +; 
RV32IF-NEXT: .LBB4_3: +; RV32IF-NEXT: flw ft0, 0(a0) +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64I-LABEL: select_const_fp: +; RV64I: # %bb.0: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lui a0, 263168 +; RV64I-NEXT: bnez a1, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: lui a0, 264192 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ret +; +; RV64IFD-LABEL: select_const_fp: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: bnez a0, .LBB4_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: lui a0, %hi(.LCPI4_0) +; RV64IFD-NEXT: addi a0, a0, %lo(.LCPI4_0) +; RV64IFD-NEXT: j .LBB4_3 +; RV64IFD-NEXT: .LBB4_2: +; RV64IFD-NEXT: lui a0, %hi(.LCPI4_1) +; RV64IFD-NEXT: addi a0, a0, %lo(.LCPI4_1) +; RV64IFD-NEXT: .LBB4_3: +; RV64IFD-NEXT: flw ft0, 0(a0) +; RV64IFD-NEXT: fmv.x.w a0, ft0 +; RV64IFD-NEXT: ret + %1 = select i1 %a, float 3.0, float 4.0 + ret float %1 +} diff --git a/llvm/test/CodeGen/SystemZ/load-and-test.mir b/llvm/test/CodeGen/SystemZ/load-and-test.mir index c83291376e122..17427283dc80d 100644 --- a/llvm/test/CodeGen/SystemZ/load-and-test.mir +++ b/llvm/test/CodeGen/SystemZ/load-and-test.mir @@ -15,12 +15,12 @@ body: | bb.0 (): liveins: $r1d renamable $r0l = L $r1d, 0, $noreg - CLFIMux killed renamable $r0l, 0, implicit-def $cc + CLFIMux renamable $r0l, 0, implicit-def $cc BRC 14, 10, %bb.2, implicit $cc bb.1 (): liveins: $r0l - ST killed renamable $r0l, $r15d, 164, $noreg + ST renamable $r0l, $r15d, 164, $noreg bb.2 (): liveins: $r0l @@ -38,12 +38,12 @@ body: | bb.0 (): liveins: $r1d renamable $r0l = L $r1d, 0, $noreg - CLFIMux killed renamable $r0l, 0, implicit-def $cc + CLFIMux renamable $r0l, 0, implicit-def $cc BRC 14, 8, %bb.2, implicit $cc bb.1 (): liveins: $r0l - ST killed renamable $r0l, $r15d, 164, $noreg + ST renamable $r0l, $r15d, 164, $noreg bb.2 (): liveins: $r0l diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 2dc9ece2e348a..6b9675dfc091a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -296,9 +296,8 @@ for.cond.cleanup: ; preds = %middle.block, %entr define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) { ; CHECK-LABEL: or_mul_reduce_add: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: ldr.w r12, [sp, #20] +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: ldr.w r12, [sp, #16] ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB3_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph @@ -315,21 +314,16 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 +; CHECK-NEXT: vpnot ; CHECK-NEXT: vsub.i32 q1, q2, q1 -; CHECK-NEXT: vcmp.i32 eq, q1, zr -; CHECK-NEXT: vmrs r5, p0 -; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload -; CHECK-NEXT: vmrs r6, p0 -; CHECK-NEXT: orrs r5, r6 -; CHECK-NEXT: vmsr p0, r5 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q1, [r3], #16 -; CHECK-NEXT: vldrwt.u32 q2, [r2], #16 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpstee +; CHECK-NEXT: 
vcmpt.i32 ne, q1, zr +; CHECK-NEXT: vldrwe.u32 q1, [r3], #16 +; CHECK-NEXT: vldrwe.u32 q2, [r2], #16 ; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB3_2 @@ -337,12 +331,10 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* ; CHECK-NEXT: vctp.32 r4 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll index 6d530b91754a2..de200eb097792 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -4,49 +4,24 @@ define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: fmas1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB0_3 -; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: adr r4, .LCPI0_0 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vdup.32 q2, r12 -; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB0_2: @ %vector.body +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vdup.32 q0, r12 +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vdup.32 q3, r3 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vorr q3, q3, q1 -; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vptt.u32 cs, q2, q3 -; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 -; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 -; CHECK-NEXT: vfma.f32 q5, q4, q3 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q5, [r2], #16 -; CHECK-NEXT: le lr, .LBB0_2 -; CHECK-NEXT: .LBB0_3: @ %for.cond.cleanup -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.4: -; CHECK-NEXT: .LCPI0_0: -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 1 @ 0x1 -; CHECK-NEXT: .long 2 @ 0x2 -; CHECK-NEXT: .long 3 @ 0x3 +; CHECK-NEXT: vmov q3, q0 +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 +; CHECK-NEXT: vldrw.u32 q2, [r0], #16 +; CHECK-NEXT: vfma.f32 q3, q2, q1 +; CHECK-NEXT: vstrw.32 q3, [r2], #16 +; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup @@ -146,48 +121,23 @@ for.cond.cleanup: ; preds = %vector.body, %entry define arm_aapcs_vfpcc void @fma1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: fma1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; 
CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB2_3 -; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: adr r4, .LCPI2_0 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vdup.32 q2, r12 -; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: .LBB2_2: @ %vector.body +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vdup.32 q0, r12 +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vdup.32 q3, r3 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vorr q3, q3, q1 -; CHECK-NEXT: vptt.u32 cs, q2, q3 -; CHECK-NEXT: vldrwt.u32 q3, [r0], #16 -; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 -; CHECK-NEXT: vfma.f32 q4, q3, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q4, [r2], #16 -; CHECK-NEXT: le lr, .LBB2_2 -; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.4: -; CHECK-NEXT: .LCPI2_0: -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 1 @ 0x1 -; CHECK-NEXT: .long 2 @ 0x2 -; CHECK-NEXT: .long 3 @ 0x3 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 +; CHECK-NEXT: vfma.f32 q2, q1, q0 +; CHECK-NEXT: vstrw.32 q2, [r2], #16 +; CHECK-NEXT: letp lr, .LBB2_1 +; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup @@ -287,50 +237,25 @@ for.cond.cleanup: ; preds = %vector.body, %entry define arm_aapcs_vfpcc void @fmss1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: fmss1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: blt .LBB4_3 -; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: adr r4, .LCPI4_0 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: subs r5, r3, #1 -; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: vdup.32 q2, r5 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: eor r3, r12, #-2147483648 -; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: .LBB4_2: @ %vector.body +; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: eor r12, r12, #-2147483648 +; CHECK-NEXT: vdup.32 q0, r12 +; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vdup.32 q3, r3 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vorr q3, q3, q1 -; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vptt.u32 cs, q2, q3 -; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 -; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 -; CHECK-NEXT: vfma.f32 q5, q4, q3 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q5, [r2], #16 -; CHECK-NEXT: le lr, .LBB4_2 -; CHECK-NEXT: .LBB4_3: @ %for.cond.cleanup -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop {r4, r5, r7, pc} 
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.4:
-; CHECK-NEXT: .LCPI4_0:
-; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .long 1 @ 0x1
-; CHECK-NEXT: .long 2 @ 0x2
-; CHECK-NEXT: .long 3 @ 0x3
+; CHECK-NEXT: vmov q3, q0
+; CHECK-NEXT: vldrw.u32 q1, [r1], #16
+; CHECK-NEXT: vldrw.u32 q2, [r0], #16
+; CHECK-NEXT: vfma.f32 q3, q2, q1
+; CHECK-NEXT: vstrw.32 q3, [r2], #16
+; CHECK-NEXT: letp lr, .LBB4_1
+; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
+; CHECK-NEXT: pop {r7, pc}
 entry:
 %cmp8 = icmp sgt i32 %n, 0
 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
@@ -434,49 +359,24 @@ for.cond.cleanup: ; preds = %vector.body, %entry
 define arm_aapcs_vfpcc void @fmss3(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: fmss3:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: .vsave {d8, d9, d10, d11}
-; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
 ; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB6_3
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
-; CHECK-NEXT: add.w r12, r3, #3
-; CHECK-NEXT: adr r4, .LCPI6_0
-; CHECK-NEXT: bic r12, r12, #3
-; CHECK-NEXT: mov.w lr, #1
-; CHECK-NEXT: sub.w r12, r12, #4
-; CHECK-NEXT: vldrw.u32 q1, [r4]
-; CHECK-NEXT: add.w lr, lr, r12, lsr #2
-; CHECK-NEXT: sub.w r12, r3, #1
-; CHECK-NEXT: vmov r3, s0
-; CHECK-NEXT: vdup.32 q2, r12
-; CHECK-NEXT: vdup.32 q0, r3
-; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: dls lr, lr
-; CHECK-NEXT: .LBB6_2: @ %vector.body
+; CHECK-NEXT: it lt
+; CHECK-NEXT: poplt {r7, pc}
+; CHECK-NEXT: vmov r12, s0
+; CHECK-NEXT: vdup.32 q0, r12
+; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: .LBB6_1: @ %vector.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vdup.32 q3, r3
-; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: vorr q3, q3, q1
-; CHECK-NEXT: vmov q5, q0
-; CHECK-NEXT: vptt.u32 cs, q2, q3
-; CHECK-NEXT: vldrwt.u32 q3, [r1], #16
-; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
-; CHECK-NEXT: vfms.f32 q5, q4, q3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q5, [r2], #16
-; CHECK-NEXT: le lr, .LBB6_2
-; CHECK-NEXT: .LBB6_3: @ %for.cond.cleanup
-; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: pop {r4, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.4:
-; CHECK-NEXT: .LCPI6_0:
-; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .long 1 @ 0x1
-; CHECK-NEXT: .long 2 @ 0x2
-; CHECK-NEXT: .long 3 @ 0x3
+; CHECK-NEXT: vmov q3, q0
+; CHECK-NEXT: vldrw.u32 q1, [r1], #16
+; CHECK-NEXT: vldrw.u32 q2, [r0], #16
+; CHECK-NEXT: vfms.f32 q3, q2, q1
+; CHECK-NEXT: vstrw.32 q3, [r2], #16
+; CHECK-NEXT: letp lr, .LBB6_1
+; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
+; CHECK-NEXT: pop {r7, pc}
 entry:
 %cmp8 = icmp sgt i32 %n, 0
 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
@@ -579,49 +479,24 @@ for.cond.cleanup: ; preds = %vector.body, %entry
 define arm_aapcs_vfpcc void @fms1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: fms1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
 ; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB8_3
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
-; CHECK-NEXT: add.w r12, r3, #3
-; CHECK-NEXT: mov.w lr, #1
-; CHECK-NEXT: bic r12, r12, #3
-; CHECK-NEXT: adr r4, .LCPI8_0
-; CHECK-NEXT: sub.w r12, r12, #4
-; CHECK-NEXT: subs r5, r3, #1
-; CHECK-NEXT: vldrw.u32 q1, [r4]
-; CHECK-NEXT: vdup.32 q2, r5
-; CHECK-NEXT: add.w lr, lr, r12, lsr #2
+; CHECK-NEXT: it lt
+; CHECK-NEXT: poplt {r7, pc}
 ; CHECK-NEXT: vmov r12, s0
-; CHECK-NEXT: dls lr, lr
-; CHECK-NEXT: eor r3, r12, #-2147483648
-; CHECK-NEXT: vdup.32 q0, r3
-; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: .LBB8_2: @ %vector.body
+; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: eor r12, r12, #-2147483648
+; CHECK-NEXT: vdup.32 q0, r12
+; CHECK-NEXT: .LBB8_1: @ %vector.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vdup.32 q3, r3
-; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: vorr q3, q3, q1
-; CHECK-NEXT: vptt.u32 cs, q2, q3
-; CHECK-NEXT: vldrwt.u32 q3, [r0], #16
-; CHECK-NEXT: vldrwt.u32 q4, [r1], #16
-; CHECK-NEXT: vfma.f32 q4, q3, q0
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q4, [r2], #16
-; CHECK-NEXT: le lr, .LBB8_2
-; CHECK-NEXT: .LBB8_3: @ %for.cond.cleanup
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r7, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.4:
-; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .long 1 @ 0x1
-; CHECK-NEXT: .long 2 @ 0x2
-; CHECK-NEXT: .long 3 @ 0x3
+; CHECK-NEXT: vldrw.u32 q1, [r0], #16
+; CHECK-NEXT: vldrw.u32 q2, [r1], #16
+; CHECK-NEXT: vfma.f32 q2, q1, q0
+; CHECK-NEXT: vstrw.32 q2, [r2], #16
+; CHECK-NEXT: letp lr, .LBB8_1
+; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
+; CHECK-NEXT: pop {r7, pc}
 entry:
 %cmp8 = icmp sgt i32 %n, 0
 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
@@ -723,49 +598,24 @@ for.cond.cleanup: ; preds = %vector.body, %entry
 define arm_aapcs_vfpcc void @fms3(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: fms3:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
 ; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: blt .LBB10_3
-; CHECK-NEXT: @ %bb.1: @ %vector.ph
-; CHECK-NEXT: add.w r12, r3, #3
-; CHECK-NEXT: adr r4, .LCPI10_0
-; CHECK-NEXT: bic r12, r12, #3
-; CHECK-NEXT: mov.w lr, #1
-; CHECK-NEXT: sub.w r12, r12, #4
-; CHECK-NEXT: vldrw.u32 q1, [r4]
-; CHECK-NEXT: add.w lr, lr, r12, lsr #2
-; CHECK-NEXT: sub.w r12, r3, #1
-; CHECK-NEXT: vmov r3, s0
-; CHECK-NEXT: vdup.32 q2, r12
-; CHECK-NEXT: vdup.32 q0, r3
-; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: dls lr, lr
-; CHECK-NEXT: .LBB10_2: @ %vector.body
+; CHECK-NEXT: it lt
+; CHECK-NEXT: poplt {r7, pc}
+; CHECK-NEXT: vmov r12, s0
+; CHECK-NEXT: vdup.32 q0, r12
+; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: .LBB10_1: @ %vector.body
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vdup.32 q3, r3
-; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: vorr q3, q3, q1
-; CHECK-NEXT: vptt.u32 cs, q2, q3
-; CHECK-NEXT: vldrwt.u32 q3, [r1], #16
-; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
-; CHECK-NEXT: vneg.f32 q3, q3
-; CHECK-NEXT: vfma.f32 q3, q4, q0
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrwt.32 q3, [r2], #16
-; CHECK-NEXT: le lr, .LBB10_2
-; CHECK-NEXT: .LBB10_3: @ %for.cond.cleanup
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, pc}
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: @ %bb.4:
-; CHECK-NEXT: .LCPI10_0:
-; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .long 1 @ 0x1
-; CHECK-NEXT: .long 2 @ 0x2
-; CHECK-NEXT: .long 3 @ 0x3
+; CHECK-NEXT: vldrw.u32 q1, [r1], #16
+; CHECK-NEXT: vldrw.u32 q2, [r0], #16
+; CHECK-NEXT: vneg.f32 q1, q1
+; CHECK-NEXT: vfma.f32 q1, q2, q0
+; CHECK-NEXT: vstrw.32 q1, [r2], #16
+; CHECK-NEXT: letp lr, .LBB10_1
+; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
+; CHECK-NEXT: pop {r7, pc}
 entry:
 %cmp8 = icmp sgt i32 %n, 0
 br i1 %cmp8, label %vector.ph, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll
index fd1daef4b9ec8..578f9f003f556 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll
@@ -220,14 +220,11 @@ entry:
 define arm_aapcs_vfpcc float @test_vminnmvq_f16(float %a.coerce, <8 x half> %b) {
 ; CHECK-LABEL: test_vminnmvq_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r0, s0
 ; CHECK-NEXT: vminnmv.f16 r0, q1
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -255,14 +252,11 @@ entry:
 define arm_aapcs_vfpcc float @test_vminnmavq_f16(float %a.coerce, <8 x half> %b) {
 ; CHECK-LABEL: test_vminnmavq_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r0, s0
 ; CHECK-NEXT: vminnmav.f16 r0, q1
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -290,14 +284,11 @@ entry:
 define arm_aapcs_vfpcc float @test_vmaxnmvq_f16(float %a.coerce, <8 x half> %b) {
 ; CHECK-LABEL: test_vmaxnmvq_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r0, s0
 ; CHECK-NEXT: vmaxnmv.f16 r0, q1
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -325,14 +316,11 @@ entry:
 define arm_aapcs_vfpcc float @test_vmaxnmavq_f16(float %a.coerce, <8 x half> %b) {
 ; CHECK-LABEL: test_vmaxnmavq_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r0, s0
 ; CHECK-NEXT: vmaxnmav.f16 r0, q1
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -648,16 +636,13 @@ entry:
 define arm_aapcs_vfpcc float @test_vminnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
 ; CHECK-LABEL: test_vminnmvq_p_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r1, s0
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vminnmvt.f16 r1, q1
 ; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -691,16 +676,13 @@ entry:
 define arm_aapcs_vfpcc float @test_vminnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
 ; CHECK-LABEL: test_vminnmavq_p_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r1, s0
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vminnmavt.f16 r1, q1
 ; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -734,16 +716,13 @@ entry:
 define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
 ; CHECK-LABEL: test_vmaxnmvq_p_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r1, s0
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vmaxnmvt.f16 r1, q1
 ; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
@@ -777,16 +756,13 @@ entry:
 define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
 ; CHECK-LABEL: test_vmaxnmavq_p_f16:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub sp, #4
 ; CHECK-NEXT: vmov r1, s0
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vmaxnmavt.f16 r1, q1
 ; CHECK-NEXT: vmov s0, r1
-; CHECK-NEXT: vstr.16 s0, [sp, #2]
-; CHECK-NEXT: ldrh.w r0, [sp, #2]
+; CHECK-NEXT: vmov.f16 r0, s0
 ; CHECK-NEXT: vmov s0, r0
-; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: bx lr
 entry:
 %0 = bitcast float %a.coerce to i32
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index a9c9bb9cba681..8f01326c002fd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -1049,6 +1049,336 @@ if.end: ; preds = %for.cond.cleanup23,
 ret i8* %out
 }
 
+%struct.arm_cfft_instance_f32 = type { i16, float*, i16*, i16, i32*, i32*, i32*, float*, float*, float* }
+define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf(%struct.arm_cfft_instance_f32* nocapture readonly %0, float* %1, i32 %2, float %3) {
+; CHECK-LABEL: _Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: .pad #56
+; CHECK-NEXT: sub sp, #56
+; CHECK-NEXT: cmp r2, #8
+; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: vstr s0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: blo.w .LBB7_9
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movs r3, #1
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: lsrs r1, r2, #2
+; CHECK-NEXT: b .LBB7_3
+; CHECK-NEXT: .LBB7_2: @ in Loop: Header=BB7_3 Depth=1
+; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: add.w r10, r10, #1
+; CHECK-NEXT: lsls r3, r3, #2
+; CHECK-NEXT: cmp r2, #7
+; CHECK-NEXT: asr.w r1, r2, #2
+; CHECK-NEXT: ble .LBB7_9
+; CHECK-NEXT: .LBB7_3: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB7_6 Depth 2
+; CHECK-NEXT: @ Child Loop BB7_7 Depth 3
+; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: cmp r3, #1
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: lsr.w r2, r1, #2
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: blt .LBB7_2
+; CHECK-NEXT: @ %bb.4: @ in Loop: Header=BB7_3 Depth=1
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: cmp.w r2, r1, lsr #3
+; CHECK-NEXT: beq .LBB7_2
+; CHECK-NEXT: @ %bb.5: @ %.preheader
+; CHECK-NEXT: @ in Loop: Header=BB7_3 Depth=1
+; CHECK-NEXT: lsrs r2, r1, #3
+; CHECK-NEXT: lsls r1, r1, #1
+; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT: lsl.w r11, r2, #1
+; CHECK-NEXT: .LBB7_6: @ Parent Loop BB7_3 Depth=1
+; CHECK-NEXT: @ => This Loop Header: Depth=2
+; CHECK-NEXT: @ Child Loop BB7_7 Depth 3
+; CHECK-NEXT: add.w r12, r0, #16
+; CHECK-NEXT: ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr.w lr, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldm.w r12, {r1, r2, r3, r12}
+; CHECK-NEXT: muls r4, r5, r4
+; CHECK-NEXT: ldr.w r2, [r2, r10, lsl #2]
+; CHECK-NEXT: ldr.w r1, [r1, r10, lsl #2]
+; CHECK-NEXT: ldrd r6, r7, [r0, #32]
+; CHECK-NEXT: ldr.w r3, [r3, r10, lsl #2]
+; CHECK-NEXT: dls lr, lr
+; CHECK-NEXT: add.w r6, r6, r2, lsl #2
+; CHECK-NEXT: add.w r12, r12, r1, lsl #2
+; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: add.w r2, r1, r4, lsl #2
+; CHECK-NEXT: add.w r3, r7, r3, lsl #2
+; CHECK-NEXT: add.w r1, r2, r11, lsl #2
+; CHECK-NEXT: add.w r8, r1, r11, lsl #2
+; CHECK-NEXT: add.w r9, r8, r11, lsl #2
+; CHECK-NEXT: .LBB7_7: @ Parent Loop BB7_3 Depth=1
+; CHECK-NEXT: @ Parent Loop BB7_6 Depth=2
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
+; CHECK-NEXT: vldrw.u32 q3, [r9]
+; CHECK-NEXT: vldrw.u32 q4, [r1]
+; CHECK-NEXT: vldrw.u32 q6, [r8]
+; CHECK-NEXT: vldrw.u32 q7, [r2]
+; CHECK-NEXT: vsub.f32 q5, q4, q3
+; CHECK-NEXT: vsub.f32 q0, q7, q6
+; CHECK-NEXT: vcadd.f32 q1, q0, q5, #270
+; CHECK-NEXT: vcadd.f32 q2, q0, q5, #90
+; CHECK-NEXT: vadd.f32 q0, q4, q3
+; CHECK-NEXT: vadd.f32 q3, q6, q7
+; CHECK-NEXT: vsub.f32 q4, q3, q0
+; CHECK-NEXT: vadd.f32 q0, q3, q0
+; CHECK-NEXT: vstrb.8 q0, [r2], #16
+; CHECK-NEXT: vldrw.u32 q0, [r6], #16
+; CHECK-NEXT: vcmul.f32 q3, q0, q4, #0
+; CHECK-NEXT: vcmla.f32 q3, q0, q4, #90
+; CHECK-NEXT: vstrb.8 q3, [r1], #16
+; CHECK-NEXT: vldrw.u32 q0, [r12], #16
+; CHECK-NEXT: vcmul.f32 q3, q0, q2, #0
+; CHECK-NEXT: vcmla.f32 q3, q0, q2, #90
+; CHECK-NEXT: vstrb.8 q3, [r8], #16
+; CHECK-NEXT: vldrw.u32 q0, [r3], #16
+; CHECK-NEXT: vcmul.f32 q2, q0, q1, #0
+; CHECK-NEXT: vcmla.f32 q2, q0, q1, #90
+; CHECK-NEXT: vstrb.8 q2, [r9], #16
+; CHECK-NEXT: le lr, .LBB7_7
+; CHECK-NEXT: @ %bb.8: @ in Loop: Header=BB7_6 Depth=2
+; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: adds r5, #1
+; CHECK-NEXT: cmp r5, r3
+; CHECK-NEXT: bne .LBB7_6
+; CHECK-NEXT: b .LBB7_2
+; CHECK-NEXT: .LBB7_9:
+; CHECK-NEXT: adr r0, .LCPI7_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vadd.i32 q1, q1, r0
+; CHECK-NEXT: vldrw.u32 q2, [q1, #64]!
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: lsr.w lr, r0, #3
+; CHECK-NEXT: wls lr, lr, .LBB7_12
+; CHECK-NEXT: @ %bb.10:
+; CHECK-NEXT: vldrw.u32 q3, [q1, #16]
+; CHECK-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: .LBB7_11: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vldrw.u32 q0, [q1, #24]
+; CHECK-NEXT: vldrw.u32 q4, [q1, #8]
+; CHECK-NEXT: vadd.f32 q6, q2, q3
+; CHECK-NEXT: vsub.f32 q2, q2, q3
+; CHECK-NEXT: vadd.f32 q5, q4, q0
+; CHECK-NEXT: vsub.f32 q0, q4, q0
+; CHECK-NEXT: vsub.f32 q7, q6, q5
+; CHECK-NEXT: vcadd.f32 q4, q2, q0, #270
+; CHECK-NEXT: vstrw.32 q7, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT: vcadd.f32 q7, q2, q0, #90
+; CHECK-NEXT: vadd.f32 q0, q6, q5
+; CHECK-NEXT: vldrw.u32 q2, [q1, #64]!
+; CHECK-NEXT: vmul.f32 q0, q0, r0
+; CHECK-NEXT: vldrw.u32 q3, [q1, #16]
+; CHECK-NEXT: vstrw.32 q0, [q1, #-64]
+; CHECK-NEXT: vldrw.u32 q5, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT: vmul.f32 q0, q4, r0
+; CHECK-NEXT: vmul.f32 q4, q7, r0
+; CHECK-NEXT: vmul.f32 q5, q5, r0
+; CHECK-NEXT: vstrw.32 q5, [q1, #-56]
+; CHECK-NEXT: vstrw.32 q4, [q1, #-48]
+; CHECK-NEXT: vstrw.32 q0, [q1, #-40]
+; CHECK-NEXT: le lr, .LBB7_11
+; CHECK-NEXT: .LBB7_12:
+; CHECK-NEXT: add sp, #56
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.13:
+; CHECK-NEXT: .LCPI7_0:
+; CHECK-NEXT: .long 4294967232 @ 0xffffffc0
+; CHECK-NEXT: .long 4294967236 @ 0xffffffc4
+; CHECK-NEXT: .long 4294967264 @ 0xffffffe0
+; CHECK-NEXT: .long 4294967268 @ 0xffffffe4
+ %5 = icmp ugt i32 %2, 7
+ br i1 %5, label %6, label %26
+
+6: ; preds = %4
+ %7 = lshr i32 %2, 2
+ %8 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 7
+ %9 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 4
+ %10 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 8
+ %11 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 5
+ %12 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 9
+ %13 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 6
+ br label %14
+
+14: ; preds = %6, %40
+ %15 = phi i32 [ %2, %6 ], [ %19, %40 ]
+ %16 = phi i32 [ %7, %6 ], [ %43, %40 ]
+ %17 = phi i32 [ 1, %6 ], [ %41, %40 ]
+ %18 = phi i32 [ 0, %6 ], [ %42, %40 ]
+ %19 = lshr i32 %15, 2
+ %20 = icmp sgt i32 %17, 0
+ br i1 %20, label %21, label %40
+
+21: ; preds = %14
+ %22 = shl i32 %15, 1
+ %23 = shl nuw nsw i32 %19, 1
+ %24 = lshr i32 %15, 3
+ %25 = icmp eq i32 %24, 0
+ br i1 %25, label %40, label %45
+
+26: ; preds = %40, %4
+ %27 = ptrtoint float* %1 to i32
+ %28 = insertelement <4 x i32> undef, i32 %27, i32 0
+ %29 = shufflevector <4 x i32> %28, <4 x i32> undef, <4 x i32> zeroinitializer
+ %30 = add <4 x i32> %29,
+ %31 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %30, i32 64)
+ %32 = extractvalue { <4 x float>, <4 x i32> } %31, 1
+ %33 = lshr i32 %2, 3
+ %34 = icmp eq i32 %33, 0
+ br i1 %34, label %141, label %35
+
+35: ; preds = %26
+ %36 = tail call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %32, i32 16)
+ %37 = extractvalue { <4 x float>, <4 x i32> } %31, 0
+ %38 = insertelement <4 x float> undef, float %3, i32 0
+ %39 = shufflevector <4 x float> %38, <4 x float> undef, <4 x i32> zeroinitializer
+ br label %116
+
+40: ; preds = %113, %21, %14
+ %41 = shl i32 %17, 2
+ %42 = add nuw nsw i32 %18, 1
+ %43 = ashr i32 %16, 2
+ %44 = icmp sgt i32 %16, 7
+ br i1 %44, label %14, label %26
+
+45: ; preds = %21, %113
+ %46 = phi i32 [ %114, %113 ], [ 0, %21 ]
+ %47 = load float*, float** %8, align 4
+ %48 = load i32*, i32** %9, align 4
+ %49 = getelementptr inbounds i32, i32* %48, i32 %18
+ %50 = load i32, i32* %49, align 4
+ %51 = getelementptr inbounds float, float* %47, i32 %50
+ %52 = load float*, float** %10, align 4
+ %53 = load i32*, i32** %11, align 4
+ %54 = getelementptr inbounds i32, i32* %53, i32 %18
+ %55 = load i32, i32* %54, align 4
+ %56 = getelementptr inbounds float, float* %52, i32 %55
+ %57 = load float*, float** %12, align 4
+ %58 = load i32*, i32** %13, align 4
+ %59 = getelementptr inbounds i32, i32* %58, i32 %18
+ %60 = load i32, i32* %59, align 4
+ %61 = getelementptr inbounds float, float* %57, i32 %60
+ %62 = mul i32 %22, %46
+ %63 = getelementptr inbounds float, float* %1, i32 %62
+ %64 = getelementptr inbounds float, float* %63, i32 %23
+ %65 = getelementptr inbounds float, float* %64, i32 %23
+ %66 = getelementptr inbounds float, float* %65, i32 %23
+ br label %67
+
+67: ; preds = %45, %67
+ %68 = phi float* [ %63, %45 ], [ %89, %67 ]
+ %69 = phi float* [ %65, %45 ], [ %103, %67 ]
+ %70 = phi float* [ %66, %45 ], [ %110, %67 ]
+ %71 = phi float* [ %64, %45 ], [ %96, %67 ]
+ %72 = phi float* [ %61, %45 ], [ %107, %67 ]
+ %73 = phi float* [ %56, %45 ], [ %93, %67 ]
+ %74 = phi float* [ %51, %45 ], [ %100, %67 ]
+ %75 = phi i32 [ %24, %45 ], [ %111, %67 ]
+ %76 = bitcast float* %69 to <4 x float>*
+ %77 = bitcast float* %68 to <4 x float>*
+ %78 = load <4 x float>, <4 x float>* %76, align 4
+ %79 = load <4 x float>, <4 x float>* %77, align 4
+ %80 = bitcast float* %71 to <4 x float>*
+ %81 = load <4 x float>, <4 x float>* %80, align 4
+ %82 = bitcast float* %70 to <4 x float>*
+ %83 = load <4 x float>, <4 x float>* %82, align 4
+ %84 = fadd <4 x float> %78, %79
+ %85 = fsub <4 x float> %79, %78
+ %86 = fadd <4 x float> %81, %83
+ %87 = fsub <4 x float> %81, %83
+ %88 = fadd <4 x float> %84, %86
+ store <4 x float> %88, <4 x float>* %77, align 4
+ %89 = getelementptr inbounds float, float* %68, i32 4
+ %90 = fsub <4 x float> %84, %86
+ %91 = bitcast float* %73 to <4 x float>*
+ %92 = load <4 x float>, <4 x float>* %91, align 4
+ %93 = getelementptr inbounds float, float* %73, i32 4
+ %94 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %92, <4 x float> %90)
+ %95 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %94, <4 x float> %92, <4 x float> %90)
+ store <4 x float> %95, <4 x float>* %80, align 4
+ %96 = getelementptr inbounds float, float* %71, i32 4
+ %97 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %85, <4 x float> %87)
+ %98 = bitcast float* %74 to <4 x float>*
+ %99 = load <4 x float>, <4 x float>* %98, align 4
+ %100 = getelementptr inbounds float, float* %74, i32 4
+ %101 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %99, <4 x float> %97)
+ %102 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %101, <4 x float> %99, <4 x float> %97)
+ store <4 x float> %102, <4 x float>* %76, align 4
+ %103 = getelementptr inbounds float, float* %69, i32 4
+ %104 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %85, <4 x float> %87)
+ %105 = bitcast float* %72 to <4 x float>*
+ %106 = load <4 x float>, <4 x float>* %105, align 4
+ %107 = getelementptr inbounds float, float* %72, i32 4
+ %108 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %106, <4 x float> %104)
+ %109 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %108, <4 x float> %106, <4 x float> %104)
+ store <4 x float> %109, <4 x float>* %82, align 4
+ %110 = getelementptr inbounds float, float* %70, i32 4
+ %111 = add nsw i32 %75, -1
+ %112 = icmp eq i32 %111, 0
+ br i1 %112, label %113, label %67
+
+113: ; preds = %67
+ %114 = add nuw nsw i32 %46, 1
+ %115 = icmp eq i32 %114, %17
+ br i1 %115, label %40, label %45
+
+116: ; preds = %35, %116
+ %117 = phi <4 x i32> [ %32, %35 ], [ %128, %116 ]
+ %118 = phi i32 [ %33, %35 ], [ %139, %116 ]
+ %119 = phi <4 x float> [ %36, %35 ], [ %130, %116 ]
+ %120 = phi <4 x float> [ %37, %35 ], [ %129, %116 ]
+ %121 = fadd <4 x float> %120, %119
+ %122 = fsub <4 x float> %120, %119
+ %123 = tail call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %117, i32 8)
+ %124 = tail call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %117, i32 24)
+ %125 = fadd <4 x float> %123, %124
+ %126 = fsub <4 x float> %123, %124
+ %127 = tail call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %117, i32 64)
+ %128 = extractvalue { <4 x float>, <4 x i32> } %127, 1
+ %129 = extractvalue { <4 x float>, <4 x i32> } %127, 0
+ %130 = tail call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %128, i32 16)
+ %131 = fadd <4 x float> %121, %125
+ %132 = fmul <4 x float> %39, %131
+ tail call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %128, i32 -64, <4 x float> %132)
+ %133 = fsub <4 x float> %121, %125
+ %134 = fmul <4 x float> %39, %133
+ tail call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %128, i32 -56, <4 x float> %134)
+ %135 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %122, <4 x float> %126)
+ %136 = fmul <4 x float> %39, %135
+ tail call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %128, i32 -48, <4 x float> %136)
+ %137 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %122, <4 x float> %126)
+ %138 = fmul <4 x float> %39, %137
+ tail call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %128, i32 -40, <4 x float> %138)
+ %139 = add nsw i32 %118, -1
+ %140 = icmp eq i32 %139, 0
+ br i1 %140, label %141, label %116
+
+141: ; preds = %116, %26
+ ret void
+}
+
 declare <16 x i1> @llvm.arm.mve.vctp8(i32)
 declare <8 x i1> @llvm.arm.mve.vctp16(i32)
 declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
@@ -1061,3 +1391,10 @@ declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>
 declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>)
 declare i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32, i32, i32, i32, <8 x i16>, <8 x i16>, <8 x i1>)
 declare <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>)
+
+declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32, i32, <4 x float>, <4 x float>)
+declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)
+declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)
+declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index 955f48184c3c1..635f31a0ce3d6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -179,13 +179,15 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
 ; CHECK-LE: @ %bb.0: @ %entry
 ; CHECK-LE-NEXT: .pad #4
 ; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: and r0, r0, #3
-; CHECK-LE-NEXT: sbfx r1, r0, #0, #1
-; CHECK-LE-NEXT: sbfx r0, r0, #1, #1
-; CHECK-LE-NEXT: vmov.32 q1[0], r1
-; CHECK-LE-NEXT: vmov.32 q1[1], r1
-; CHECK-LE-NEXT: vmov.32 q1[2], r0
-; CHECK-LE-NEXT: vmov.32 q1[3], r0
+; CHECK-LE-NEXT: and r1, r0, #2
+; CHECK-LE-NEXT: and r0, r0, #1
+; CHECK-LE-NEXT: rsbs r0, r0, #0
+; CHECK-LE-NEXT: movs r2, #0
+; CHECK-LE-NEXT: vmov.32 q1[0], r0
+; CHECK-LE-NEXT: sub.w r1, r2, r1, lsr #1
+; CHECK-LE-NEXT: vmov.32 q1[1], r0
+; CHECK-LE-NEXT: vmov.32 q1[2], r1
+; CHECK-LE-NEXT: vmov.32 q1[3], r1
 ; CHECK-LE-NEXT: vand q0, q0, q1
 ; CHECK-LE-NEXT: add sp, #4
 ; CHECK-LE-NEXT: bx lr
@@ -194,9 +196,11 @@ define arm_aapcs_vfpcc <2 x i64> @bitcast_to_v2i1(i2 %b, <2 x i64> %a) {
 ; CHECK-BE: @ %bb.0: @ %entry
 ; CHECK-BE-NEXT: .pad #4
 ; CHECK-BE-NEXT: sub sp, #4
-; CHECK-BE-NEXT: and r0, r0, #3
-; CHECK-BE-NEXT: sbfx r1, r0, #0, #1
-; CHECK-BE-NEXT: sbfx r0, r0, #1, #1
+; CHECK-BE-NEXT: and r1, r0, #2
+; CHECK-BE-NEXT: movs r2, #0
+; CHECK-BE-NEXT: and r0, r0, #1
+; CHECK-BE-NEXT: sub.w r1, r2, r1, lsr #1
+; CHECK-BE-NEXT: rsbs r0, r0, #0
 ; CHECK-BE-NEXT: vmov.32 q1[0], r1
 ; CHECK-BE-NEXT: vmov.32 q1[1], r1
 ; CHECK-BE-NEXT: vmov.32 q1[2], r0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-const.ll b/llvm/test/CodeGen/Thumb2/mve-pred-const.ll
new file mode 100644
index 0000000000000..c3f0e6e6ee1d0
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-const.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc i32 @build_v2i_v4i1_1() {
+; CHECK-LABEL: build_v2i_v4i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v4i1_0() {
+; CHECK-LABEL: build_v2i_v4i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v4i1_5() {
+; CHECK-LABEL: build_v2i_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ ret i32 %r
+}
+
+define arm_aapcs_vfpcc i32 @build_v2i_v8i1_1() {
+; CHECK-LABEL: build_v2i_v8i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v8i1_0() {
+; CHECK-LABEL: build_v2i_v8i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v8i1_5() {
+; CHECK-LABEL: build_v2i_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ ret i32 %r
+}
+
+define arm_aapcs_vfpcc i32 @build_v2i_v16i1_1() {
+; CHECK-LABEL: build_v2i_v16i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v16i1_0() {
+; CHECK-LABEL: build_v2i_v16i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_v2i_v16i1_5() {
+; CHECK-LABEL: build_v2i_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: bx lr
+ %r = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ ret i32 %r
+}
+
+
+
+define arm_aapcs_vfpcc <4 x i32> @build_i2v_v4i1_1() {
+; CHECK-LABEL: build_i2v_v4i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 65535)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+define arm_aapcs_vfpcc <4 x i32> @build_i2v_v4i1_0() {
+; CHECK-LABEL: build_i2v_v4i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 0)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+define arm_aapcs_vfpcc <4 x i32> @build_i2v_v4i1_5() {
+; CHECK-LABEL: build_i2v_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 61680)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @build_i2v_v8i1_1() {
+; CHECK-LABEL: build_i2v_v8i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 65535)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+define arm_aapcs_vfpcc <8 x i16> @build_i2v_v8i1_0() {
+; CHECK-LABEL: build_i2v_v8i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 0)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+define arm_aapcs_vfpcc <8 x i16> @build_i2v_v8i1_5() {
+; CHECK-LABEL: build_i2v_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 52428)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @build_i2v_v16i1_1() {
+; CHECK-LABEL: build_i2v_v16i1_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #65535
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 65535)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+define arm_aapcs_vfpcc <16 x i8> @build_i2v_v16i1_0() {
+; CHECK-LABEL: build_i2v_v16i1_0:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 0)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+define arm_aapcs_vfpcc <16 x i8> @build_i2v_v16i1_5() {
+; CHECK-LABEL: build_i2v_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 43690)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+
+
+define arm_aapcs_vfpcc i32 @build_i2v2i_v4i1_5() {
+; CHECK-LABEL: build_i2v2i_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: bx lr
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 61680)
+ %r = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %c)
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_i2v2i_v8i1_5() {
+; CHECK-LABEL: build_i2v2i_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: bx lr
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 52428)
+ %r = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %c)
+ ret i32 %r
+}
+define arm_aapcs_vfpcc i32 @build_i2v2i_v16i1_5() {
+; CHECK-LABEL: build_i2v2i_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: bx lr
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 43690)
+ %r = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %c)
+ ret i32 %r
+}
+
+
+define arm_aapcs_vfpcc <4 x i32> @build_v2i2v_v4i1_v4i1_5() {
+; CHECK-LABEL: build_v2i2v_v4i1_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %b)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+define arm_aapcs_vfpcc <4 x i32> @build_v2i2v_v8i1_v4i1_5() {
+; CHECK-LABEL: build_v2i2v_v8i1_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %b)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+define arm_aapcs_vfpcc <4 x i32> @build_v2i2v_v16i1_v4i1_5() {
+; CHECK-LABEL: build_v2i2v_v16i1_v4i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ %c = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %b)
+ %r = select <4 x i1> %c, <4 x i32> , <4 x i32>
+ ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <8 x i16> @build_v2i2v_v4i1_v8i1_5() {
+; CHECK-LABEL: build_v2i2v_v4i1_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %b)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+define arm_aapcs_vfpcc <8 x i16> @build_v2i2v_v8i1_v8i1_5() {
+; CHECK-LABEL: build_v2i2v_v8i1_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %b)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+define arm_aapcs_vfpcc <8 x i16> @build_v2i2v_v16i1_v8i1_5() {
+; CHECK-LABEL: build_v2i2v_v16i1_v8i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ %c = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %b)
+ %r = select <8 x i1> %c, <8 x i16> , <8 x i16>
+ ret <8 x i16> %r
+}
+
+define arm_aapcs_vfpcc <16 x i8> @build_v2i2v_v4i1_v16i1_5() {
+; CHECK-LABEL: build_v2i2v_v4i1_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #61680
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> )
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %b)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+define arm_aapcs_vfpcc <16 x i8> @build_v2i2v_v8i1_v16i1_5() {
+; CHECK-LABEL: build_v2i2v_v8i1_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #52428
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> )
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %b)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+define arm_aapcs_vfpcc <16 x i8> @build_v2i2v_v16i1_v16i1_5() {
+; CHECK-LABEL: build_v2i2v_v16i1_v16i1_5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r0, #43690
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vmov.i8 q1, #0xff
+; CHECK-NEXT: vpsel q0, q1, q0
+; CHECK-NEXT: bx lr
+ %b = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> )
+ %c = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %b)
+ %r = select <16 x i1> %c, <16 x i8> , <16 x i8>
+ ret <16 x i8> %r
+}
+
+declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
+declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-convert.ll b/llvm/test/CodeGen/Thumb2/mve-pred-convert.ll
new file mode 100644
index 0000000000000..c24f6e4642580
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-convert.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define void @g(i8* %v) {
+; CHECK-LABEL: g:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r0, #63
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vstrbt.8 q0, [r0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = load i8, i8* %v, align 1
+ %conv = zext i8 %0 to i32
+ %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %conv, i32 0
+ %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
+ %1 = and <16 x i32> %broadcast.splat,
+ %2 = icmp eq <16 x i32> %1, zeroinitializer
+ %3 = select <16 x i1> %2, <16 x i8> zeroinitializer, <16 x i8>
+ call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %3, <16 x i8>* undef, i32 1, <16 x i1> )
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) #1
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
index 0dbbe3f663c3f..e299cf5891566 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
@@ -144,9 +144,11 @@ define arm_aapcs_vfpcc <2 x i64> @load_v2i1(<2 x i1> *%src, <2 x i64> %a) {
 ; CHECK-LE-LABEL: load_v2i1:
 ; CHECK-LE: @ %bb.0: @ %entry
 ; CHECK-LE-NEXT: ldrb r0, [r0]
-; CHECK-LE-NEXT: sbfx r1, r0, #0, #1
-; CHECK-LE-NEXT: sbfx r0, r0, #1, #1
+; CHECK-LE-NEXT: and r1, r0, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: ubfx r0, r0, #1, #1
 ; CHECK-LE-NEXT: vmov.32 q1[0], r1
+; CHECK-LE-NEXT: rsbs r0, r0, #0
 ; CHECK-LE-NEXT: vmov.32 q1[1], r1
 ; CHECK-LE-NEXT: vmov.32 q1[2], r0
@@ -156,8 +158,10 @@ define arm_aapcs_vfpcc <2 x i64> @load_v2i1(<2 x i1> *%src, <2 x i64> %a) {
 ; CHECK-BE-LABEL: load_v2i1:
 ; CHECK-BE: @ %bb.0: @ %entry
 ; CHECK-BE-NEXT: ldrb r0, [r0]
-; CHECK-BE-NEXT: sbfx r1, r0, #0, #1
-; CHECK-BE-NEXT: sbfx r0, r0, #1, #1
+; CHECK-BE-NEXT: ubfx r1, r0, #1, #1
+; CHECK-BE-NEXT: and r0, r0, #1
+; CHECK-BE-NEXT: rsbs r1, r1, #0
+; CHECK-BE-NEXT: rsbs r0, r0, #0
 ; CHECK-BE-NEXT: vmov.32 q1[0], r1
 ; CHECK-BE-NEXT: vmov.32 q1[1], r1
 ; CHECK-BE-NEXT: vmov.32 q1[2], r0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
index 4e9e074083f5c..fbc268fa93007 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i32 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -22,8 +21,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i32 eq, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -38,8 +36,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 ge, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -54,8 +51,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 le, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -70,8 +66,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 gt, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -86,8 +81,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 lt, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -116,8 +110,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i32 eq, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -131,12 +124,10 @@ define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpulez_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q1, zr
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -165,8 +156,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i32 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -181,8 +171,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i32 eq, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -197,8 +186,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 le, q2, q1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -213,8 +201,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 le, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -229,8 +216,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 lt, q2, q1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -245,8 +231,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i32 ne, q0, zr
 ; CHECK-NEXT: vcmpt.s32 lt, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -260,12 +245,10 @@ define arm_aapcs_vfpcc <4 x i32> @cmpult_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK-LABEL: cmpult_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 hi, q2, q1
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -279,12 +262,10 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK-LABEL: cmpugt_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 hi, q1, q2
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -298,12 +279,10 @@ define arm_aapcs_vfpcc <4 x i32> @cmpule_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK-LABEL: cmpule_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q2, q1
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -317,12 +296,10 @@ define arm_aapcs_vfpcc <4 x i32> @cmpuge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
 ; CHECK-LABEL: cmpuge_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q1, q2
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -340,8 +317,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i16 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i16 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <8 x i16> %a, zeroinitializer
@@ -356,8 +332,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i16 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i16 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <8 x i16> %a, zeroinitializer
@@ -373,8 +348,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i8 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i8 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <16 x i8> %a, zeroinitializer
@@ -389,8 +363,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vpt.i8 ne, q0, zr
 ; CHECK-NEXT: vcmpt.i8 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <16 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
index 7c60eeadbfb0a..875ddde5acf6b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -109,8 +109,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp one <4 x float> %src, %src2
@@ -484,8 +483,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float>
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q1, q0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -537,8 +535,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float>
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -590,8 +587,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float>
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -643,8 +639,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float>
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -698,8 +693,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ord <4 x float> %src, %src2
@@ -1019,8 +1013,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %s
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp one <8 x half> %src, %src2
@@ -1904,8 +1897,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q1, q0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2029,8 +2021,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2154,8 +2145,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2279,8 +2269,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2406,8 +2395,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ord <8 x half> %src, %src2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
index f03034c42baf9..6d77e61f0142d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -113,8 +113,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-NEXT: vmov r0, s4
 ; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -509,8 +508,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -565,8 +563,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -621,8 +618,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -677,8 +673,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -735,8 +730,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2
 ; CHECK-MVEFP-NEXT: vmov r0, s4
 ; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -1060,8 +1054,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -1952,8 +1945,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2078,8 +2070,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2204,8 +2195,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2330,8 +2320,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2458,8 +2447,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, half* %src2p,
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -2710,8 +2698,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-NEXT: vmov r0, s4
 ; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3106,8 +3093,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3162,8 +3148,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3218,8 +3203,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3274,8 +3258,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3332,8 +3315,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, float %sr
 ; CHECK-MVEFP-NEXT: vmov r0, s4
 ; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3657,8 +3639,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -4549,8 +4530,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -4675,8 +4655,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -4801,8 +4780,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -4927,8 +4905,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -5055,8 +5032,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, half* %src2
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
index 20837421e9713..0f9c2a0373a5a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -109,8 +109,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:
bx lr entry: %c = fcmp one <4 x float> %src, zeroinitializer @@ -484,8 +483,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -537,8 +535,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -590,8 +587,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -643,8 +639,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> ; ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -698,8 +693,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp ord <4 x float> %src, zeroinitializer @@ -1011,8 +1005,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp one <8 x half> %src, zeroinitializer @@ -1868,8 +1861,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -1989,8 +1981,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -2110,8 +2101,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -2231,8 +2221,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %a ; ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr -; 
CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -2354,8 +2343,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp ord <8 x half> %src, zeroinitializer @@ -2594,8 +2582,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp one <4 x float> zeroinitializer, %src @@ -2969,8 +2956,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, <4 x floa ; ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -3022,8 +3008,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, <4 x floa ; ; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -3075,8 +3060,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, <4 x floa ; ; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -3128,8 +3112,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, <4 x floa ; ; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -3183,8 +3166,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp ord <4 x float> zeroinitializer, %src @@ -3496,8 +3478,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_one_v8f16(<8 x half> %src, <8 x half> ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp one <8 x half> zeroinitializer, %src @@ -4353,8 +4334,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ugt_v8f16(<8 x half> %src, <8 x half> ; ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -4474,8 +4454,7 @@ define 
arm_aapcs_vfpcc <8 x half> @vcmp_r_uge_v8f16(<8 x half> %src, <8 x half> ; ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -4595,8 +4574,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ult_v8f16(<8 x half> %src, <8 x half> ; ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -4716,8 +4694,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ule_v8f16(<8 x half> %src, <8 x half> ; ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16: ; CHECK-MVEFP: @ %bb.0: @ %entry -; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr -; CHECK-MVEFP-NEXT: vpnot +; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 ; CHECK-MVEFP-NEXT: bx lr entry: @@ -4839,8 +4816,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_ord_v8f16(<8 x half> %src, <8 x half> ; CHECK-MVEFP: @ %bb.0: @ %entry ; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr ; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, zr -; CHECK-MVEFP-NEXT: vpnot -; CHECK-MVEFP-NEXT: vpsel q0, q1, q2 +; CHECK-MVEFP-NEXT: vpsel q0, q2, q1 ; CHECK-MVEFP-NEXT: bx lr entry: %c = fcmp ord <8 x half> zeroinitializer, %src diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll index ae91b52e1d543..78bd610958d7d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll @@ -127,15 +127,11 @@ entry: define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half* %src1, half* %src2) { ; CHECK-LABEL: vdup_f16_bc: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vadd.f16 s0, s2, s0 -; CHECK-NEXT: vstr.16 s0, [sp, #2] -; CHECK-NEXT: ldrh.w r0, [sp, #2] +; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: vdup.16 q0, r0 -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: bx lr entry: %0 = load half, half *%src1, align 2 @@ -260,16 +256,12 @@ entry: define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) { ; CHECK-LABEL: vdup_f16_extract: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldr.16 s0, [r2] ; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vadd.f16 s0, s2, s0 -; CHECK-NEXT: vstr.16 s0, [sp, #2] -; CHECK-NEXT: ldrh.w r1, [sp, #2] +; CHECK-NEXT: vmov.f16 r1, s0 ; CHECK-NEXT: vdup.16 q0, r1 ; CHECK-NEXT: vstr.16 s1, [r0] -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: bx lr entry: %0 = load half, half *%src1, align 2 diff --git a/llvm/test/CodeGen/VE/branch1.ll b/llvm/test/CodeGen/VE/branch1.ll index baf23cd9430ea..c9f0a22c4c0a0 100644 --- a/llvm/test/CodeGen/VE/branch1.ll +++ b/llvm/test/CodeGen/VE/branch1.ll @@ -9,8 +9,8 @@ define signext i8 @func1(i8 signext %a, i8 signext %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB0_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB0_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -42,8 +42,8 @@ define i32 @func2(i16 signext %a, i16 signext %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l 
.LBB1_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB1_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -70,8 +70,8 @@ define i32 @func3(i32 %a, i32 %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB2_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB2_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -98,8 +98,8 @@ define i32 @func4(i64 %a, i64 %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB3_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB3_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -127,8 +127,8 @@ define i32 @func5(i8 zeroext %a, i8 zeroext %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB4_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB4_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -156,8 +156,8 @@ define i32 @func6(i16 zeroext %a, i16 zeroext %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB5_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB5_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -185,8 +185,8 @@ define i32 @func7(i32 %a, i32 %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB6_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB6_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -213,8 +213,8 @@ define i32 @func8(float %a, float %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB7_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB7_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -241,8 +241,8 @@ define i32 @func9(double %a, double %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB8_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB8_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: @@ -270,8 +270,8 @@ define i32 @func10(double %a, double %b) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) -; CHECK-NEXT: br.l .LBB9_3 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: br.l.t .LBB9_3 ; CHECK: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK: .LBB{{[0-9]+}}_3: diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll index 4f0c7b50eb753..51256319d3b53 100644 --- a/llvm/test/CodeGen/VE/call.ll +++ b/llvm/test/CodeGen/VE/call.ll @@ -8,7 +8,7 @@ define i32 @sample_call() { ; CHECK-NEXT: lea.sl %s12, sample_add@hi(, %s0) ; CHECK-NEXT: or %s0, 1, (0)1 ; CHECK-NEXT: or %s1, 2, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: 
bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @sample_add(i32 1, i32 2) ret i32 %r @@ -34,7 +34,7 @@ define i32 @stack_call_int() { ; CHECK-NEXT: or %s6, 7, (0)1 ; CHECK-NEXT: or %s7, 8, (0)1 ; CHECK-NEXT: stl %s34, 240(, %s11) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) ret i32 %r @@ -59,7 +59,7 @@ define i32 @stack_call_int_szext() { ; CHECK-NEXT: or %s6, 7, (0)1 ; CHECK-NEXT: or %s7, 8, (0)1 ; CHECK-NEXT: stl %s34, 240(, %s11) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1) ret i32 %r @@ -93,7 +93,7 @@ define float @stack_call_float() { ; CHECK-NEXT: or %s5, 0, %s6 ; CHECK-NEXT: or %s6, 0, %s7 ; CHECK-NEXT: or %s7, 0, %s34 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0) ret float %r @@ -116,7 +116,7 @@ define float @stack_call_float2(float %p0) { ; CHECK-NEXT: or %s5, 0, %s0 ; CHECK-NEXT: or %s6, 0, %s0 ; CHECK-NEXT: or %s7, 0, %s0 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0) ret float %r diff --git a/llvm/test/CodeGen/VE/callstruct.ll b/llvm/test/CodeGen/VE/callstruct.ll index c1d9d9f0d27fb..f8886e0cbcbe3 100644 --- a/llvm/test/CodeGen/VE/callstruct.ll +++ b/llvm/test/CodeGen/VE/callstruct.ll @@ -28,7 +28,7 @@ define void @caller() { ; CHECK-NEXT: lea %s0, -8(, %s9) ; CHECK-NEXT: or %s1, 3, (0)1 ; CHECK-NEXT: or %s2, 4, (0)1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: ld %s0, -8(, %s9) ; CHECK-NEXT: lea %s1, A@lo ; CHECK-NEXT: and %s1, %s1, (32)0 diff --git a/llvm/test/CodeGen/VE/fp_add.ll b/llvm/test/CodeGen/VE/fp_add.ll index 0c8df7c8f6aa1..7cc6ea2bfe657 100644 --- a/llvm/test/CodeGen/VE/fp_add.ll +++ b/llvm/test/CodeGen/VE/fp_add.ll @@ -61,3 +61,21 @@ define double @func8(double %a) { %r = fadd double %a, 0x7FEFFFFFFFFFFFFF ret double %r } + +define float @fadds_imm(float %a) { +; CHECK-LABEL: fadds_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.s %s0, %s0, (2)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd float %a, -2.e+00 + ret float %r +} + +define double @faddd_imm(double %a) { +; CHECK-LABEL: faddd_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.d %s0, %s0, (2)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd double %a, -2.e+00 + ret double %r +} diff --git a/llvm/test/CodeGen/VE/fp_extload_truncstore.ll b/llvm/test/CodeGen/VE/fp_extload_truncstore.ll index 9487e1ca9eb4a..b031dd25fb5e8 100644 --- a/llvm/test/CodeGen/VE/fp_extload_truncstore.ll +++ b/llvm/test/CodeGen/VE/fp_extload_truncstore.ll @@ -13,7 +13,7 @@ define float @func_i16fp32(i16* %a) { ; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(, %s1) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %a.val = load i16, i16* %a, align 4 %a.asd = call float @llvm.convert.from.fp16.f32(i16 %a.val) @@ -27,7 +27,7 @@ define double 
@func_i16fp64(i16* %a) { ; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(, %s1) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: cvt.d.s %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %a.val = load i16, i16* %a, align 4 @@ -42,7 +42,7 @@ define float @func_fp16fp32(half* %a) { ; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(, %s1) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %a.val = load half, half* %a, align 4 %a.asd = fpext half %a.val to float @@ -56,7 +56,7 @@ define double @func_fp16fp64(half* %a) { ; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(, %s1) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: cvt.d.s %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %a.val = load half, half* %a, align 4 @@ -73,7 +73,7 @@ define void @func_fp32i16(i16* %fl.ptr, float %val) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: st2b %s0, (, %s18) ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: or %s11, 0, %s9 @@ -92,13 +92,13 @@ define half @func_fp32fp16(half* %fl.ptr, float %a) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s19, 0, %s0 ; CHECK-NEXT: lea %s0, __gnu_h2f_ieee@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s19 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: st2b %s19, (, %s18) ; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload @@ -128,7 +128,7 @@ define void @func_fp64i16(i16* %fl.ptr, double %val) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: st2b %s0, (, %s18) ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: or %s11, 0, %s9 @@ -146,7 +146,7 @@ define void @func_fp64fp16(half* %fl.ptr, double %val) { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(, %s0) ; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: st2b %s0, (, %s18) ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: or %s11, 0, %s9 diff --git a/llvm/test/CodeGen/VE/fp_mul.ll b/llvm/test/CodeGen/VE/fp_mul.ll index 4a6c740eb7c8c..4b324980ca694 100644 --- a/llvm/test/CodeGen/VE/fp_mul.ll +++ b/llvm/test/CodeGen/VE/fp_mul.ll @@ -61,3 +61,40 @@ define double @func8(double %a) { %r = fmul double %a, 0x7FEFFFFFFFFFFFFF ret double %r } + +define float @fmuls_ir(float %a) { +; CHECK-LABEL: fmuls_ir: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fmul.s %s0, 0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul float 0.e+00, %a + ret float %r +} + +define float @fmuls_ri(float %a) { +; CHECK-LABEL: fmuls_ri: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fmul.s %s0, %s0, (2)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul float %a, -2. 
+ ret float %r +} + +define float @fmuls_ri2(float %a) { +; CHECK-LABEL: fmuls_ri2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fmul.s %s0, %s0, (3)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul float %a, -36893488147419103232. + ret float %r +} + +define float @fmuls_ri3(float %a) { +; CHECK-LABEL: fmuls_ri3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fmul.s %s0, %s0, (9)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul float %a, 1.175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875E-38 + ret float %r +} + diff --git a/llvm/test/CodeGen/VE/fp_sub.ll b/llvm/test/CodeGen/VE/fp_sub.ll index 18e97e46aca86..42a3d9bec1b7f 100644 --- a/llvm/test/CodeGen/VE/fp_sub.ll +++ b/llvm/test/CodeGen/VE/fp_sub.ll @@ -61,3 +61,21 @@ define double @func8(double %a) { %r = fadd double %a, 0xFFEFFFFFFFFFFFFF ret double %r } + +define float @fsubs_ir(float %a) { +; CHECK-LABEL: fsubs_ir: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fsub.s %s0, 0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fsub float 0.e+00, %a + ret float %r +} + +define float @fsubs_ri(float %a) { +; CHECK-LABEL: fsubs_ri: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.s %s0, %s0, (2)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fsub float %a, 2.0e+00 + ret float %r +} diff --git a/llvm/test/CodeGen/VE/pic_access_static_data.ll b/llvm/test/CodeGen/VE/pic_access_static_data.ll index b8fcd42d4ac21..e0741724172e9 100644 --- a/llvm/test/CodeGen/VE/pic_access_static_data.ll +++ b/llvm/test/CodeGen/VE/pic_access_static_data.ll @@ -47,7 +47,7 @@ define i32 @main() { ; CHECK-NEXT: and %s12, %s12, (32)0 ; CHECK-NEXT: sic %s16 ; CHECK-NEXT: lea.sl %s12, func@plt_hi(%s16, %s12) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: lea %s0, dst@gotoff_lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s0, dst@gotoff_hi(, %s0) @@ -62,7 +62,7 @@ define i32 @main() { ; CHECK-NEXT: sic %s16 ; CHECK-NEXT: lea.sl %s12, printf@plt_hi(%s16, %s12) ; CHECK-NEXT: st %s0, 176(, %s11) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 store i1 true, i1* @src, align 4 diff --git a/llvm/test/CodeGen/VE/pic_func_call.ll b/llvm/test/CodeGen/VE/pic_func_call.ll index a5f8a6acc075d..489ffbe4c0ea9 100644 --- a/llvm/test/CodeGen/VE/pic_func_call.ll +++ b/llvm/test/CodeGen/VE/pic_func_call.ll @@ -11,7 +11,7 @@ define void @func() { ; CHECK-NEXT: and %s12, %s12, (32)0 ; CHECK-NEXT: sic %s16 ; CHECK-NEXT: lea.sl %s12, function@plt_hi(%s16, %s12) -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 call void bitcast (void (...)* @function to void ()*)() diff --git a/llvm/test/CodeGen/VE/pic_indirect_func_call.ll b/llvm/test/CodeGen/VE/pic_indirect_func_call.ll index f1f3a6ea0acba..1e758c92901a7 100644 --- a/llvm/test/CodeGen/VE/pic_indirect_func_call.ll +++ b/llvm/test/CodeGen/VE/pic_indirect_func_call.ll @@ -19,7 +19,7 @@ define void @func() { ; CHECK-NEXT: ld %s1, (%s1, %s15) ; CHECK-NEXT: st %s0, (, %s1) ; CHECK-NEXT: or %s12, 0, %s0 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 store void (...)* @function, void (...)** @ptr, align 8 diff --git a/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll b/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll index 70f6a3d9ae139..690f3c7061cae 100644 --- a/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll +++ b/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll @@ -11,7 +11,7 @@ define void 
@func() { ; CHECK-NEXT: lea %s13, -176 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) -; CHECK-NEXT: brge.l %s11, %s8, .LBB0_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -27,7 +27,7 @@ define void @func() { ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l (,%lr) +; CHECK-NEXT: b.l.t (, %s10) ret void } @@ -42,7 +42,7 @@ define i64 @func1(i64) { ; CHECK-NEXT: lea %s13, -176 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) -; CHECK-NEXT: brge.l %s11, %s8, .LBB1_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -58,7 +58,7 @@ define i64 @func1(i64) { ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l (,%lr) +; CHECK-NEXT: b.l.t (, %s10) ret i64 %0 } @@ -73,7 +73,7 @@ define i64 @func2(i64, i64, i64, i64, i64) { ; CHECK-NEXT: lea %s13, -176 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) -; CHECK-NEXT: brge.l %s11, %s8, .LBB2_2 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) ; CHECK-NEXT: or %s62, 0, %s0 @@ -90,6 +90,6 @@ define i64 @func2(i64, i64, i64, i64, i64) { ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) -; CHECK-NEXT: b.l (,%lr) +; CHECK-NEXT: b.l.t (, %s10) ret i64 %4 } diff --git a/llvm/test/CodeGen/VE/va_caller.ll b/llvm/test/CodeGen/VE/va_caller.ll index 414fce57dee42..64473378a0df1 100644 --- a/llvm/test/CodeGen/VE/va_caller.ll +++ b/llvm/test/CodeGen/VE/va_caller.ll @@ -38,7 +38,7 @@ define i32 @caller() { ; CHECK-NEXT: stl %s18, 176(, %s11) ; CHECK-NEXT: or %s6, 0, %s0 ; CHECK-NEXT: or %s0, 0, %s18 -; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s0, 0, %s18 ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: or %s11, 0, %s9 diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 5cbff5d0bb627..188ad22c89fcc 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -850,8 +850,51 @@ terminate7: ; preds = %ehcleanup unreachable } +; We don't need to call placeBlockMarker after fixUnwindMismatches unless the +; destination is the appendix BB at the very end. This should not crash. 
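+;
+; Illustrative CFG sketch of test16 (an added reading aid only: these comment
+; lines are not CHECKed, and the edge list is paraphrased from the IR below):
+;
+;   entry -> loop
+;   loop  --invoke @foo--> bb0        (unwind: catch.dispatch0 -> catch.start0)
+;   bb0   -> bb1 | last
+;   bb1   --invoke @bar--> try.cont   (unwind: catch.dispatch1 -> catch.start1)
+;   catch.start0, catch.start1 -> try.cont
+;   try.cont -> loop (back edge)
+;
+; Two invokes inside the loop unwind to different EH pads while sharing
+; try.cont; in the NOSORT run this layout yields one more unwind mismatch,
+; matching the NOSORT-STAT count below going from 15 to 16.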
+define void @test16(i32* %p, i32 %a, i32 %b) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + br label %loop + +loop: + invoke void @foo() + to label %bb0 unwind label %catch.dispatch0 + +bb0: + %cmp = icmp ne i32 %a, %b + br i1 %cmp, label %bb1, label %last + +bb1: ; preds = %bb0 + invoke void @bar() + to label %try.cont unwind label %catch.dispatch1 + +catch.dispatch0: ; preds = %loop + %0 = catchswitch within none [label %catch.start0] unwind to caller + +catch.start0: ; preds = %catch.dispatch0 + %1 = catchpad within %0 [i8* null] + %2 = call i8* @llvm.wasm.get.exception(token %1) + %3 = call i32 @llvm.wasm.get.ehselector(token %1) + catchret from %1 to label %try.cont + +catch.dispatch1: ; preds = %bb1 + %4 = catchswitch within none [label %catch.start1] unwind to caller + +catch.start1: ; preds = %catch.dispatch1 + %5 = catchpad within %4 [i8* null] + %6 = call i8* @llvm.wasm.get.exception(token %5) + %7 = call i32 @llvm.wasm.get.ehselector(token %5) + catchret from %5 to label %try.cont + +try.cont: ; preds = %catch.start1, %catch.start0, %bb1 + br label %loop + +last: + ret void +} + ; Check if the unwind destination mismatch stats are correct -; NOSORT-STAT: 15 wasm-cfg-stackify - Number of EH pad unwind mismatches found +; NOSORT-STAT: 16 wasm-cfg-stackify - Number of EH pad unwind mismatches found declare void @foo() declare void @bar() diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll index d0283386afdd8..8e89f2a02bb2d 100644 --- a/llvm/test/CodeGen/WebAssembly/offset.ll +++ b/llvm/test/CodeGen/WebAssembly/offset.ll @@ -645,9 +645,9 @@ define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) ; CHECK-LABEL: aggregate_return: ; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK: i64.store 8($0):p2align=2, $pop[[L0]]{{$}} +; CHECK: i64.store 8($0), $pop[[L0]]{{$}} ; CHECK: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK: i64.store 0($0):p2align=2, $pop[[L1]]{{$}} +; CHECK: i64.store 0($0), $pop[[L1]]{{$}} define {i32,i32,i32,i32} @aggregate_return() { ret {i32,i32,i32,i32} zeroinitializer } diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll index 5437a9ab0a81d..53731b0f7c16d 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -31,8 +31,6 @@ define <4 x float> @convert_u_v4f32(<4 x i32> %x) { ; NO-SIMD128-NOT: i64x2 ; SIMD128-VM-NOT: f64x2.convert_i64x2_s ; SIMD128-NEXT: .functype convert_s_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.convert_i64x2_s $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] define <2 x double> @convert_s_v2f64(<2 x i64> %x) { %a = sitofp <2 x i64> %x to <2 x double> ret <2 x double> %a @@ -42,8 +40,6 @@ define <2 x double> @convert_s_v2f64(<2 x i64> %x) { ; NO-SIMD128-NOT: i64x2 ; SIMD128-VM-NOT: f64x2.convert_i64x2_u ; SIMD128-NEXT: .functype convert_u_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.convert_i64x2_u $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] define <2 x double> @convert_u_v2f64(<2 x i64> %x) { %a = uitofp <2 x i64> %x to <2 x double> ret <2 x double> %a @@ -73,8 +69,6 @@ define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) { ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: i64x2.trunc_sat_f64x2_s ; SIMD128-NEXT: .functype trunc_sat_s_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.trunc_sat_f64x2_s $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] define <2 x i64> @trunc_sat_s_v2i64(<2 x
double> %x) { %a = fptosi <2 x double> %x to <2 x i64> ret <2 x i64> %a @@ -84,8 +78,6 @@ define <2 x i64> @trunc_sat_s_v2i64(<2 x double> %x) { ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: i64x2.trunc_sat_f64x2_u ; SIMD128-NEXT: .functype trunc_sat_u_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.trunc_sat_f64x2_u $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) { %a = fptoui <2 x double> %x to <2 x i64> ret <2 x i64> %a diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 77e677df6459a..6e0fadc9d7081 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -456,28 +456,6 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) { ret <2 x i64> %a } -; CHECK-LABEL: trunc_sat_s_v2i64: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype trunc_sat_s_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.trunc_sat_f64x2_s $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] -declare <2 x i64> @llvm.wasm.trunc.saturate.signed.v2i64.v2f64(<2 x double>) -define <2 x i64> @trunc_sat_s_v2i64(<2 x double> %x) { - %a = call <2 x i64> @llvm.wasm.trunc.saturate.signed.v2i64.v2f64(<2 x double> %x) - ret <2 x i64> %a -} - -; CHECK-LABEL: trunc_sat_u_v2i64: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype trunc_sat_u_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.trunc_sat_f64x2_u $push[[R:[0-9]+]]=, $0 -; SIMD128-NEXT: return $pop[[R]] -declare <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double>) -define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) { - %a = call <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double> %x) - ret <2 x i64> %a -} - ; ============================================================================== ; 4 x f32 ; ============================================================================== diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll index 64cb129a7eb92..f285e48210372 100644 --- a/llvm/test/CodeGen/X86/O3-pipeline.ll +++ b/llvm/test/CodeGen/X86/O3-pipeline.ll @@ -42,6 +42,7 @@ ; CHECK-NEXT: Remove unreachable blocks from the CFG ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Constant Hoisting @@ -67,6 +68,7 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis diff --git a/llvm/test/CodeGen/X86/asm-modifier2.ll b/llvm/test/CodeGen/X86/asm-modifier2.ll new file mode 100644 index 0000000000000..69af99e2d5555 --- /dev/null +++ b/llvm/test/CodeGen/X86/asm-modifier2.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define void @test1() { +; CHECK-LABEL: test1: +; CHECK: vmovaps %xmm0, %xmm0 +; CHECK: vmovaps %ymm0, %ymm0 +; CHECK: vmovaps %zmm0, %zmm0 + tail call void asm sideeffect "vmovaps ${0:x}, ${0:x}", "{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "vmovaps ${0:t}, ${0:t}", "{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "vmovaps ${0:g}, ${0:g}", 
"{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + ret void +} + +define void @test2() { +; CHECK-LABEL: test2: +; CHECK: vmovaps %xmm0, %xmm0 +; CHECK: vmovaps %ymm0, %ymm0 +; CHECK: vmovaps %zmm0, %zmm0 + tail call void asm sideeffect inteldialect "vmovaps ${0:x}, ${0:x}", "{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "vmovaps ${0:t}, ${0:t}", "{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "vmovaps ${0:g}, ${0:g}", "{xmm0},~{dirflag},~{fpsr},~{flags}"(i32 0) + ret void +} + +define void @test3() { +; CHECK-LABEL: test3: +; CHECK: movb %al, %al +; CHECK: movb %ah, %ah +; CHECK: movw %ax, %ax +; CHECK: movl %eax, %eax +; CHECK: movq %rax, %rax + tail call void asm sideeffect "mov ${0:b}, ${0:b}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "mov ${0:h}, ${0:h}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "mov ${0:w}, ${0:w}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "mov ${0:k}, ${0:k}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect "mov ${0:q}, ${0:q}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + ret void +} + +define void @test4() { +; CHECK-LABEL: test4: +; CHECK: movb %al, %al +; CHECK: movb %ah, %ah +; CHECK: movw %ax, %ax +; CHECK: movl %eax, %eax +; CHECK: movq %rax, %rax + tail call void asm sideeffect inteldialect "mov ${0:b}, ${0:b}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "mov ${0:h}, ${0:h}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "mov ${0:w}, ${0:w}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "mov ${0:k}, ${0:k}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + tail call void asm sideeffect inteldialect "mov ${0:q}, ${0:q}", "{eax},~{dirflag},~{fpsr},~{flags}"(i32 0) + ret void +} diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index c3b5983644b4a..c6c2230a1d77d 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -151,7 +151,7 @@ define void @avg_v24i8(<24 x i8>* %a, <24 x i8>* %b) nounwind { ; SSE2-NEXT: pand %xmm6, %xmm5 ; SSE2-NEXT: pand %xmm6, %xmm1 ; SSE2-NEXT: packuswb %xmm5, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 ; SSE2-NEXT: movq %xmm1, (%rax) ; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq @@ -2588,206 +2588,166 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind ; AVX2-NEXT: pushq %r13 ; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: subq $16, %rsp -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; 
AVX2-NEXT: vpextrq $1, %xmm4, %rbx -; AVX2-NEXT: vmovq %xmm4, %rbp -; AVX2-NEXT: vpextrq $1, %xmm3, %rdi -; AVX2-NEXT: vmovq %xmm3, %rcx -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrq $1, %xmm3, %rdx -; AVX2-NEXT: vmovq %xmm3, %r9 -; AVX2-NEXT: vpextrq $1, %xmm2, %r13 -; AVX2-NEXT: vmovq %xmm2, %r12 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrq $1, %xmm3, %r14 -; AVX2-NEXT: vmovq %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX2-NEXT: vpextrq $1, %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: vmovq %xmm1, %r10 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm9 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 +; AVX2-NEXT: vmovq %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX2-NEXT: vpextrq $1, %xmm4, %rax -; AVX2-NEXT: addq %rbx, %rax -; AVX2-NEXT: movq %rax, %rbx -; AVX2-NEXT: vmovq %xmm4, %rsi -; AVX2-NEXT: addq %rbp, %rsi -; AVX2-NEXT: vpextrq $1, %xmm3, %rax -; AVX2-NEXT: addq %rdi, %rax -; AVX2-NEXT: movq %rax, %rdi -; AVX2-NEXT: vmovq %xmm3, %r11 -; AVX2-NEXT: addq %rcx, %r11 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm6 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; AVX2-NEXT: vmovq %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm7 +; AVX2-NEXT: vmovq %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX2-NEXT: vpextrq $1, %xmm3, %rcx -; AVX2-NEXT: addq %rdx, %rcx -; AVX2-NEXT: vmovq %xmm3, %r8 -; AVX2-NEXT: addq %r9, %r8 -; AVX2-NEXT: 
vpextrq $1, %xmm2, %r9 -; AVX2-NEXT: addq %r13, %r9 -; AVX2-NEXT: vmovq %xmm2, %r15 -; AVX2-NEXT: addq %r12, %r15 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-NEXT: vmovq %xmm2, %r11 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm0 +; AVX2-NEXT: vmovq %xmm0, %r14 +; AVX2-NEXT: vpextrq $1, %xmm0, %rbx +; AVX2-NEXT: vpextrq $1, %xmm2, %rsi +; AVX2-NEXT: vpextrq $1, %xmm7, %r12 +; AVX2-NEXT: vpextrq $1, %xmm6, %r15 +; AVX2-NEXT: vpextrq $1, %xmm5, %rdx +; AVX2-NEXT: vpextrq $1, %xmm1, %rcx ; AVX2-NEXT: vpextrq $1, %xmm3, %rax -; AVX2-NEXT: addq %r14, %rax -; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vmovq %xmm3, %rax -; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vpextrq $1, %xmm2, %rax -; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vmovq %xmm2, %rax -; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload -; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: vpextrq $1, %xmm0, %rbp -; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; AVX2-NEXT: vmovq %xmm0, %r12 -; AVX2-NEXT: addq %r10, %r12 -; AVX2-NEXT: vpextrq $1, %xmm1, %rax +; AVX2-NEXT: vmovq %xmm3, %rbp +; AVX2-NEXT: vpextrq $1, %xmm9, %r9 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm6, %xmm2 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrq $1, %xmm0, %r10 -; AVX2-NEXT: addq %rax, %r10 -; AVX2-NEXT: vmovq %xmm1, %rax -; AVX2-NEXT: vmovq %xmm0, %rdx -; AVX2-NEXT: addq %rax, %rdx -; AVX2-NEXT: addq $-1, %rbx -; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax -; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %rsi -; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax -; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill -; AVX2-NEXT: addq $-1, %rdi -; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm5 +; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm7 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm8 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm8, %xmm0 +; AVX2-NEXT: vpextrq $1, %xmm0, %rdi +; AVX2-NEXT: addq %rbx, %rdi +; 
AVX2-NEXT: movq %rdi, %rbx +; AVX2-NEXT: vpextrq $1, %xmm8, %r10 +; AVX2-NEXT: addq %rsi, %r10 +; AVX2-NEXT: vpextrq $1, %xmm7, %rsi +; AVX2-NEXT: addq %r12, %rsi +; AVX2-NEXT: movq %rsi, %r12 +; AVX2-NEXT: vpextrq $1, %xmm4, %r13 +; AVX2-NEXT: addq %r15, %r13 +; AVX2-NEXT: vpextrq $1, %xmm5, %r15 +; AVX2-NEXT: addq %rdx, %r15 +; AVX2-NEXT: vpextrq $1, %xmm3, %r8 +; AVX2-NEXT: addq %rcx, %r8 +; AVX2-NEXT: vpextrq $1, %xmm6, %rsi +; AVX2-NEXT: addq %rax, %rsi +; AVX2-NEXT: vmovq %xmm6, %rdx +; AVX2-NEXT: addq %rbp, %rdx +; AVX2-NEXT: vpextrq $1, %xmm2, %rcx +; AVX2-NEXT: addq %r9, %rcx +; AVX2-NEXT: vmovq %xmm0, %rdi +; AVX2-NEXT: leaq -1(%r14,%rdi), %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r11 -; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax +; AVX2-NEXT: vmovq %xmm8, %rdi +; AVX2-NEXT: leaq -1(%r11,%rdi), %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %rcx -; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax +; AVX2-NEXT: vmovq %xmm7, %rdi +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: leaq -1(%rax,%rdi), %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r8 -; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax +; AVX2-NEXT: vmovq %xmm4, %rdi +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: leaq -1(%rax,%rdi), %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: addq $-1, %r9 -; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %eax -; AVX2-NEXT: adcq $-1, %rax -; AVX2-NEXT: movq %rax, %rsi -; AVX2-NEXT: addq $-1, %r15 -; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movl $0, %r15d -; AVX2-NEXT: adcq $-1, %r15 -; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: movl $0, %r13d -; AVX2-NEXT: adcq $-1, %r13 -; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: movl $0, %r14d -; AVX2-NEXT: adcq $-1, %r14 -; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX2-NEXT: movl $0, %ebx -; AVX2-NEXT: adcq $-1, %rbx +; AVX2-NEXT: vmovq %xmm5, %rdi ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: addq $-1, %rax -; AVX2-NEXT: movl $0, %r11d -; AVX2-NEXT: adcq $-1, %r11 -; AVX2-NEXT: addq $-1, %rbp +; AVX2-NEXT: leaq -1(%rax,%rdi), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: vmovq %xmm1, %rdi +; AVX2-NEXT: vmovq %xmm3, %rbp +; AVX2-NEXT: leaq -1(%rdi,%rbp), %rax +; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: vmovq %xmm9, %rdi +; AVX2-NEXT: vmovq %xmm2, %rbp +; AVX2-NEXT: leaq -1(%rdi,%rbp), %rdi +; AVX2-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX2-NEXT: addq $-1, %rbx +; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %r9d ; AVX2-NEXT: adcq $-1, %r9 -; AVX2-NEXT: addq $-1, %r12 -; AVX2-NEXT: movl $0, %r8d -; AVX2-NEXT: adcq $-1, %r8 ; AVX2-NEXT: addq $-1, %r10 +; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX2-NEXT: movl $0, %edi ; AVX2-NEXT: adcq $-1, %rdi +; AVX2-NEXT: addq $-1, %r12 +; AVX2-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte 
Spill +; AVX2-NEXT: movl $0, %r11d +; AVX2-NEXT: adcq $-1, %r11 +; AVX2-NEXT: addq $-1, %r13 +; AVX2-NEXT: movl $0, %r10d +; AVX2-NEXT: adcq $-1, %r10 +; AVX2-NEXT: addq $-1, %r15 +; AVX2-NEXT: movl $0, %r14d +; AVX2-NEXT: adcq $-1, %r14 +; AVX2-NEXT: addq $-1, %r8 +; AVX2-NEXT: movl $0, %ebp +; AVX2-NEXT: adcq $-1, %rbp +; AVX2-NEXT: addq $-1, %rsi +; AVX2-NEXT: movl $0, %r12d +; AVX2-NEXT: adcq $-1, %r12 ; AVX2-NEXT: addq $-1, %rdx -; AVX2-NEXT: movl $0, %ecx -; AVX2-NEXT: adcq $-1, %rcx -; AVX2-NEXT: shldq $63, %rdx, %rcx -; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: shldq $63, %r10, %rdi -; AVX2-NEXT: shldq $63, %r12, %r8 -; AVX2-NEXT: shldq $63, %rbp, %r9 -; AVX2-NEXT: shldq $63, %rax, %r11 -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; AVX2-NEXT: movl $0, %ebx +; AVX2-NEXT: adcq $-1, %rbx +; AVX2-NEXT: addq $-1, %rcx +; AVX2-NEXT: movl $0, %eax +; AVX2-NEXT: adcq $-1, %rax +; AVX2-NEXT: shldq $63, %rcx, %rax ; AVX2-NEXT: shldq $63, %rdx, %rbx -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX2-NEXT: shldq $63, %rdx, %r14 -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX2-NEXT: shldq $63, %rdx, %r13 -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %r15 -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %rsi -; AVX2-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %rsi -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %r12 +; AVX2-NEXT: shldq $63, %rsi, %r12 +; AVX2-NEXT: shldq $63, %r8, %rbp +; AVX2-NEXT: shldq $63, %r15, %r14 +; AVX2-NEXT: shldq $63, %r13, %r10 ; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %rcx -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX2-NEXT: shldq $63, %rax, %r10 -; AVX2-NEXT: movq (%rsp), %rax # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX2-NEXT: shldq $63, %rdx, %rax -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload -; AVX2-NEXT: shldq $63, %rdx, %rbp -; AVX2-NEXT: vmovq %rbp, %xmm8 -; AVX2-NEXT: vmovq %rax, %xmm9 -; AVX2-NEXT: vmovq %r10, %xmm0 +; AVX2-NEXT: shldq $63, %rcx, %r11 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shldq $63, %rcx, %rdi +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shldq $63, %rcx, %r9 +; AVX2-NEXT: vmovq %r9, %xmm8 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm9 +; AVX2-NEXT: vmovq %rdi, %xmm0 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: vmovq %rcx, %xmm1 -; AVX2-NEXT: vmovq %r12, %xmm12 -; AVX2-NEXT: vmovq %rsi, %xmm13 -; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 8-byte Folded Reload -; AVX2-NEXT: # xmm14 = mem[0],zero -; AVX2-NEXT: vmovq %r15, %xmm15 -; AVX2-NEXT: vmovq %r13, %xmm10 -; AVX2-NEXT: vmovq %r14, %xmm11 -; AVX2-NEXT: 
vmovq %rbx, %xmm2 -; AVX2-NEXT: vmovq %r11, %xmm3 -; AVX2-NEXT: vmovq %r9, %xmm4 -; AVX2-NEXT: vmovq %r8, %xmm5 -; AVX2-NEXT: vmovq %rdi, %xmm6 -; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload -; AVX2-NEXT: # xmm7 = mem[0],zero +; AVX2-NEXT: vmovq %r11, %xmm12 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm13 +; AVX2-NEXT: vmovq %r10, %xmm14 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm15 +; AVX2-NEXT: vmovq %r14, %xmm10 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm11 +; AVX2-NEXT: vmovq %rbp, %xmm2 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; AVX2-NEXT: shrq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 +; AVX2-NEXT: vmovq %r12, %xmm4 +; AVX2-NEXT: vmovq %rbx, %xmm5 +; AVX2-NEXT: vmovq %rax, %xmm6 +; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX2-NEXT: shrq %rax +; AVX2-NEXT: vmovq %rax, %xmm7 ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3],xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm9 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX2-NEXT: vpslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1] @@ -2800,8 +2760,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5],xmm1[6,7] ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm8[3] ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] -; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1 +; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; AVX2-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4,5,6,7] ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] @@ -2811,7 +2771,6 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2,3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX2-NEXT: vmovdqu %xmm0, (%rax) -; AVX2-NEXT: addq $16, %rsp ; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %r12 ; AVX2-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/avx-cvt-3.ll b/llvm/test/CodeGen/X86/avx-cvt-3.ll index 03592c8af9dbb..87eabd9cb5521 100644 --- a/llvm/test/CodeGen/X86/avx-cvt-3.ll +++ b/llvm/test/CodeGen/X86/avx-cvt-3.ll @@ -93,31 +93,31 @@ define <8 x float> @sitofp_shuffle_allbits_v8i32(<8 x i32> %a0) { define <8 x float> @sitofp_insert_constants_v8i32(<8 x i32> %a0) { ; X86-LABEL: sitofp_insert_constants_v8i32: ; X86: # %bb.0: -; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] -; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 -; 
X86-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7] -; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; X86-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] +; X86-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X86-NEXT: movl $2, %eax -; X86-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 +; X86-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 ; X86-NEXT: movl $-3, %eax -; X86-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X86-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm0 ; X86-NEXT: retl ; ; X64-LABEL: sitofp_insert_constants_v8i32: ; X64: # %bb.0: -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7] -; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 -; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7] -; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] +; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X64-NEXT: movl $2, %eax -; X64-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 +; X64-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0 ; X64-NEXT: movl $-3, %eax -; X64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm0 ; X64-NEXT: retq %1 = insertelement <8 x i32> %a0, i32 0, i32 0 diff --git a/llvm/test/CodeGen/X86/avx-shift.ll b/llvm/test/CodeGen/X86/avx-shift.ll index ee6ca2224ea48..443204ccf0996 100644 --- a/llvm/test/CodeGen/X86/avx-shift.ll +++ b/llvm/test/CodeGen/X86/avx-shift.ll @@ -181,6 +181,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) { ret <8 x i32> %bitop } +define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) { +; CHECK-LABEL: vshift08_add: +; CHECK: # %bb.0: +; CHECK-NEXT: vpslld $23, %xmm0, %xmm2 +; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2 +; CHECK-NEXT: vcvttps2dq %xmm2, %xmm2 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpslld $23, %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3 +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a + %r = add <8 x i32> %bitop, %y + ret <8 x i32> %r +} + ; PR15141 define <4 x i32> @vshift13(<4 x i32> %in) { ; CHECK-LABEL: vshift13: diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll index b30a81932d89b..7b7ddf72123bd 100644 --- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll @@ -611,11 +611,12 @@ define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) { define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store: ; AVX512: # %bb.0: -; AVX512-NEXT: movzbl (%rdi), %eax +; AVX512-NEXT: movb (%rdi), %al +; AVX512-NEXT:
shrb %al ; AVX512-NEXT: xorl %ecx, %ecx -; AVX512-NEXT: btl $1, %eax +; AVX512-NEXT: testb $1, %al ; AVX512-NEXT: movl $255, %eax -; AVX512-NEXT: cmovael %ecx, %eax +; AVX512-NEXT: cmovel %ecx, %eax ; AVX512-NEXT: kmovd %eax, %k0 ; AVX512-NEXT: kshiftrb $1, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) @@ -623,11 +624,12 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { ; ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax +; AVX512NOTDQ-NEXT: movb (%rdi), %al +; AVX512NOTDQ-NEXT: shrb %al ; AVX512NOTDQ-NEXT: xorl %ecx, %ecx -; AVX512NOTDQ-NEXT: btl $1, %eax +; AVX512NOTDQ-NEXT: testb $1, %al ; AVX512NOTDQ-NEXT: movl $255, %eax -; AVX512NOTDQ-NEXT: cmovael %ecx, %eax +; AVX512NOTDQ-NEXT: cmovel %ecx, %eax ; AVX512NOTDQ-NEXT: kmovd %eax, %k0 ; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512NOTDQ-NEXT: kmovd %k0, %eax @@ -641,24 +643,22 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) { ; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store: ; AVX512: # %bb.0: -; AVX512-NEXT: movzbl (%rdi), %eax -; AVX512-NEXT: xorl %ecx, %ecx -; AVX512-NEXT: btl $2, %eax -; AVX512-NEXT: movl $255, %eax -; AVX512-NEXT: cmovael %ecx, %eax -; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: testb $4, (%rdi) +; AVX512-NEXT: movl $255, %ecx +; AVX512-NEXT: cmovel %eax, %ecx +; AVX512-NEXT: kmovd %ecx, %k0 ; AVX512-NEXT: kshiftrb $2, %k0, %k0 ; AVX512-NEXT: kmovb %k0, (%rsi) ; AVX512-NEXT: retq ; ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax -; AVX512NOTDQ-NEXT: xorl %ecx, %ecx -; AVX512NOTDQ-NEXT: btl $2, %eax -; AVX512NOTDQ-NEXT: movl $255, %eax -; AVX512NOTDQ-NEXT: cmovael %ecx, %eax -; AVX512NOTDQ-NEXT: kmovd %eax, %k0 +; AVX512NOTDQ-NEXT: xorl %eax, %eax +; AVX512NOTDQ-NEXT: testb $4, (%rdi) +; AVX512NOTDQ-NEXT: movl $255, %ecx +; AVX512NOTDQ-NEXT: cmovel %eax, %ecx +; AVX512NOTDQ-NEXT: kmovd %ecx, %k0 ; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0 ; AVX512NOTDQ-NEXT: kmovd %k0, %eax ; AVX512NOTDQ-NEXT: movb %al, (%rsi) diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 7a99fcc3be33f..d2062f21762c9 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -553,7 +553,6 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) { ; CHECK-LABEL: insert_v4i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1 -; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 ; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 @@ -595,7 +594,6 @@ define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) { ; CHECK-LABEL: insert_v8i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1 -; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 ; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index c782d9b5bcf07..719bd9f9d95f0 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1575,11 +1575,9 @@ define <2 x i32> 
@narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind ; AVX512: ## %bb.0: ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2] ; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2] -; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x25,0xc0,0x0f] ; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8] ; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0xc1] -; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdf,0xc1] ; AVX512-NEXT: retq ## encoding: [0xc3] ; ; SKX-LABEL: narrow_cmp_select_reverse: diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll b/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll index c85f01b919846..a6f297392d94d 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll @@ -17,7 +17,7 @@ ; RUN: echo '!!1 3' >> %t2 ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 -define void @foo(i1 zeroext) { +define void @foo(i1 zeroext) nounwind { %2 = alloca i8, align 1 %3 = zext i1 %0 to i8 store i8 %3, i8* %2, align 1 @@ -43,23 +43,23 @@ declare i32 @baz() #1 ; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits ; LINUX-SECTIONS1-LABEL: foo: -; LINUX-SECTIONS1: jne a.BB.foo +; LINUX-SECTIONS1: jne foo.1 ; LINUX-SECTIONS1-NOT: {{jne|je|jmp}} ; LINUX-SECTIONS1-LABEL: # %bb.2: -; LINUX-SECTIONS1: jmp raa.BB.foo +; LINUX-SECTIONS1: jmp foo.cold ; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits,unique,1 -; LINUX-SECTIONS1-LABEL: a.BB.foo: -; LINUX-SECTIONS1: jmp raa.BB.foo +; LINUX-SECTIONS1-LABEL: foo.1: +; LINUX-SECTIONS1: jmp foo.cold ; LINUX-SECTIONS1: .section .text.unlikely.foo,"ax",@progbits -; LINUX-SECTIONS1-LABEL: raa.BB.foo: +; LINUX-SECTIONS1-LABEL: foo.cold: ; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits ; LINUX-SECTIONS2-LABEL: foo: -; LINUX-SECTIONS2: jne a.BB.foo +; LINUX-SECTIONS2: jne foo.0 ; LINUX-SECTIONS2-NOT: {{jne|je|jmp}} ; LINUX-SECTIONS2-LABEL: # %bb.2: ; LINUX-SECTIONS2: jmp .LBB0_3 ; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits,unique,1 -; LINUX-SECTIONS2: a.BB.foo: +; LINUX-SECTIONS2: foo.0: ; LINUX-SECTIONS2-NOT: {{jne|je|jmp}} ; LINUX-SECTIONS2: .LBB0_3: diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll b/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll index cadbdd355db0e..60f62d4c152ff 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll @@ -13,7 +13,6 @@ ; RUN: echo '!main' > %t2 ; RUN: echo '!!1 2 3' >> %t2 ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 -; @_ZTIi = external constant i8* @@ -61,9 +60,9 @@ declare i32 @__gxx_personality_v0(...) 
; LINUX-SECTIONS1-NOT: .section ; LINUX-SECTIONS1-LABEL: .LBB0_6: ; LINUX-SECTIONS1: .section .text.main,"ax",@progbits,unique,1 -; LINUX-SECTIONS1-LABEL: al.BB.main: +; LINUX-SECTIONS1-LABEL: main.0: ; LINUX-SECTIONS1: .section .text.eh.main,"ax",@progbits -; LINUX-SECTIONS1-LABEL: l.BB.main: +; LINUX-SECTIONS1-LABEL: main.eh: ; LINUX-SECTIONS1-NOT: .section ; LINUX-SECTIONS1-LABEL: .LBB0_3: ; LINUX-SECTIONS1-NOT: .section @@ -80,7 +79,7 @@ declare i32 @__gxx_personality_v0(...) ; LINUX-SECTIONS2-NOT: .section ; LINUX-SECTIONS2-LABEL: .LBB0_6: ; LINUX-SECTIONS2: .section .text.main,"ax",@progbits,unique,1 -; LINUX-SECTIONS2-LABEL: l.BB.main: +; LINUX-SECTIONS2-LABEL: main.0: ; LINUX-SECTIONS2-NOT: .section ; LINUX-SECTIONS2-LABEL: .LBB0_2: ; LINUX-SECTIONS2-NOT: .section diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll b/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll index 39d0b3898e429..2c40542f6808f 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll @@ -14,7 +14,7 @@ ; RUN: echo '!!1 3' >> %t2 ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 -define void @foo(i1 zeroext) { +define void @foo(i1 zeroext) nounwind { %2 = alloca i8, align 1 %3 = zext i1 %0 to i8 store i8 %3, i8* %2, align 1 @@ -44,12 +44,12 @@ declare i32 @baz() #1 ; LINUX-SECTIONS1-NOT: .section ; LINUX-SECTIONS1-LABEL: # %bb.2: ; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits,unique,1 -; LINUX-SECTIONS1-LABEL: a.BB.foo: +; LINUX-SECTIONS1-LABEL: foo.1: ; LINUX-SECTIONS1-LABEL: .Ltmp0: -; LINUX-SECTIONS1-NEXT: .size a.BB.foo, .Ltmp0-a.BB.foo +; LINUX-SECTIONS1-NEXT: .size foo.1, .Ltmp0-foo.1 ; LINUX-SECTIONS1-NOT: .section ; LINUX-SECTIONS1: .section .text.unlikely.foo,"ax",@progbits -; LINUX-SECTIONS1-LABEL: raa.BB.foo: +; LINUX-SECTIONS1-LABEL: foo.cold: ; LINUX-SECTIONS1: .section .text.foo,"ax",@progbits ; LINUX-SECTIONS1-LABEL: .Lfunc_end0: ; LINUX-SECTIONS1-NEXT: .size foo, .Lfunc_end0-foo @@ -60,12 +60,11 @@ declare i32 @baz() #1 ; LINUX-SECTIONS2-NOT: .section ; LINUX-SECTIONS2-LABEL: # %bb.2: ; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits,unique,1 -; LINUX-SECTIONS2-NEXT: a.BB.foo: +; LINUX-SECTIONS2-NEXT: foo.0: ; LINUX-SECTIONS2-NOT: .section ; LINUX-SECTIONS2-LABEL: .LBB0_3: ; LINUX-SECTIONS2-LABEL: .Ltmp0: -; LINUX-SECTIONS2-NEXT: .size a.BB.foo, .Ltmp0-a.BB.foo +; LINUX-SECTIONS2-NEXT: .size foo.0, .Ltmp0-foo.0 ; LINUX-SECTIONS2: .section .text.foo,"ax",@progbits ; LINUX-SECTIONS2-LABEL: .Lfunc_end0: ; LINUX-SECTIONS2-NEXT: .size foo, .Lfunc_end0-foo - diff --git a/llvm/test/CodeGen/X86/basicblock-sections-cold.ll b/llvm/test/CodeGen/X86/basicblock-sections-cold.ll index b2a36e8d2ffc2..c7282a1e57368 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-cold.ll @@ -2,10 +2,9 @@ ; Basic block with id 1 and 2 must be in the cold section. 
; RUN: echo '!_Z3bazb' > %t ; RUN: echo '!!0' >> %t -; RUN: cat %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -define void @_Z3bazb(i1 zeroext) { +define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 %3 = zext i1 %0 to i8 store i8 %3, i8* %2, align 1 @@ -32,10 +31,10 @@ declare i32 @_Z3foov() #1 ; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. -; LINUX-SECTIONS-NOT: .section .text._Z3bazb.r.BB._Z3bazb,"ax",@progbits,unique +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits,unique ; Check that a single cold section is started here and id 1 and 2 blocks are placed here. ; LINUX-SECTIONS: .section .text.unlikely._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: r.BB._Z3bazb: -; LINUX-SECTIONS-NOT: .section .text._Z3bazb.rr.BB._Z3bazb,"ax",@progbits,unique +; LINUX-SECTIONS: _Z3bazb.cold: +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits,unique ; LINUX-SECTIONS: .LBB0_2: ; LINUX-SECTIONS: .size _Z3bazb, .Lfunc_end{{[0-9]}}-_Z3bazb diff --git a/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll b/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll index c961f8d327380..8604b129b54cf 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll @@ -29,10 +29,10 @@ declare i32 @_Z3foov() #1 ; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: -; LINUX-SECTIONS: jmp a.BB._Z3bazb -; LINUX-SECTIONS: .section .text._Z3bazb.a.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: a.BB._Z3bazb: -; LINUX-SECTIONS: jmp aa.BB._Z3bazb -; LINUX-SECTIONS: .section .text._Z3bazb.aa.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: aa.BB._Z3bazb: -; LINUX-SECTIONS: jmp raa.BB._Z3bazb +; LINUX-SECTIONS: jmp _Z3bazb.1 +; LINUX-SECTIONS: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits +; LINUX-SECTIONS: _Z3bazb.1: +; LINUX-SECTIONS: jmp _Z3bazb.2 +; LINUX-SECTIONS: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits +; LINUX-SECTIONS: _Z3bazb.2: +; LINUX-SECTIONS: jmp _Z3bazb.3 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-eh.ll b/llvm/test/CodeGen/X86/basicblock-sections-eh.ll index 2d1fd88cbd4d2..7e5f4a2fe3927 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-eh.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-eh.ll @@ -80,5 +80,6 @@ declare void @__cxa_end_catch() ;LINUX-SECTIONS: .section .text._Z3foob,"ax",@progbits ;LINUX-SECTIONS: _Z3foob: -;LINUX-SECTIONS: .section .text._Z3foob.laara.BB._Z3foob,"ax",@progbits -;LINUX-SECTIONS: l{{[a|r]*}}.BB._Z3foob: +;LINUX-SECTIONS: .section .text._Z3foob._Z3foob.{{[0-9]+}},"ax",@progbits +;LINUX-SECTIONS-LABEL: _Z3foob.{{[0-9]+}}: +;LINUX-SECTIONS: calll __cxa_begin_catch diff --git a/llvm/test/CodeGen/X86/basicblock-sections-list.ll b/llvm/test/CodeGen/X86/basicblock-sections-list.ll index 55524ce6b454b..9a5056af39ed0 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-list.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-list.ll @@ -2,7 +2,7 @@ ; RUN: echo '!_Z3foob' > %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -define i32 @_Z3foob(i1 zeroext %0) #0 { +define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 %3 = alloca i8, align 1 %4 = zext i1 %0 to i8 @@ -31,7 +31,7 @@ define i32 
@_Z3foob(i1 zeroext %0) #0 { declare i32 @_Z3barv() #1 declare i32 @_Z3bazv() #1 -define i32 @_Z3zipb(i1 zeroext %0) #0 { +define i32 @_Z3zipb(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 %3 = alloca i8, align 1 %4 = zext i1 %0 to i8 @@ -59,18 +59,14 @@ define i32 @_Z3zipb(i1 zeroext %0) #0 { ; LINUX-SECTIONS: .section .text._Z3foob,"ax",@progbits ; LINUX-SECTIONS: _Z3foob: -; LINUX-SECTIONS: .section .text._Z3foob.a.BB._Z3foob,"ax",@progbits -; LINUX-SECTIONS: a.BB._Z3foob: -; LINUX-SECTIONS: .section .text._Z3foob.aa.BB._Z3foob,"ax",@progbits -; LINUX-SECTIONS: aa.BB._Z3foob: -; LINUX-SECTIONS: .section .text._Z3foob.raa.BB._Z3foob,"ax",@progbits -; LINUX-SECTIONS: raa.BB._Z3foob: +; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.1,"ax",@progbits +; LINUX-SECTIONS: _Z3foob.1: +; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.2,"ax",@progbits +; LINUX-SECTIONS: _Z3foob.2: +; LINUX-SECTIONS: .section .text._Z3foob._Z3foob.3,"ax",@progbits +; LINUX-SECTIONS: _Z3foob.3: ; LINUX-SECTIONS: .section .text._Z3zipb,"ax",@progbits ; LINUX-SECTIONS: _Z3zipb: -; LINUX-SECTIONS-NOT: .section .text._Z3zipb.a.BB._Z3zipb,"ax",@progbits -; LINUX-SECTIONS-NOT: a.BB._Z3zipb: -; LINUX-SECTIONS-NOT: .section .text._Z3zipb.aa.BB._Z3zipb,"ax",@progbits -; LINUX-SECTIONS-NOT: aa.BB._Z3zipb: -; LINUX-SECTIONS-NOT: .section .text._Z3zipb.raa.BB._Z3zipb,"ax",@progbits -; LINUX-SECTIONS-NOT: raa.BB._Z3zipb: +; LINUX-SECTIONS-NOT: .section .text._Z3zipb._Z3zipb.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-NOT: _Z3zipb.{{[0-9]+}}: diff --git a/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll b/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll index a93ca9fdc2ffb..ac17a461d7af9 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll @@ -4,7 +4,7 @@ ; RUN: echo '!!2' >> %t ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -define void @_Z3bazb(i1 zeroext) { +define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 %3 = zext i1 %0 to i8 store i8 %3, i8* %2, align 1 @@ -28,12 +28,16 @@ declare i32 @_Z3barv() #1 declare i32 @_Z3foov() #1 +; Check that the correct block is found using the call insts for foo and bar. +; ; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: ; Check that the basic block with id 1 doesn't get a section. 
-; LINUX-SECTIONS-NOT: .section .text._Z3bazb.r.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: # %bb.1: -; LINUX-SECTIONS: .section .text._Z3bazb.rr.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: rr.BB._Z3bazb: +; LINUX-SECTIONS-NOT: .section .text._Z3bazb._Z3bazb.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-LABEL: # %bb.1: +; LINUX-SECTIONS-NEXT: callq _Z3barv +; LINUX-SECTIONS: .section .text._Z3bazb._Z3bazb.{{[0-9]+}},"ax",@progbits +; LINUX-SECTIONS-LABEL: _Z3bazb.{{[0-9]+}}: +; LINUX-SECTIONS-NEXT: callq _Z3foov ; LINUX-SECTIONS: .Ltmp0: -; LINUX-SECTIONS-NEXT: .size rr.BB._Z3bazb, .Ltmp0-rr.BB._Z3bazb +; LINUX-SECTIONS-NEXT: .size _Z3bazb.{{[0-9]+}}, .Ltmp0-_Z3bazb.{{[0-9]+}} diff --git a/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir b/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir index 0c80b32e143c0..6011342a6f003 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir @@ -124,8 +124,8 @@ body: | # CHECK: _Z3foob: # CHECK: .section .text,"ax",@progbits,unique -# CHECK: a.BB._Z3foob: +# CHECK: _Z3foob.1: # CHECK: .section .text,"ax",@progbits,unique -# CHECK: aa.BB._Z3foob: +# CHECK: _Z3foob.2: # CHECK: .section .text,"ax",@progbits,unique -# CHECK: aaa.BB._Z3foob: +# CHECK: _Z3foob.3: diff --git a/llvm/test/CodeGen/X86/basicblock-sections.ll b/llvm/test/CodeGen/X86/basicblock-sections.ll index 85b7e1fc9ac5c..5c17b755fa70b 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections.ll +++ b/llvm/test/CodeGen/X86/basicblock-sections.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -define void @_Z3bazb(i1 zeroext) { +define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 %3 = zext i1 %0 to i8 store i8 %3, i8* %2, align 1 @@ -28,11 +28,11 @@ declare i32 @_Z3foov() #1 ; LINUX-SECTIONS: .section .text._Z3bazb,"ax",@progbits ; LINUX-SECTIONS: _Z3bazb: -; LINUX-SECTIONS: .section .text._Z3bazb.r.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: r.BB._Z3bazb: +; LINUX-SECTIONS: .section .text._Z3bazb._Z3bazb.1,"ax",@progbits +; LINUX-SECTIONS: _Z3bazb.1: ; LINUX-SECTIONS: .Ltmp0: -; LINUX-SECTIONS-NEXT: .size r.BB._Z3bazb, .Ltmp0-r.BB._Z3bazb -; LINUX-SECTIONS: .section .text._Z3bazb.rr.BB._Z3bazb,"ax",@progbits -; LINUX-SECTIONS: rr.BB._Z3bazb: +; LINUX-SECTIONS-NEXT: .size _Z3bazb.1, .Ltmp0-_Z3bazb.1 +; LINUX-SECTIONS: .section .text._Z3bazb._Z3bazb.2,"ax",@progbits +; LINUX-SECTIONS: _Z3bazb.2: ; LINUX-SECTIONS: .Ltmp1: -; LINUX-SECTIONS-NEXT: .size rr.BB._Z3bazb, .Ltmp1-rr.BB._Z3bazb +; LINUX-SECTIONS-NEXT: .size _Z3bazb.2, .Ltmp1-_Z3bazb.2 diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll index 7d5886d7e594b..0510dadee263b 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll @@ -12,7 +12,7 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) { ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -557,7 
+557,7 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm1 +; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm1 ; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll index e50dca9646567..a3545af788d76 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -234,7 +234,7 @@ define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) { ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4 ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4 +; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -303,7 +303,7 @@ define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) { ; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4 ; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm4 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4 +; SSE2-SSSE3-NEXT: packsswb %xmm4, %xmm4 ; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -380,7 +380,7 @@ define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, ; SSE2-SSSE3-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm8 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8 ; SSE2-SSSE3-NEXT: pand %xmm6, %xmm8 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8 +; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -461,7 +461,7 @@ define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6 ; SSE2-SSSE3-NEXT: pand %xmm2, %xmm6 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6 +; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -512,7 +512,7 @@ define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> ; SSE2-SSSE3-NEXT: cmpltps %xmm4, %xmm6 ; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm6 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm6 +; SSE2-SSSE3-NEXT: packsswb %xmm6, %xmm6 ; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq @@ -577,7 +577,7 @@ define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x fl ; SSE2-SSSE3-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8 ; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm8 ; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm8 +; SSE2-SSSE3-NEXT: packsswb %xmm8, %xmm8 ; SSE2-SSSE3-NEXT: pmovmskb %xmm8, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll index a3e1c3f08177d..2424e1c3a74d8 100644 --- 
a/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -27,7 +27,7 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; SSE-NEXT: packssdw %xmm9, %xmm8 ; SSE-NEXT: packssdw %xmm10, %xmm8 ; SSE-NEXT: pand %xmm0, %xmm8 -; SSE-NEXT: packsswb %xmm0, %xmm8 +; SSE-NEXT: packsswb %xmm8, %xmm8 ; SSE-NEXT: pmovmskb %xmm8, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -123,7 +123,7 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> ; SSE-NEXT: packssdw %xmm9, %xmm8 ; SSE-NEXT: packssdw %xmm10, %xmm8 ; SSE-NEXT: pand %xmm4, %xmm8 -; SSE-NEXT: packsswb %xmm0, %xmm8 +; SSE-NEXT: packsswb %xmm8, %xmm8 ; SSE-NEXT: pmovmskb %xmm8, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll index 3c461d438307e..b6e27f1e91ac4 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll @@ -117,7 +117,7 @@ define i8 @v8f32(<8 x float> %a, <8 x float> %b) { ; SSE2-SSSE3-NEXT: cmpltps %xmm1, %xmm3 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm2 ; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2 ; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSE2-SSSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll index 1fc6b82a85cfe..efe04317919c1 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll @@ -354,7 +354,7 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b) { ; SSE-NEXT: cmpltpd %xmm0, %xmm4 ; SSE-NEXT: packssdw %xmm5, %xmm4 ; SSE-NEXT: packssdw %xmm6, %xmm4 -; SSE-NEXT: packsswb %xmm0, %xmm4 +; SSE-NEXT: packsswb %xmm4, %xmm4 ; SSE-NEXT: pmovmskb %xmm4, %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -608,7 +608,7 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) { ; SSE-NEXT: pcmpgtq %xmm0, %xmm4 ; SSE-NEXT: packssdw %xmm2, %xmm4 ; SSE-NEXT: packssdw %xmm3, %xmm4 -; SSE-NEXT: packsswb %xmm0, %xmm4 +; SSE-NEXT: packsswb %xmm4, %xmm4 ; SSE-NEXT: pmovmskb %xmm4, %eax ; SSE-NEXT: movb %al, (%rdi) ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll index 66bb6aa3d662f..65009a77c4d46 100644 --- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll @@ -49,11 +49,12 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax ; SSE2-SSSE3-NEXT: movl %eax, %ecx -; SSE2-SSSE3-NEXT: andl $3, %ecx -; SSE2-SSSE3-NEXT: movq %rcx, %xmm0 -; SSE2-SSSE3-NEXT: shrl $2, %eax -; SSE2-SSSE3-NEXT: movq %rax, %xmm1 -; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-SSSE3-NEXT: shrb $2, %cl +; SSE2-SSSE3-NEXT: movzbl %cl, %ecx +; SSE2-SSSE3-NEXT: andb $3, %al +; SSE2-SSSE3-NEXT: movzbl %al, %eax +; SSE2-SSSE3-NEXT: movd %eax, %xmm0 +; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al @@ -61,10 +62,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { ; ; AVX12-LABEL: bitcast_v4i32_to_v2i2: ; AVX12: # %bb.0: -; AVX12-NEXT: vmovmskps %xmm0, 
%ecx -; AVX12-NEXT: movl %ecx, %eax -; AVX12-NEXT: shrl $2, %eax -; AVX12-NEXT: andl $3, %ecx +; AVX12-NEXT: vmovmskps %xmm0, %eax +; AVX12-NEXT: movl %eax, %ecx +; AVX12-NEXT: shrb $2, %cl +; AVX12-NEXT: andb $3, %al ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -73,11 +74,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 -; AVX512-NEXT: kmovd %k0, %ecx -; AVX512-NEXT: movzbl %cl, %eax -; AVX512-NEXT: shrl $2, %eax -; AVX512-NEXT: andl $3, %eax -; AVX512-NEXT: andl $3, %ecx +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: shrb $2, %cl +; AVX512-NEXT: andb $3, %al ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq @@ -94,13 +94,14 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind { ; SSE2-SSSE3: # %bb.0: ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax -; SSE2-SSSE3-NEXT: movzbl %al, %ecx -; SSE2-SSSE3-NEXT: shrl $4, %ecx -; SSE2-SSSE3-NEXT: movq %rcx, %xmm0 -; SSE2-SSSE3-NEXT: andl $15, %eax -; SSE2-SSSE3-NEXT: movq %rax, %xmm1 -; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-SSSE3-NEXT: movl %eax, %ecx +; SSE2-SSSE3-NEXT: shrb $4, %cl +; SSE2-SSSE3-NEXT: movzbl %cl, %ecx +; SSE2-SSSE3-NEXT: andb $15, %al +; SSE2-SSSE3-NEXT: movzbl %al, %eax +; SSE2-SSSE3-NEXT: movd %eax, %xmm0 +; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq @@ -108,10 +109,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind { ; AVX12-LABEL: bitcast_v8i16_to_v2i4: ; AVX12: # %bb.0: ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX12-NEXT: vpmovmskb %xmm0, %ecx -; AVX12-NEXT: movzbl %cl, %eax -; AVX12-NEXT: shrl $4, %eax -; AVX12-NEXT: andl $15, %ecx +; AVX12-NEXT: vpmovmskb %xmm0, %eax +; AVX12-NEXT: movl %eax, %ecx +; AVX12-NEXT: shrb $4, %cl +; AVX12-NEXT: andb $15, %al ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -119,10 +120,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind { ; AVX512-LABEL: bitcast_v8i16_to_v2i4: ; AVX512: # %bb.0: ; AVX512-NEXT: vpmovw2m %xmm0, %k0 -; AVX512-NEXT: kmovd %k0, %ecx -; AVX512-NEXT: movzbl %cl, %eax -; AVX512-NEXT: shrl $4, %eax -; AVX512-NEXT: andl $15, %ecx +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: shrb $4, %cl +; AVX512-NEXT: andb $15, %al ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq @@ -181,22 +182,23 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax ; SSE2-SSSE3-NEXT: movl %eax, %ecx -; SSE2-SSSE3-NEXT: shrl $2, %ecx -; SSE2-SSSE3-NEXT: movq %rcx, %xmm0 -; SSE2-SSSE3-NEXT: andl $3, %eax -; SSE2-SSSE3-NEXT: movq %rax, %xmm1 -; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-SSSE3-NEXT: shrb $2, %cl +; SSE2-SSSE3-NEXT: movzbl %cl, %ecx +; SSE2-SSSE3-NEXT: andb $3, %al +; SSE2-SSSE3-NEXT: movzbl %al, %eax +; SSE2-SSSE3-NEXT: movd %eax, %xmm0 +; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm0, 
-{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_v4i64_to_v2i2: ; AVX12: # %bb.0: -; AVX12-NEXT: vmovmskpd %ymm0, %ecx -; AVX12-NEXT: movl %ecx, %eax -; AVX12-NEXT: shrl $2, %eax -; AVX12-NEXT: andl $3, %ecx +; AVX12-NEXT: vmovmskpd %ymm0, %eax +; AVX12-NEXT: movl %eax, %ecx +; AVX12-NEXT: shrb $2, %cl +; AVX12-NEXT: andb $3, %al ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper @@ -206,11 +208,10 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 -; AVX512-NEXT: kmovd %k0, %ecx -; AVX512-NEXT: movzbl %cl, %eax -; AVX512-NEXT: shrl $2, %eax -; AVX512-NEXT: andl $3, %eax -; AVX512-NEXT: andl $3, %ecx +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: shrb $2, %cl +; AVX512-NEXT: andb $3, %al ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -229,23 +230,24 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind { ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax -; SSE2-SSSE3-NEXT: movzbl %al, %ecx -; SSE2-SSSE3-NEXT: shrl $4, %ecx -; SSE2-SSSE3-NEXT: movq %rcx, %xmm0 -; SSE2-SSSE3-NEXT: andl $15, %eax -; SSE2-SSSE3-NEXT: movq %rax, %xmm1 -; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-SSSE3-NEXT: movl %eax, %ecx +; SSE2-SSSE3-NEXT: shrb $4, %cl +; SSE2-SSSE3-NEXT: movzbl %cl, %ecx +; SSE2-SSSE3-NEXT: andb $15, %al +; SSE2-SSSE3-NEXT: movzbl %al, %eax +; SSE2-SSSE3-NEXT: movd %eax, %xmm0 +; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq ; ; AVX12-LABEL: bitcast_v8i32_to_v2i4: ; AVX12: # %bb.0: -; AVX12-NEXT: vmovmskps %ymm0, %ecx -; AVX12-NEXT: movl %ecx, %eax -; AVX12-NEXT: shrl $4, %eax -; AVX12-NEXT: andl $15, %ecx +; AVX12-NEXT: vmovmskps %ymm0, %eax +; AVX12-NEXT: movl %eax, %ecx +; AVX12-NEXT: shrb $4, %cl +; AVX12-NEXT: andb $15, %al ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper @@ -255,10 +257,10 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 -; AVX512-NEXT: kmovd %k0, %ecx -; AVX512-NEXT: movzbl %cl, %eax -; AVX512-NEXT: shrl $4, %eax -; AVX512-NEXT: andl $15, %ecx +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: shrb $4, %cl +; AVX512-NEXT: andb $15, %al ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -391,13 +393,14 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax -; SSE2-SSSE3-NEXT: movzbl %al, %ecx -; SSE2-SSSE3-NEXT: shrl $4, %ecx -; SSE2-SSSE3-NEXT: movq %rcx, %xmm0 -; SSE2-SSSE3-NEXT: andl $15, %eax -; SSE2-SSSE3-NEXT: movq %rax, %xmm1 -; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-SSSE3-NEXT: movl %eax, %ecx +; SSE2-SSSE3-NEXT: 
shrb $4, %cl +; SSE2-SSSE3-NEXT: movzbl %cl, %ecx +; SSE2-SSSE3-NEXT: andb $15, %al +; SSE2-SSSE3-NEXT: movzbl %al, %eax +; SSE2-SSSE3-NEXT: movd %eax, %xmm0 +; SSE2-SSSE3-NEXT: pinsrw $4, %ecx, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq @@ -412,10 +415,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vmovmskps %ymm0, %ecx -; AVX1-NEXT: movl %ecx, %eax -; AVX1-NEXT: shrl $4, %eax -; AVX1-NEXT: andl $15, %ecx +; AVX1-NEXT: vmovmskps %ymm0, %eax +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrb $4, %cl +; AVX1-NEXT: andb $15, %al ; AVX1-NEXT: addb %cl, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -425,10 +428,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vmovmskps %ymm0, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $4, %eax -; AVX2-NEXT: andl $15, %ecx +; AVX2-NEXT: vmovmskps %ymm0, %eax +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: shrb $4, %cl +; AVX2-NEXT: andb $15, %al ; AVX2-NEXT: addb %cl, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -438,10 +441,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 -; AVX512-NEXT: kmovd %k0, %ecx -; AVX512-NEXT: movzbl %cl, %eax -; AVX512-NEXT: shrl $4, %eax -; AVX512-NEXT: andl $15, %ecx +; AVX512-NEXT: kmovd %k0, %eax +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: shrb $4, %cl +; AVX512-NEXT: andb $15, %al ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll index 75a00dd03a312..8369a44dcbad2 100644 --- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll +++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll @@ -66,9 +66,20 @@ define i64 @div64(i64 %a, i64 %b) { ; X64-LABEL: div64: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: orq %rsi, %rcx +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: je .LBB1_1 +; X64-NEXT: # %bb.2: ; X64-NEXT: cqto ; X64-NEXT: idivq %rsi ; X64-NEXT: retq +; X64-NEXT: .LBB1_1: +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: # kill: def $eax killed $eax def $rax +; X64-NEXT: retq ; ; SLM-LABEL: div64: ; SLM: # %bb.0: # %entry @@ -178,9 +189,20 @@ define i64 @div64_hugews(i64 %a, i64 %b) { ; X64-LABEL: div64_hugews: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: orq %rsi, %rcx +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: je .LBB4_1 +; X64-NEXT: # %bb.2: ; X64-NEXT: cqto ; X64-NEXT: idivq %rsi ; X64-NEXT: retq +; X64-NEXT: .LBB4_1: +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi +; X64-NEXT: # kill: def $eax killed $eax def $rax +; X64-NEXT: retq ; ; SLM-LABEL: div64_hugews: ; SLM: # %bb.0: diff --git a/llvm/test/CodeGen/X86/cfi-epilogue-with-return.mir b/llvm/test/CodeGen/X86/cfi-epilogue-with-return.mir new file mode 100644 index 
0000000000000..583e54b097faf --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-epilogue-with-return.mir @@ -0,0 +1,48 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=prologepilog 2>&1 | FileCheck %s +--- | + define i64 @_Z3foob(i1 zeroext %cond) #0 { + ret i64 0 + } + attributes #0 = {"frame-pointer"="all"} +... +--- +# If the epilogue bb.1 is a return block, no .cfi_restore is +# needed in it. +# CHECK: bb.1: +# CHECK-NOT: CFI_INSTRUCTION restore +# CHECK: RET 0 +# CHECK: bb.2: +# CHECK: RET 0 +name: _Z3foob +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$edi' } +frameInfo: + maxAlignment: 1 + hasCalls: true + savePoint: '%bb.1' + restorePoint: '%bb.1' +machineFunctionInfo: {} +body: | + bb.0: + liveins: $edi + + TEST8rr renamable $dil, renamable $dil, implicit-def $eflags, implicit killed $edi + JCC_1 %bb.2, 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1: + renamable $rbx = IMPLICIT_DEF + renamable $r14 = IMPLICIT_DEF + renamable $r15 = IMPLICIT_DEF + renamable $r12 = IMPLICIT_DEF + renamable $r13 = IMPLICIT_DEF + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + RET 0, killed $rax + + bb.2: + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + RET 0, killed $rax + +... diff --git a/llvm/test/CodeGen/X86/cfi-epilogue-without-return.mir b/llvm/test/CodeGen/X86/cfi-epilogue-without-return.mir new file mode 100644 index 0000000000000..8f04721489608 --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-epilogue-without-return.mir @@ -0,0 +1,53 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=prologepilog 2>&1 | FileCheck %s +--- | + declare dso_local void @_Z3goov() + define i64 @_Z3foob(i1 zeroext %cond) #0 { + ret i64 0 + } + attributes #0 = {"frame-pointer"="all"} +... +--- +# If the epilogue bb.1.if.then is not a return block, .cfi_restore is +# needed in it, otherwise bb.2.return will see different outgoing CFI +# information from its predecessors. +# CHECK: bb.1: +# CHECK: CFI_INSTRUCTION restore $rbx +# CHECK-NEXT: CFI_INSTRUCTION restore $r12 +# CHECK-NEXT: CFI_INSTRUCTION restore $r13 +# CHECK-NEXT: CFI_INSTRUCTION restore $r14 +# CHECK-NEXT: CFI_INSTRUCTION restore $r15 +# CHECK-NEXT: CFI_INSTRUCTION restore $rbp +# CHECK-NOT: RET 0 +# CHECK: bb.2: +# CHECK: RET 0 +name: _Z3foob +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$edi' } +frameInfo: + maxAlignment: 1 + hasCalls: true + savePoint: '%bb.1' + restorePoint: '%bb.1' +machineFunctionInfo: {} +body: | + bb.0: + liveins: $edi + + TEST8rr renamable $dil, renamable $dil, implicit-def $eflags, implicit killed $edi + JCC_1 %bb.2, 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1: + renamable $rbx = IMPLICIT_DEF + renamable $r14 = IMPLICIT_DEF + renamable $r15 = IMPLICIT_DEF + renamable $r12 = IMPLICIT_DEF + renamable $r13 = IMPLICIT_DEF + + bb.2: + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + RET 0, killed $rax + +... diff --git a/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register-2.mir b/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register-2.mir new file mode 100644 index 0000000000000..e85126d649be4 --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register-2.mir @@ -0,0 +1,97 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -verify-cfiinstrs \ +# RUN: -run-pass=cfi-instr-inserter 2>&1 | FileCheck %s +# Test that CFI inserter inserts .cfi_offset/.cfi_register/.cfi_rel_offset +# properly for callee saved registers. +--- | + define void @foo() { + ret void + } +... 
+--- +# CHECK: bb.3: +# CHECK: CFI_INSTRUCTION offset $rbp, -16 +# CHECK-NEXT: CFI_INSTRUCTION offset $r12, -24 +# CHECK-NEXT: CFI_INSTRUCTION register $r13, $rcx +# CHECK-NEXT: CFI_INSTRUCTION offset $r14, -40 +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$edi' } +frameInfo: + stackSize: 16 + offsetAdjustment: -16 + maxAlignment: 16 + hasCalls: true + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 8 +fixedStack: + - { id: 0, type: spill-slot, offset: -40, size: 8, alignment: 8, callee-saved-register: '$r14' } + - { id: 1, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$r12' } + - { id: 2, type: spill-slot, offset: -16, size: 8, alignment: 16 } + - { id: 3, type: spill-slot, offset: -16, size: 8, alignment: 16 } + - { id: 4, type: spill-slot, offset: -24, size: 8, alignment: 8 } +machineFunctionInfo: {} +body: | + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $edi, $r12, $r13, $r14 + + frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + CFI_INSTRUCTION offset $rbp, -16 + $rbp = frame-setup MOV64rr $rsp + CFI_INSTRUCTION def_cfa_register $rbp + frame-setup PUSH64r killed $r12, implicit-def $rsp, implicit $rsp + $rcx = frame-setup COPY $r13 + frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION offset $r12, -24 + CFI_INSTRUCTION register $r13, $rcx + CFI_INSTRUCTION rel_offset $r14, -24 + TEST8rr renamable $dil, renamable $dil, implicit-def $eflags, implicit killed $edi + JCC_1 %bb.2, 4, implicit killed $eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3(0x80000000) + + renamable $r12 = IMPLICIT_DEF + renamable $r13 = IMPLICIT_DEF + renamable $r14 = IMPLICIT_DEF + JMP_1 %bb.3 + + bb.2: + liveins: $rcx + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + $r12 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $r13 = frame-destroy COPY $rcx + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION restore $rbp + CFI_INSTRUCTION restore $r12 + CFI_INSTRUCTION restore $r13 + CFI_INSTRUCTION restore $r14 + CFI_INSTRUCTION def_cfa $rsp, 8 + RET 0, killed $rax + + bb.3: + successors: %bb.4(0x80000000) + + renamable $rdi = IMPLICIT_DEF + renamable $rsi = IMPLICIT_DEF + + bb.4: + liveins: $rcx + dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax + $r12 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $r13 = frame-destroy COPY $rcx + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION restore $rbp + CFI_INSTRUCTION restore $r12 + CFI_INSTRUCTION restore $r13 + CFI_INSTRUCTION restore $r14 + CFI_INSTRUCTION def_cfa $rsp, 8 + RET 0, killed $rax + +... diff --git a/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register.mir b/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register.mir new file mode 100644 index 0000000000000..b17c9a67abb18 --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-inserter-callee-save-register.mir @@ -0,0 +1,34 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -verify-cfiinstrs \ +# RUN: -run-pass=cfi-instr-inserter 2>&1 | FileCheck %s +# Test that CFI inserter inserts .cfi_restore properly for +# callee saved registers. +--- | + define void @foo() { + ret void + } +... 
+--- +# CHECK: bb.3: +# CHECK: CFI_INSTRUCTION restore $rbx +# CHECK-NEXT: CFI_INSTRUCTION restore $rbp +name: foo +body: | + bb.0: + TEST8rr renamable $dil, renamable $dil, implicit-def $eflags, implicit killed $edi + JCC_1 %bb.2, 5, implicit killed $eflags + + bb.1: + JMP_1 %bb.3 + + bb.2: + CFI_INSTRUCTION def_cfa_offset 16 + CFI_INSTRUCTION offset $rbp, -16 + CFI_INSTRUCTION def_cfa_register $rbp + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION def_cfa $rsp, 8 + RET 0, $rax + + bb.3: + RET 0, $rax + +... diff --git a/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-csr.mir b/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-csr.mir new file mode 100644 index 0000000000000..63957ae5229fa --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-csr.mir @@ -0,0 +1,28 @@ +# RUN: not --crash llc -o - %s -mtriple=x86_64-- -verify-cfiinstrs \ +# RUN: -run-pass=cfi-instr-inserter 2>&1 | FileCheck %s +# Test that the CFI verifier finds an inconsistent CSR saved set between bb.end +# and one of its predecessors. +--- | + define void @inconsistentCSR() { + entry: + br label %then + then: + br label %end + end: + ret void + } +... +--- +# CHECK: *** Inconsistent CSR Saved between pred and succ in function inconsistentCSR *** +# CHECK: LLVM ERROR: Found 1 in/out CFI information errors. +name: inconsistentCSR +body: | + bb.0.entry: + JCC_1 %bb.2, 5, implicit undef $eflags + + bb.1.then: + CFI_INSTRUCTION offset $rbp, -16 + + bb.2.end: + RET 0 +... diff --git a/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-loc.mir b/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-loc.mir new file mode 100644 index 0000000000000..ef9fb22476a71 --- /dev/null +++ b/llvm/test/CodeGen/X86/cfi-inserter-verify-inconsistent-loc.mir @@ -0,0 +1,26 @@ +# REQUIRES: asserts +# RUN: not --crash llc -o - %s -mtriple=x86_64-- \ +# RUN: -run-pass=cfi-instr-inserter 2>&1 | FileCheck %s +# Test that a CSR being saved in multiple locations can be caught by +# an assertion. +--- | + define void @inconsistentlocs() { + bb.end: + ret void + } +... +--- +# CHECK: Different saved locations for the same CSR +# CHECK-NEXT: UNREACHABLE executed +name: inconsistentlocs +body: | + bb.0: + CFI_INSTRUCTION offset $r12, -8 + JCC_1 %bb.2, 5, implicit undef $eflags + + bb.1: + CFI_INSTRUCTION offset $r12, -16 + + bb.2.bb.end: + RET 0 +...
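The new cfi-*.mir tests above drive the prologepilog and cfi-instr-inserter passes in isolation via -run-pass. To see the same directives in ordinary llc output, a small IR reproducer works; the following is a hypothetical sketch (not part of this patch; the file and function names are invented, and the inline asm exists only to clobber callee-saved registers so the prologue must spill them):

; cfi-sketch.ll -- hypothetical reproducer, not part of this patch.
; Clobbering rbx/r12/r13 forces prologue pushes, so llc emits a
; .cfi_offset for each saved register; the epilogue pops them back.
; Try: llc -mtriple=x86_64-- cfi-sketch.ll -o -
define i64 @clobber_csrs(i64 %x) uwtable {
entry:
  %r = call i64 asm sideeffect "movq $1, $0", "=r,r,~{rbx},~{r12},~{r13}"(i64 %x)
  ret i64 %r
}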
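The div64 and div64_hugews hunks in bypass-slow-division-tune.ll further up check the runtime bypass for slow 64-bit division: when neither operand uses its upper 32 bits, a cheap 32-bit divl replaces idivq. The IR below is a hand-written sketch of that shape (an approximation for illustration of what the slow-division bypass in CodeGenPrepare produces, not the pass's literal output):

; div-bypass-sketch.ll -- hand-written illustration, not pass output.
define i64 @div64_bypass_sketch(i64 %a, i64 %b) {
entry:
  ; If (a | b) >> 32 == 0, both operands fit in 32 bits.
  %or = or i64 %a, %b
  %hi = lshr i64 %or, 32
  %small = icmp eq i64 %hi, 0
  br i1 %small, label %fast, label %slow

fast:                                   ; 32-bit unsigned divide (divl)
  %a32 = trunc i64 %a to i32
  %b32 = trunc i64 %b to i32
  %q32 = udiv i32 %a32, %b32
  %q.fast = zext i32 %q32 to i64
  br label %done

slow:                                   ; full-width signed divide (idivq)
  %q.slow = sdiv i64 %a, %b
  br label %done

done:
  %q = phi i64 [ %q.fast, %fast ], [ %q.slow, %slow ]
  ret i64 %q
}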
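Likewise, the basicblock-sections-*.ll churn above is mostly a mechanical rename: the old encoded labels (a.BB.foo, raa.BB.foo, l.BB.main) become foo.1, foo.cold, and main.eh, and section names follow the .text._Z3bazb._Z3bazb.1 style. Any multi-block function reproduces the new labels; a minimal sketch (hypothetical file, reusing the llc flags from the tests themselves):

; bbsections-sketch.ll -- hypothetical example, not part of this patch.
; Each non-entry basic block lands in its own section with a foo.<N> label.
; Try: llc -mtriple=x86_64-pc-linux -function-sections \
;      -basicblock-sections=all -unique-bb-section-names \
;      bbsections-sketch.ll -o -
define i32 @foo(i1 zeroext %c) nounwind {
entry:
  br i1 %c, label %then, label %else
then:
  %a = call i32 @bar()
  br label %exit
else:
  %b = call i32 @baz()
  br label %exit
exit:
  %v = phi i32 [ %a, %then ], [ %b, %else ]
  ret i32 %v
}
declare i32 @bar()
declare i32 @baz()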
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 78487bd162d13..b96f44ec3073f 100644 --- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -542,7 +542,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind { define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind { ; SSE2-LABEL: _clearupper4xi64b: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps {{.*#+}} xmm2 +; SSE2-NEXT: movaps {{.*#+}} xmm2 = [NaN,0.0E+0,NaN,0.0E+0] ; SSE2-NEXT: andps %xmm2, %xmm0 ; SSE2-NEXT: andps %xmm2, %xmm1 ; SSE2-NEXT: retq @@ -805,48 +805,48 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind { ; AVX-NEXT: pushq %rbx ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9 -; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx +; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: movq %r9, %r8 ; AVX-NEXT: shrq $56, %r8 ; AVX-NEXT: andl $15, %r8d ; AVX-NEXT: movq %r9, %r10 ; AVX-NEXT: shrq $48, %r10 ; AVX-NEXT: andl $15, %r10d -; AVX-NEXT: movq %r9, %rsi -; AVX-NEXT: shrq $40, %rsi -; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movq %rcx, %rdx +; AVX-NEXT: shldq $24, %r9, %rdx +; AVX-NEXT: andl $15, %edx ; AVX-NEXT: movq %r9, %r11 ; AVX-NEXT: shrq $32, %r11 ; AVX-NEXT: andl $15, %r11d -; AVX-NEXT: movq %rdx, %rdi +; AVX-NEXT: movq %rcx, %rdi ; AVX-NEXT: shrq $56, %rdi ; AVX-NEXT: andl $15, %edi -; AVX-NEXT: movq %rdx, %rax -; AVX-NEXT: shrq $48, %rax +; AVX-NEXT: movq %rcx, %rsi +; AVX-NEXT: shrq $48, %rsi +; AVX-NEXT: andl $15, %esi +; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: shrq $40, %rax ; AVX-NEXT: andl $15, %eax -; AVX-NEXT: movq %rdx, %rcx -; AVX-NEXT: shrq $40, %rcx -; AVX-NEXT: andl $15, %ecx -; AVX-NEXT: movq %rdx, %rbx +; AVX-NEXT: movq %rcx, %rbx ; AVX-NEXT: shrq $32, %rbx ; AVX-NEXT: andl $15, %ebx ; AVX-NEXT: shlq $32, %rbx -; AVX-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F -; AVX-NEXT: orq %rbx, %rdx -; AVX-NEXT: shlq $40, %rcx -; AVX-NEXT: orq %rdx, %rcx -; AVX-NEXT: shlq $48, %rax +; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; AVX-NEXT: orq %rbx, %rcx +; AVX-NEXT: shlq $40, %rax ; AVX-NEXT: orq %rcx, %rax +; AVX-NEXT: shlq $48, %rsi +; AVX-NEXT: orq %rax, %rsi ; AVX-NEXT: shlq $56, %rdi -; AVX-NEXT: orq %rax, %rdi +; AVX-NEXT: orq %rsi, %rdi ; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) ; AVX-NEXT: shlq $32, %r11 ; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F ; AVX-NEXT: orq %r11, %r9 -; AVX-NEXT: shlq $40, %rsi -; AVX-NEXT: orq %r9, %rsi +; AVX-NEXT: shlq $40, %rdx +; AVX-NEXT: orq %r9, %rdx ; AVX-NEXT: shlq $48, %r10 -; AVX-NEXT: orq %rsi, %r10 +; AVX-NEXT: orq %rdx, %r10 ; AVX-NEXT: shlq $56, %r8 ; AVX-NEXT: orq %r10, %r8 ; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp) @@ -986,96 +986,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: movq %rax, %r8 ; AVX1-NEXT: movq %rax, %rdx ; AVX1-NEXT: movq %rax, %rsi ; AVX1-NEXT: movq %rax, %rdi -; AVX1-NEXT: shrq $32, %rdi -; AVX1-NEXT: andl $15, %edi -; AVX1-NEXT: shlq $32, %rdi +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq $32, %rcx +; AVX1-NEXT: andl $15, %ecx +; AVX1-NEXT: shlq $32, %rcx ; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; AVX1-NEXT: orq %rdi, %rax -; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rdi -; AVX1-NEXT: shrq $40, %rsi +; AVX1-NEXT: orq %rcx, %rax +; AVX1-NEXT: shrq $40, %rdi +; AVX1-NEXT: andl $15, 
%edi +; AVX1-NEXT: shlq $40, %rdi +; AVX1-NEXT: orq %rax, %rdi +; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; AVX1-NEXT: shrq $48, %rsi ; AVX1-NEXT: andl $15, %esi -; AVX1-NEXT: shlq $40, %rsi -; AVX1-NEXT: orq %rax, %rsi -; AVX1-NEXT: movq %rdi, %rax -; AVX1-NEXT: shrq $48, %rdx +; AVX1-NEXT: shlq $48, %rsi +; AVX1-NEXT: orq %rdi, %rsi +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq $56, %rdx ; AVX1-NEXT: andl $15, %edx -; AVX1-NEXT: shlq $48, %rdx +; AVX1-NEXT: shlq $56, %rdx ; AVX1-NEXT: orq %rsi, %rdx -; AVX1-NEXT: movq %rdi, %rsi +; AVX1-NEXT: movq %rax, %rsi +; AVX1-NEXT: shldq $24, %rax, %r8 +; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; AVX1-NEXT: movq %rax, %rdx +; AVX1-NEXT: shrq $32, %rdx +; AVX1-NEXT: andl $15, %edx +; AVX1-NEXT: shlq $32, %rdx +; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; AVX1-NEXT: orq %rdx, %rax +; AVX1-NEXT: andl $15, %r8d +; AVX1-NEXT: shlq $40, %r8 +; AVX1-NEXT: orq %rax, %r8 +; AVX1-NEXT: shrq $48, %rsi +; AVX1-NEXT: andl $15, %esi +; AVX1-NEXT: shlq $48, %rsi +; AVX1-NEXT: orq %r8, %rsi ; AVX1-NEXT: shrq $56, %rcx ; AVX1-NEXT: andl $15, %ecx +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: shlq $56, %rcx -; AVX1-NEXT: orq %rdx, %rcx -; AVX1-NEXT: movq %rdi, %rdx +; AVX1-NEXT: orq %rsi, %rcx +; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; AVX1-NEXT: movq %rdi, %rcx +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $8, %ecx +; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: movl %eax, %ecx +; AVX1-NEXT: shrl $24, %ecx +; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq $32, %rcx -; AVX1-NEXT: andl $15, %ecx -; AVX1-NEXT: shlq $32, %rcx -; AVX1-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; AVX1-NEXT: orq %rcx, %rdi -; AVX1-NEXT: shrq $40, %rdx -; AVX1-NEXT: andl $15, %edx -; AVX1-NEXT: shlq $40, %rdx -; AVX1-NEXT: orq %rdi, %rdx -; AVX1-NEXT: shrq $48, %rsi -; AVX1-NEXT: andl $15, %esi -; AVX1-NEXT: shlq $48, %rsi -; AVX1-NEXT: orq %rdx, %rsi +; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq $40, %rcx +; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq $48, %rcx +; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 +; AVX1-NEXT: vpextrq $1, %xmm0, %rcx ; AVX1-NEXT: shrq $56, %rax -; AVX1-NEXT: andl $15, %eax -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: shlq $56, %rax -; AVX1-NEXT: orq %rsi, %rax -; AVX1-NEXT: vmovq %xmm0, %rcx -; AVX1-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0 ; AVX1-NEXT: movl %ecx, %eax ; AVX1-NEXT: shrl $8, %eax -; AVX1-NEXT: vmovd %ecx, %xmm1 -; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; AVX1-NEXT: movl %ecx, %eax ; AVX1-NEXT: shrl $16, %eax -; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; AVX1-NEXT: movl %ecx, %eax ; AVX1-NEXT: shrl $24, %eax -; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: shrq $32, %rax -; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; AVX1-NEXT: movq %rcx, %rax ; AVX1-NEXT: shrq $40, %rax -; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; AVX1-NEXT: movq %rcx, 
%rax ; AVX1-NEXT: shrq $48, %rax -; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX1-NEXT: shrq $56, %rcx -; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0 -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $16, %ecx -; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $24, %ecx -; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq $32, %rcx -; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq $40, %rcx -; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq $48, %rcx -; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: shrq $56, %rax -; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 ; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq @@ -1084,96 +1084,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: movq %rax, %r8 ; AVX2-NEXT: movq %rax, %rdx ; AVX2-NEXT: movq %rax, %rsi ; AVX2-NEXT: movq %rax, %rdi -; AVX2-NEXT: shrq $32, %rdi -; AVX2-NEXT: andl $15, %edi -; AVX2-NEXT: shlq $32, %rdi +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: shrq $32, %rcx +; AVX2-NEXT: andl $15, %ecx +; AVX2-NEXT: shlq $32, %rcx ; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; AVX2-NEXT: orq %rdi, %rax -; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rdi -; AVX2-NEXT: shrq $40, %rsi +; AVX2-NEXT: orq %rcx, %rax +; AVX2-NEXT: shrq $40, %rdi +; AVX2-NEXT: andl $15, %edi +; AVX2-NEXT: shlq $40, %rdi +; AVX2-NEXT: orq %rax, %rdi +; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; AVX2-NEXT: shrq $48, %rsi ; AVX2-NEXT: andl $15, %esi -; AVX2-NEXT: shlq $40, %rsi -; AVX2-NEXT: orq %rax, %rsi -; AVX2-NEXT: movq %rdi, %rax -; AVX2-NEXT: shrq $48, %rdx +; AVX2-NEXT: shlq $48, %rsi +; AVX2-NEXT: orq %rdi, %rsi +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: shrq $56, %rdx ; AVX2-NEXT: andl $15, %edx -; AVX2-NEXT: shlq $48, %rdx +; AVX2-NEXT: shlq $56, %rdx ; AVX2-NEXT: orq %rsi, %rdx -; AVX2-NEXT: movq %rdi, %rsi +; AVX2-NEXT: movq %rax, %rsi +; AVX2-NEXT: shldq $24, %rax, %r8 +; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movq %rax, %rdx +; AVX2-NEXT: shrq $32, %rdx +; AVX2-NEXT: andl $15, %edx +; AVX2-NEXT: shlq $32, %rdx +; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; AVX2-NEXT: orq %rdx, %rax +; AVX2-NEXT: andl $15, %r8d +; AVX2-NEXT: shlq $40, %r8 +; AVX2-NEXT: orq %rax, %r8 +; AVX2-NEXT: shrq $48, %rsi +; AVX2-NEXT: andl $15, %esi +; AVX2-NEXT: shlq $48, %rsi +; AVX2-NEXT: orq %r8, %rsi ; AVX2-NEXT: shrq $56, %rcx ; AVX2-NEXT: andl $15, %ecx +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: shlq $56, %rcx -; AVX2-NEXT: orq %rdx, %rcx -; AVX2-NEXT: movq %rdi, %rdx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rdi, %rcx +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: shrl $8, %ecx +; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: shrl $16, %ecx +; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: movl %eax, %ecx 
+; AVX2-NEXT: shrl $24, %ecx +; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq $32, %rcx -; AVX2-NEXT: andl $15, %ecx -; AVX2-NEXT: shlq $32, %rcx -; AVX2-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F -; AVX2-NEXT: orq %rcx, %rdi -; AVX2-NEXT: shrq $40, %rdx -; AVX2-NEXT: andl $15, %edx -; AVX2-NEXT: shlq $40, %rdx -; AVX2-NEXT: orq %rdi, %rdx -; AVX2-NEXT: shrq $48, %rsi -; AVX2-NEXT: andl $15, %esi -; AVX2-NEXT: shlq $48, %rsi -; AVX2-NEXT: orq %rdx, %rsi +; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: shrq $40, %rcx +; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: movq %rax, %rcx +; AVX2-NEXT: shrq $48, %rcx +; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 +; AVX2-NEXT: vpextrq $1, %xmm0, %rcx ; AVX2-NEXT: shrq $56, %rax -; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: shlq $56, %rax -; AVX2-NEXT: orq %rsi, %rax -; AVX2-NEXT: vmovq %xmm0, %rcx -; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0 ; AVX2-NEXT: movl %ecx, %eax ; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: vmovd %ecx, %xmm1 -; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; AVX2-NEXT: movl %ecx, %eax ; AVX2-NEXT: shrl $16, %eax -; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; AVX2-NEXT: movl %ecx, %eax ; AVX2-NEXT: shrl $24, %eax -; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: shrq $32, %rax -; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: shrq $40, %rax -; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; AVX2-NEXT: movq %rcx, %rax ; AVX2-NEXT: shrq $48, %rax -; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 -; AVX2-NEXT: vpextrq $1, %xmm0, %rax +; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX2-NEXT: shrq $56, %rcx -; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0 -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $8, %ecx -; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $16, %ecx -; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $24, %ecx -; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shrq $32, %rcx -; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shrq $40, %rcx -; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shrq $48, %rcx -; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: shrq $56, %rax -; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 ; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index b65a7504b24aa..37128e312132d 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -3,7 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefixes=XOP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL ; ; 128-bit vectors @@ -28,6 +29,18 @@ define <2 x i64> @bitselect_v2i64_rr(<2 x i64>, <2 x i64>) { ; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_rr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_rr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq %3 = and <2 x i64> %0, %4 = and <2 x i64> %1, %5 = or <2 x i64> %4, %3 @@ -56,6 +69,20 @@ define <2 x i64> @bitselect_v2i64_rm(<2 x i64>, <2 x i64>* nocapture readonly) { ; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_rm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %xmm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_rm: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq %3 = load <2 x i64>, <2 x i64>* %1 %4 = and <2 x i64> %0, %5 = and <2 x i64> %3, @@ -85,6 +112,20 @@ define <2 x i64> @bitselect_v2i64_mr(<2 x i64>* nocapture readonly, <2 x i64>) { ; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vorps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_mr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %xmm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_mr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VL-NEXT: retq %3 = load <2 x i64>, <2 x i64>* %0 %4 = and <2 x i64> %3, %5 = and <2 x i64> %1, @@ -117,6 +158,22 @@ define <2 x i64> @bitselect_v2i64_mm(<2 x i64>* nocapture readonly, <2 x i64>* n ; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vorps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_mm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; AVX512F-NEXT: vmovaps (%rsi), %xmm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_mm: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rsi), %xmm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm0 = [18446744073709551612,18446744065119617022] +; AVX512VL-NEXT: vpternlogq $202, (%rdi), %xmm1, %xmm0 +; AVX512VL-NEXT: retq %3 = load <2 x i64>, <2 x i64>* %0 %4 = load <2 x i64>, <2 x i64>* %1 %5 = and <2 x i64> %3, @@ -125,6 +182,124 @@ define <2 x i64> @bitselect_v2i64_mm(<2 x i64>* nocapture readonly, <2 x i64>* n ret <2 x i64> %7 } +define <2 
x i64> @bitselect_v2i64_broadcast_rrr(<2 x i64> %a0, <2 x i64> %a1, i64 %a2) { +; SSE-LABEL: bitselect_v2i64_broadcast_rrr: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: pandn %xmm1, %xmm2 +; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v2i64_broadcast_rrr: +; XOP: # %bb.0: +; XOP-NEXT: vmovq %rdi, %xmm2 +; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 +; XOP-NEXT: retq +; +; AVX1-LABEL: bitselect_v2i64_broadcast_rrr: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq %rdi, %xmm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpandn %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: bitselect_v2i64_broadcast_rrr: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovq %rdi, %xmm2 +; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpandn %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_broadcast_rrr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovq %rdi, %xmm2 +; AVX512F-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpandn %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastq %rdi, %xmm2 +; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; AVX512VL-NEXT: retq + %1 = insertelement <2 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + %3 = xor <2 x i64> %1, + %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer + %5 = and <2 x i64> %a0, %2 + %6 = and <2 x i64> %a1, %4 + %7 = or <2 x i64> %5, %6 + ret <2 x i64> %7 +} + +define <2 x i64> @bitselect_v2i64_broadcast_rrm(<2 x i64> %a0, <2 x i64> %a1, i64* %p2) { +; SSE-LABEL: bitselect_v2i64_broadcast_rrm: +; SSE: # %bb.0: +; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: pandn %xmm1, %xmm2 +; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v2i64_broadcast_rrm: +; XOP: # %bb.0: +; XOP-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; XOP-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] +; XOP-NEXT: vandps %xmm2, %xmm0, %xmm0 +; XOP-NEXT: vandnps %xmm1, %xmm2, %xmm1 +; XOP-NEXT: vorps %xmm1, %xmm0, %xmm0 +; XOP-NEXT: retq +; +; AVX1-LABEL: bitselect_v2i64_broadcast_rrm: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,1,0,1] +; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vandnps %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: bitselect_v2i64_broadcast_rrm: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] +; AVX2-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vandnps %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: bitselect_v2i64_broadcast_rrm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] +; AVX512F-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vandnps %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vorps %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrm: +; AVX512VL: # %bb.0: +; 
AVX512VL-NEXT: vpternlogq $228, (%rdi){1to2}, %xmm1, %xmm0 +; AVX512VL-NEXT: retq + %a2 = load i64, i64* %p2 + %1 = insertelement <2 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + %3 = xor <2 x i64> %1, + %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer + %5 = and <2 x i64> %a0, %2 + %6 = and <2 x i64> %a1, %4 + %7 = or <2 x i64> %5, %6 + ret <2 x i64> %7 +} + ; ; 256-bit vectors ; @@ -151,6 +326,18 @@ define <4 x i64> @bitselect_v4i64_rr(<4 x i64>, <4 x i64>) { ; AVX-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 ; AVX-NEXT: vorps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i64_rr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_rr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq %3 = and <4 x i64> %0, %4 = and <4 x i64> %1, %5 = or <4 x i64> %4, %3 @@ -187,6 +374,20 @@ define <4 x i64> @bitselect_v4i64_rm(<4 x i64>, <4 x i64>* nocapture readonly) { ; AVX-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 ; AVX-NEXT: vorps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i64_rm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %ymm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_rm: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq %3 = load <4 x i64>, <4 x i64>* %1 %4 = and <4 x i64> %0, %5 = and <4 x i64> %3, @@ -224,6 +425,20 @@ define <4 x i64> @bitselect_v4i64_mr(<4 x i64>* nocapture readonly, <4 x i64>) { ; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: vorps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i64_mr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %ymm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_mr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: retq %3 = load <4 x i64>, <4 x i64>* %0 %4 = and <4 x i64> %3, %5 = and <4 x i64> %1, @@ -261,6 +476,22 @@ define <4 x i64> @bitselect_v4i64_mm(<4 x i64>* nocapture readonly, <4 x i64>* n ; AVX-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 ; AVX-NEXT: vorps %ymm0, %ymm1, %ymm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i64_mm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovaps (%rdi), %ymm0 +; AVX512F-NEXT: vmovaps (%rsi), %ymm1 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_mm: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX512VL-NEXT: vpternlogq $202, (%rdi), %ymm1, %ymm0 +; AVX512VL-NEXT: retq %3 = load <4 x i64>, <4 x i64>* %0 %4 = load <4 x i64>, <4 x i64>* %1 %5 = and <4 x i64> %3, @@ -269,6 +500,130 @@ define <4 x i64> 
@bitselect_v4i64_mm(<4 x i64>* nocapture readonly, <4 x i64>* n ret <4 x i64> %7 } +define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i64 %a2) { +; SSE-LABEL: bitselect_v4i64_broadcast_rrr: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %xmm4 +; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1] +; SSE-NEXT: pand %xmm4, %xmm1 +; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: movdqa %xmm4, %xmm5 +; SSE-NEXT: pandn %xmm3, %xmm5 +; SSE-NEXT: por %xmm5, %xmm1 +; SSE-NEXT: pandn %xmm2, %xmm4 +; SSE-NEXT: por %xmm4, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v4i64_broadcast_rrr: +; XOP: # %bb.0: +; XOP-NEXT: vmovq %rdi, %xmm2 +; XOP-NEXT: vmovq %rdi, %xmm3 +; XOP-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] +; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 +; XOP-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] +; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 +; XOP-NEXT: vandps %ymm2, %ymm0, %ymm0 +; XOP-NEXT: vandnps %ymm1, %ymm3, %ymm1 +; XOP-NEXT: vorps %ymm1, %ymm0, %ymm0 +; XOP-NEXT: retq +; +; AVX1-LABEL: bitselect_v4i64_broadcast_rrr: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq %rdi, %xmm2 +; AVX1-NEXT: vmovq %rdi, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1 +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: bitselect_v4i64_broadcast_rrr: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovq %rdi, %xmm2 +; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i64_broadcast_rrr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovq %rdi, %xmm2 +; AVX512F-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpandn %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastq %rdi, %ymm2 +; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; AVX512VL-NEXT: retq + %1 = insertelement <4 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer + %3 = xor <4 x i64> %1, + %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer + %5 = and <4 x i64> %a0, %2 + %6 = and <4 x i64> %a1, %4 + %7 = or <4 x i64> %5, %6 + ret <4 x i64> %7 +} + +define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, i64* %p2) { +; SSE-LABEL: bitselect_v4i64_broadcast_rrm: +; SSE: # %bb.0: +; SSE-NEXT: movq {{.*#+}} xmm4 = mem[0],zero +; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1] +; SSE-NEXT: pand %xmm4, %xmm1 +; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: movdqa %xmm4, %xmm5 +; SSE-NEXT: pandn %xmm3, %xmm5 +; SSE-NEXT: por %xmm5, %xmm1 +; SSE-NEXT: pandn %xmm2, %xmm4 +; SSE-NEXT: por %xmm4, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v4i64_broadcast_rrm: +; XOP: # %bb.0: +; XOP-NEXT: vbroadcastsd (%rdi), %ymm2 +; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 +; XOP-NEXT: retq +; +; AVX-LABEL: bitselect_v4i64_broadcast_rrm: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastsd (%rdi), %ymm2 +; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: 
bitselect_v4i64_broadcast_rrm: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vbroadcastsd (%rdi), %ymm2 +; AVX512F-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrm: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpternlogq $228, (%rdi){1to4}, %ymm1, %ymm0 +; AVX512VL-NEXT: retq + %a2 = load i64, i64* %p2 + %1 = insertelement <4 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer + %3 = xor <4 x i64> %1, + %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer + %5 = and <4 x i64> %a0, %2 + %6 = and <4 x i64> %a1, %4 + %7 = or <4 x i64> %5, %6 + ret <4 x i64> %7 +} + ; ; 512-bit vectors ; @@ -305,32 +660,21 @@ define <8 x i64> @bitselect_v8i64_rr(<8 x i64>, <8 x i64>) { ; XOP-NEXT: vpcmov %ymm4, %ymm1, %ymm3, %ymm1 ; XOP-NEXT: retq ; -; AVX1-LABEL: bitselect_v8i64_rr: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725] -; AVX1-NEXT: vandps %ymm4, %ymm3, %ymm3 -; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2 -; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0 -; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0 -; AVX1-NEXT: vandnps %ymm1, %ymm4, %ymm1 -; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX1-NEXT: retq -; -; AVX2-LABEL: bitselect_v8i64_rr: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovaps {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725] -; AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3 -; AVX2-NEXT: vandps %ymm4, %ymm2, %ymm2 -; AVX2-NEXT: vandnps %ymm0, %ymm4, %ymm0 -; AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vandnps %ymm1, %ymm4, %ymm1 -; AVX2-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: retq +; AVX-LABEL: bitselect_v8i64_rr: +; AVX: # %bb.0: +; AVX-NEXT: vmovaps {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725] +; AVX-NEXT: vandps %ymm4, %ymm3, %ymm3 +; AVX-NEXT: vandps %ymm4, %ymm2, %ymm2 +; AVX-NEXT: vandnps %ymm0, %ymm4, %ymm0 +; AVX-NEXT: vorps %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vandnps %ymm1, %ymm4, %ymm1 +; AVX-NEXT: vorps %ymm1, %ymm3, %ymm1 +; AVX-NEXT: retq ; -; AVX512F-LABEL: bitselect_v8i64_rr: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 -; AVX512F-NEXT: retq +; AVX512-LABEL: bitselect_v8i64_rr: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512-NEXT: retq %3 = and <8 x i64> %0, %4 = and <8 x i64> %1, %5 = or <8 x i64> %4, %3 @@ -376,35 +720,23 @@ define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, <8 x i64>* nocapture readonly) { ; XOP-NEXT: vpcmov %ymm4, %ymm1, %ymm3, %ymm1 ; XOP-NEXT: retq ; -; AVX1-LABEL: bitselect_v8i64_rm: -; AVX1: # %bb.0: -; AVX1-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612] -; AVX1-NEXT: # ymm2 = mem[0,1,0,1] -; AVX1-NEXT: vandps 32(%rdi), %ymm2, %ymm3 -; AVX1-NEXT: vandps (%rdi), %ymm2, %ymm4 -; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 -; AVX1-NEXT: vorps %ymm0, %ymm4, %ymm0 -; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 -; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX1-NEXT: retq -; -; AVX2-LABEL: bitselect_v8i64_rm: -; AVX2: # %bb.0: -; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612] -; AVX2-NEXT: # ymm2 = mem[0,1,0,1] -; AVX2-NEXT: vandps 32(%rdi), %ymm2, %ymm3 -; 
AVX2-NEXT: vandps (%rdi), %ymm2, %ymm4 -; AVX2-NEXT: vandnps %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vorps %ymm0, %ymm4, %ymm0 -; AVX2-NEXT: vandnps %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: retq +; AVX-LABEL: bitselect_v8i64_rm: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612] +; AVX-NEXT: # ymm2 = mem[0,1,0,1] +; AVX-NEXT: vandps 32(%rdi), %ymm2, %ymm3 +; AVX-NEXT: vandps (%rdi), %ymm2, %ymm4 +; AVX-NEXT: vandnps %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vorps %ymm0, %ymm4, %ymm0 +; AVX-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX-NEXT: vorps %ymm1, %ymm3, %ymm1 +; AVX-NEXT: retq ; -; AVX512F-LABEL: bitselect_v8i64_rm: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 -; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 -; AVX512F-NEXT: retq +; AVX512-LABEL: bitselect_v8i64_rm: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm1 +; AVX512-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %1 %4 = and <8 x i64> %0, %5 = and <8 x i64> %3, @@ -451,35 +783,23 @@ define <8 x i64> @bitselect_v8i64_mr(<8 x i64>* nocapture readonly, <8 x i64>) { ; XOP-NEXT: vpcmov %ymm4, %ymm1, %ymm3, %ymm1 ; XOP-NEXT: retq ; -; AVX1-LABEL: bitselect_v8i64_mr: -; AVX1: # %bb.0: -; AVX1-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296] -; AVX1-NEXT: # ymm2 = mem[0,1,0,1] -; AVX1-NEXT: vandps 32(%rdi), %ymm2, %ymm3 -; AVX1-NEXT: vandps (%rdi), %ymm2, %ymm4 -; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 -; AVX1-NEXT: vorps %ymm0, %ymm4, %ymm0 -; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 -; AVX1-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX1-NEXT: retq -; -; AVX2-LABEL: bitselect_v8i64_mr: -; AVX2: # %bb.0: -; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296] -; AVX2-NEXT: # ymm2 = mem[0,1,0,1] -; AVX2-NEXT: vandps 32(%rdi), %ymm2, %ymm3 -; AVX2-NEXT: vandps (%rdi), %ymm2, %ymm4 -; AVX2-NEXT: vandnps %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vorps %ymm0, %ymm4, %ymm0 -; AVX2-NEXT: vandnps %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vorps %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: retq +; AVX-LABEL: bitselect_v8i64_mr: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296] +; AVX-NEXT: # ymm2 = mem[0,1,0,1] +; AVX-NEXT: vandps 32(%rdi), %ymm2, %ymm3 +; AVX-NEXT: vandps (%rdi), %ymm2, %ymm4 +; AVX-NEXT: vandnps %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vorps %ymm0, %ymm4, %ymm0 +; AVX-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX-NEXT: vorps %ymm1, %ymm3, %ymm1 +; AVX-NEXT: retq ; -; AVX512F-LABEL: bitselect_v8i64_mr: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1 -; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 -; AVX512F-NEXT: retq +; AVX512-LABEL: bitselect_v8i64_mr: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm1 +; AVX512-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %0 %4 = and <8 x i64> %3, %5 = and <8 x i64> %1, @@ -522,36 +842,24 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n ; XOP-NEXT: vpcmov %ymm2, 32(%rdi), %ymm1, %ymm1 ; XOP-NEXT: retq ; -; AVX1-LABEL: bitselect_v8i64_mm: -; AVX1: # %bb.0: -; AVX1-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] -; AVX1-NEXT: # ymm1 = mem[0,1,0,1] -; AVX1-NEXT: vandps 32(%rsi), %ymm1, %ymm2 -; AVX1-NEXT: 
vandps (%rsi), %ymm1, %ymm0 -; AVX1-NEXT: vandnps (%rdi), %ymm1, %ymm3 -; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0 -; AVX1-NEXT: vandnps 32(%rdi), %ymm1, %ymm1 -; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1 -; AVX1-NEXT: retq -; -; AVX2-LABEL: bitselect_v8i64_mm: -; AVX2: # %bb.0: -; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] -; AVX2-NEXT: # ymm1 = mem[0,1,0,1] -; AVX2-NEXT: vandps 32(%rsi), %ymm1, %ymm2 -; AVX2-NEXT: vandps (%rsi), %ymm1, %ymm0 -; AVX2-NEXT: vandnps (%rdi), %ymm1, %ymm3 -; AVX2-NEXT: vorps %ymm3, %ymm0, %ymm0 -; AVX2-NEXT: vandnps 32(%rdi), %ymm1, %ymm1 -; AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: retq +; AVX-LABEL: bitselect_v8i64_mm: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX-NEXT: # ymm1 = mem[0,1,0,1] +; AVX-NEXT: vandps 32(%rsi), %ymm1, %ymm2 +; AVX-NEXT: vandps (%rsi), %ymm1, %ymm0 +; AVX-NEXT: vandnps (%rdi), %ymm1, %ymm3 +; AVX-NEXT: vorps %ymm3, %ymm0, %ymm0 +; AVX-NEXT: vandnps 32(%rdi), %ymm1, %ymm1 +; AVX-NEXT: vorps %ymm1, %ymm2, %ymm1 +; AVX-NEXT: retq ; -; AVX512F-LABEL: bitselect_v8i64_mm: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1 -; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] -; AVX512F-NEXT: vpternlogq $202, (%rdi), %zmm1, %zmm0 -; AVX512F-NEXT: retq +; AVX512-LABEL: bitselect_v8i64_mm: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rsi), %zmm1 +; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX512-NEXT: vpternlogq $202, (%rdi), %zmm1, %zmm0 +; AVX512-NEXT: retq %3 = load <8 x i64>, <8 x i64>* %0 %4 = load <8 x i64>, <8 x i64>* %1 %5 = and <8 x i64> %3, @@ -560,6 +868,142 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n ret <8 x i64> %7 } +define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i64 %a2) { +; SSE-LABEL: bitselect_v8i64_broadcast_rrr: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %xmm8 +; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1] +; SSE-NEXT: pand %xmm8, %xmm3 +; SSE-NEXT: pand %xmm8, %xmm2 +; SSE-NEXT: pand %xmm8, %xmm1 +; SSE-NEXT: pand %xmm8, %xmm0 +; SSE-NEXT: movdqa %xmm8, %xmm9 +; SSE-NEXT: pandn %xmm7, %xmm9 +; SSE-NEXT: por %xmm9, %xmm3 +; SSE-NEXT: movdqa %xmm8, %xmm7 +; SSE-NEXT: pandn %xmm6, %xmm7 +; SSE-NEXT: por %xmm7, %xmm2 +; SSE-NEXT: movdqa %xmm8, %xmm6 +; SSE-NEXT: pandn %xmm5, %xmm6 +; SSE-NEXT: por %xmm6, %xmm1 +; SSE-NEXT: pandn %xmm4, %xmm8 +; SSE-NEXT: por %xmm8, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v8i64_broadcast_rrr: +; XOP: # %bb.0: +; XOP-NEXT: vmovq %rdi, %xmm4 +; XOP-NEXT: vmovq %rdi, %xmm5 +; XOP-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] +; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 +; XOP-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] +; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 +; XOP-NEXT: vandps %ymm4, %ymm1, %ymm1 +; XOP-NEXT: vandps %ymm4, %ymm0, %ymm0 +; XOP-NEXT: vandnps %ymm3, %ymm5, %ymm3 +; XOP-NEXT: vorps %ymm3, %ymm1, %ymm1 +; XOP-NEXT: vandnps %ymm2, %ymm5, %ymm2 +; XOP-NEXT: vorps %ymm2, %ymm0, %ymm0 +; XOP-NEXT: retq +; +; AVX1-LABEL: bitselect_v8i64_broadcast_rrr: +; AVX1: # %bb.0: 
+; AVX1-NEXT: vmovq %rdi, %xmm4 +; AVX1-NEXT: vmovq %rdi, %xmm5 +; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 +; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vandnps %ymm3, %ymm5, %ymm3 +; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 +; AVX1-NEXT: vandnps %ymm2, %ymm5, %ymm2 +; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: bitselect_v8i64_broadcast_rrr: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovq %rdi, %xmm4 +; AVX2-NEXT: vpbroadcastq %xmm4, %ymm4 +; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm3, %ymm4, %ymm3 +; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpandn %ymm2, %ymm4, %ymm2 +; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: bitselect_v8i64_broadcast_rrr: +; AVX512: # %bb.0: +; AVX512-NEXT: vpbroadcastq %rdi, %zmm2 +; AVX512-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512-NEXT: retq + %1 = insertelement <8 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer + %3 = xor <8 x i64> %1, + %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer + %5 = and <8 x i64> %a0, %2 + %6 = and <8 x i64> %a1, %4 + %7 = or <8 x i64> %5, %6 + ret <8 x i64> %7 +} + +define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, i64* %p2) { +; SSE-LABEL: bitselect_v8i64_broadcast_rrm: +; SSE: # %bb.0: +; SSE-NEXT: movq {{.*#+}} xmm8 = mem[0],zero +; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1] +; SSE-NEXT: pand %xmm8, %xmm3 +; SSE-NEXT: pand %xmm8, %xmm2 +; SSE-NEXT: pand %xmm8, %xmm1 +; SSE-NEXT: pand %xmm8, %xmm0 +; SSE-NEXT: movdqa %xmm8, %xmm9 +; SSE-NEXT: pandn %xmm7, %xmm9 +; SSE-NEXT: por %xmm9, %xmm3 +; SSE-NEXT: movdqa %xmm8, %xmm7 +; SSE-NEXT: pandn %xmm6, %xmm7 +; SSE-NEXT: por %xmm7, %xmm2 +; SSE-NEXT: movdqa %xmm8, %xmm6 +; SSE-NEXT: pandn %xmm5, %xmm6 +; SSE-NEXT: por %xmm6, %xmm1 +; SSE-NEXT: pandn %xmm4, %xmm8 +; SSE-NEXT: por %xmm8, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v8i64_broadcast_rrm: +; XOP: # %bb.0: +; XOP-NEXT: vbroadcastsd (%rdi), %ymm4 +; XOP-NEXT: vpcmov %ymm4, %ymm2, %ymm0, %ymm0 +; XOP-NEXT: vpcmov %ymm4, %ymm3, %ymm1, %ymm1 +; XOP-NEXT: retq +; +; AVX-LABEL: bitselect_v8i64_broadcast_rrm: +; AVX: # %bb.0: +; AVX-NEXT: vbroadcastsd (%rdi), %ymm4 +; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1 +; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0 +; AVX-NEXT: vandnps %ymm3, %ymm4, %ymm3 +; AVX-NEXT: vorps %ymm3, %ymm1, %ymm1 +; AVX-NEXT: vandnps %ymm2, %ymm4, %ymm2 +; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: bitselect_v8i64_broadcast_rrm: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogq $228, (%rdi){1to8}, %zmm1, %zmm0 +; AVX512-NEXT: retq + %a2 = load i64, i64* %p2 + %1 = insertelement <8 x i64> undef, i64 %a2, i32 0 + %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer + %3 = xor <8 x i64> %1, + %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer + %5 = and <8 x i64> %a0, %2 + %6 = and <8 x i64> %a1, %4 + %7 = or <8 x i64> %5, %6 + ret <8 x i64> %7 +} + ; Check that mask registers don't get canonicalized. 
define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: bitselect_v4i1_loop: @@ -617,6 +1061,17 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) { ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i1_loop: +; AVX512VL: # %bb.0: # %bb +; AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip){1to4}, %xmm1, %k1 +; AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip){1to4}, %xmm1, %k2 +; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0 {%k2} +; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 {%k1} +; AVX512VL-NEXT: korw %k0, %k1, %k1 +; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512VL-NEXT: retq bb: %tmp = icmp ne <4 x i32> %a0, zeroinitializer %tmp2 = icmp eq <4 x i32> %a1, diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index 8c38352c6b2f3..98fbd37f0d640 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -283,6 +283,77 @@ define <4 x i32> @combine_vec_mul_add(<4 x i32> %x) { ret <4 x i32> %2 } +; TODO fold mul(abs(x),abs(x)) -> mul(x,x) + +define i31 @combine_mul_abs_i31(i31 %0) { +; SSE-LABEL: combine_mul_abs_i31: +; SSE: # %bb.0: +; SSE-NEXT: addl %edi, %edi +; SSE-NEXT: sarl %edi +; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: cmovll %edi, %eax +; SSE-NEXT: imull %eax, %eax +; SSE-NEXT: retq +; +; AVX-LABEL: combine_mul_abs_i31: +; AVX: # %bb.0: +; AVX-NEXT: addl %edi, %edi +; AVX-NEXT: sarl %edi +; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: negl %eax +; AVX-NEXT: cmovll %edi, %eax +; AVX-NEXT: imull %eax, %eax +; AVX-NEXT: retq + %c = icmp slt i31 %0, 0 + %s = sub nsw i31 0, %0 + %r = select i1 %c, i31 %s, i31 %0 + %m = mul i31 %r, %r + ret i31 %m +} + +define i32 @combine_mul_abs_i32(i32 %0) { +; SSE-LABEL: combine_mul_abs_i32: +; SSE: # %bb.0: +; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: negl %eax +; SSE-NEXT: cmovll %edi, %eax +; SSE-NEXT: imull %eax, %eax +; SSE-NEXT: retq +; +; AVX-LABEL: combine_mul_abs_i32: +; AVX: # %bb.0: +; AVX-NEXT: movl %edi, %eax +; AVX-NEXT: negl %eax +; AVX-NEXT: cmovll %edi, %eax +; AVX-NEXT: imull %eax, %eax +; AVX-NEXT: retq + %c = icmp slt i32 %0, 0 + %s = sub nsw i32 0, %0 + %r = select i1 %c, i32 %s, i32 %0 + %m = mul i32 %r, %r + ret i32 %m +} + +define <4 x i32> @combine_mul_abs_v4i32(<4 x i32> %0) { +; SSE-LABEL: combine_mul_abs_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pabsd %xmm0, %xmm0 +; SSE-NEXT: pmulld %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_mul_abs_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq + %c = icmp slt <4 x i32> %0, zeroinitializer + %s = sub nsw <4 x i32> zeroinitializer, %0 + %r = select <4 x i1> %c, <4 x i32> %s, <4 x i32> %0 + %m = mul <4 x i32> %r, %r + ret <4 x i32> %m +} + ; This would infinite loop because DAGCombiner wants to turn this into a shift, ; but x86 lowering wants to avoid non-uniform vector shift amounts. 
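(Reviewer note, not part of the patch.) The TODO in combine-mul.ll above is sound because (-x) * (-x) = x * x, so taking abs of both operands of a self-multiply is redundant; the vector test should then lower to pmulld on the raw input.

The vpternlogq folds checked in combine-bitselect.ll further up collapse each and/andn/or bitselect chain into a single ternary-logic instruction whose immediate is just the truth table of the desired boolean function. A minimal sketch of how those immediates are derived, assuming the standard AVX-512 convention that the three operands are assigned the constant masks dst = 0xF0, src1 = 0xCC, src2 = 0xAA:

def ternlog_imm(f):
    # Evaluate the boolean function bitwise on the per-operand masks;
    # the low 8 bits are exactly the vpternlog imm8.
    a, b, c = 0xF0, 0xCC, 0xAA  # dst, src1, src2
    return f(a, b, c) & 0xFF

# Mask broadcast into a register (src1), as in bitselect_v2i64_broadcast_rrr:
# result = (dst & mask) | (src2 & ~mask)
assert ternlog_imm(lambda a, b, c: (a & b) | (c & ~b)) == 226

# Mask broadcast from memory (src2), as in bitselect_v2i64_broadcast_rrm:
# result = (dst & mask) | (src1 & ~mask)
assert ternlog_imm(lambda a, b, c: (a & c) | (b & ~c)) == 228

The same recipe reproduces the other immediates in the file (216, 202) once the operand order of each check line is taken into account.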
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index e7e14f81c5fd9..286ef85195a69 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -653,7 +653,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: pmullw %xmm0, %xmm3 ; SSE2-NEXT: psrlw $8, %xmm3 -; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: packuswb %xmm3, %xmm3 ; SSE2-NEXT: psrlw $7, %xmm3 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm3 ; SSE2-NEXT: pandn %xmm3, %xmm2 @@ -669,7 +669,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: pmullw %xmm0, %xmm2 ; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: packuswb %xmm0, %xmm2 +; SSE41-NEXT: packuswb %xmm2, %xmm2 ; SSE41-NEXT: psrlw $7, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] @@ -684,7 +684,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX-NEXT: vpmullw %xmm1, %xmm2, %xmm1 ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX-NEXT: vpackuswb %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] diff --git a/llvm/test/CodeGen/X86/concat-cast.ll b/llvm/test/CodeGen/X86/concat-cast.ll new file mode 100644 index 0000000000000..6162bafa96111 --- /dev/null +++ b/llvm/test/CodeGen/X86/concat-cast.ll @@ -0,0 +1,501 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 + +define <4 x float> @sitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) { +; SSE-LABEL: sitofp_v4i32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: sitofp_v4i32_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq + %s0 = sitofp <2 x i32> %x to <2 x float> + %s1 = sitofp <2 x i32> %y to <2 x float> + %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> + ret <4 x float> %r +} + +define <4 x float> @uitofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) { +; SSE2-LABEL: uitofp_v4i32_v4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: xorpd %xmm2, %xmm2 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE2-NEXT: orpd %xmm3, %xmm0 +; SSE2-NEXT: subpd %xmm3, %xmm0 +; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0 +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: subpd %xmm3, %xmm1 +; SSE2-NEXT: cvtpd2ps %xmm1, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE4-LABEL: uitofp_v4i32_v4f32: +; SSE4: # %bb.0: +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE4-NEXT: por %xmm2, %xmm0 +; SSE4-NEXT: subpd %xmm2, %xmm0 +; SSE4-NEXT: cvtpd2ps %xmm0, %xmm0 +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; SSE4-NEXT: por %xmm2, %xmm1 +; SSE4-NEXT: subpd %xmm2, %xmm1 +; SSE4-NEXT: cvtpd2ps %xmm1, %xmm1 +; SSE4-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE4-NEXT: retq +; +; AVX1-LABEL: uitofp_v4i32_v4f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vsubpd %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vcvtpd2ps %xmm1, %xmm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_v4i32_v4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vsubpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vcvtpd2ps %xmm1, %xmm1 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: retq +; +; AVX512-LABEL: uitofp_v4i32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtudq2ps %zmm1, %zmm1 +; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %s0 = uitofp <2 x i32> %x to <2 x float> + %s1 = uitofp <2 x i32> %y to <2 x float> + %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> + ret <4 x float> %r +} + +define <4 x i32> @fptosi_v4f32_v4i32(<2 x float> %x, <2 x float> %y) { +; SSE-LABEL: fptosi_v4f32_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: fptosi_v4f32_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX-NEXT: vcvttps2dq %xmm1, %xmm1 +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq + %s0 = fptosi <2 x float> %x to <2 x i32> + %s1 = fptosi <2 x float> %y to <2 x i32> + %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @fptoui_v4f32_v4i32(<2 x float> %x, <2 x float> %y) { +; SSE2-LABEL: fptoui_v4f32_v4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: cmpltps %xmm3, %xmm2 +; SSE2-NEXT: cvttps2dq %xmm0, %xmm4 +; SSE2-NEXT: subps %xmm3, %xmm0 +; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE2-NEXT: movaps {{.*#+}} xmm5 = 
[2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: xorps %xmm5, %xmm0 +; SSE2-NEXT: andps %xmm2, %xmm4 +; SSE2-NEXT: andnps %xmm0, %xmm2 +; SSE2-NEXT: orps %xmm4, %xmm2 +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: cmpltps %xmm3, %xmm0 +; SSE2-NEXT: cvttps2dq %xmm1, %xmm4 +; SSE2-NEXT: subps %xmm3, %xmm1 +; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE2-NEXT: xorps %xmm5, %xmm1 +; SSE2-NEXT: andps %xmm0, %xmm4 +; SSE2-NEXT: andnps %xmm1, %xmm0 +; SSE2-NEXT: orps %xmm4, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE4-LABEL: fptoui_v4f32_v4i32: +; SSE4: # %bb.0: +; SSE4-NEXT: movaps {{.*#+}} xmm4 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] +; SSE4-NEXT: movaps %xmm0, %xmm2 +; SSE4-NEXT: cmpltps %xmm4, %xmm2 +; SSE4-NEXT: cvttps2dq %xmm0, %xmm5 +; SSE4-NEXT: subps %xmm4, %xmm0 +; SSE4-NEXT: cvttps2dq %xmm0, %xmm3 +; SSE4-NEXT: movaps {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] +; SSE4-NEXT: xorps %xmm6, %xmm3 +; SSE4-NEXT: movaps %xmm2, %xmm0 +; SSE4-NEXT: blendvps %xmm0, %xmm5, %xmm3 +; SSE4-NEXT: movaps %xmm1, %xmm0 +; SSE4-NEXT: cmpltps %xmm4, %xmm0 +; SSE4-NEXT: cvttps2dq %xmm1, %xmm2 +; SSE4-NEXT: subps %xmm4, %xmm1 +; SSE4-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE4-NEXT: xorps %xmm6, %xmm1 +; SSE4-NEXT: blendvps %xmm0, %xmm2, %xmm1 +; SSE4-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0] +; SSE4-NEXT: movaps %xmm3, %xmm0 +; SSE4-NEXT: retq +; +; AVX1-LABEL: fptoui_v4f32_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] +; AVX1-NEXT: vcmpltps %xmm2, %xmm0, %xmm3 +; AVX1-NEXT: vsubps %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 +; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vcmpltps %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vsubps %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 +; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 +; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_v4f32_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] +; AVX2-NEXT: vcmpltps %xmm2, %xmm0, %xmm3 +; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm4 +; AVX2-NEXT: vcvttps2dq %xmm4, %xmm4 +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; AVX2-NEXT: vxorps %xmm5, %xmm4, %xmm4 +; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm0 +; AVX2-NEXT: vcmpltps %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vsubps %xmm2, %xmm1, %xmm2 +; AVX2-NEXT: vcvttps2dq %xmm2, %xmm2 +; AVX2-NEXT: vxorps %xmm5, %xmm2, %xmm2 +; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1 +; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: retq +; +; AVX512-LABEL: fptoui_v4f32_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512-NEXT: vcvttps2udq %zmm1, %zmm1 +; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %s0 = fptoui <2 x float> %x to <2 x i32> + %s1 = fptoui <2 x float> %y to <2 x 
i32> + %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> + ret <4 x i32> %r +} + +define <4 x double> @sitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) { +; SSE-LABEL: sitofp_v4i32_v4f64: +; SSE: # %bb.0: +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: sitofp_v4i32_v4f64: +; AVX: # %bb.0: +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq + %s0 = sitofp <2 x i32> %x to <2 x double> + %s1 = sitofp <2 x i32> %y to <2 x double> + %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> + ret <4 x double> %r +} + +define <4 x double> @uitofp_v4i32_v4f64(<2 x i32> %x, <2 x i32> %y) { +; SSE2-LABEL: uitofp_v4i32_v4f64: +; SSE2: # %bb.0: +; SSE2-NEXT: xorpd %xmm2, %xmm2 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE2-NEXT: orpd %xmm3, %xmm0 +; SSE2-NEXT: subpd %xmm3, %xmm0 +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: orpd %xmm3, %xmm1 +; SSE2-NEXT: subpd %xmm3, %xmm1 +; SSE2-NEXT: retq +; +; SSE4-LABEL: uitofp_v4i32_v4f64: +; SSE4: # %bb.0: +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE4-NEXT: por %xmm2, %xmm0 +; SSE4-NEXT: subpd %xmm2, %xmm0 +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; SSE4-NEXT: por %xmm2, %xmm1 +; SSE4-NEXT: subpd %xmm2, %xmm1 +; SSE4-NEXT: retq +; +; AVX1-LABEL: uitofp_v4i32_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vsubpd %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: uitofp_v4i32_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vsubpd %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: uitofp_v4i32_v4f64: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512-NEXT: vcvtudq2pd %ymm1, %zmm1 +; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %s0 = uitofp <2 x i32> %x to <2 x double> + %s1 = uitofp <2 x i32> %y to <2 x double> + %r = shufflevector <2 x double> %s0, <2 x double> %s1, <4 x i32> + ret <4 x double> %r +} + +define <4 x i32> @fptosi_v4f64_v4i32(<2 x double> %x, <2 x double> %y) { +; SSE-LABEL: fptosi_v4f64_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: fptosi_v4f64_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX-NEXT: vcvttpd2dq %xmm1, %xmm1 +; AVX-NEXT: vunpcklpd 
{{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq + %s0 = fptosi <2 x double> %x to <2 x i32> + %s1 = fptosi <2 x double> %y to <2 x i32> + %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> + ret <4 x i32> %r +} + +define <4 x i32> @fptoui_v4f64_v4i32(<2 x double> %x, <2 x double> %y) { +; SSE2-LABEL: fptoui_v4f64_v4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: cvttsd2si %xmm0, %rax +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: cvttsd2si %xmm0, %rcx +; SSE2-NEXT: cvttsd2si %xmm1, %rdx +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; SSE2-NEXT: cvttsd2si %xmm1, %rsi +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: movd %esi, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movd %ecx, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE4-LABEL: fptoui_v4f64_v4i32: +; SSE4: # %bb.0: +; SSE4-NEXT: cvttsd2si %xmm0, %rax +; SSE4-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; SSE4-NEXT: cvttsd2si %xmm0, %rcx +; SSE4-NEXT: cvttsd2si %xmm1, %rdx +; SSE4-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; SSE4-NEXT: cvttsd2si %xmm1, %rsi +; SSE4-NEXT: movd %eax, %xmm0 +; SSE4-NEXT: pinsrd $1, %ecx, %xmm0 +; SSE4-NEXT: pinsrd $2, %edx, %xmm0 +; SSE4-NEXT: pinsrd $3, %esi, %xmm0 +; SSE4-NEXT: retq +; +; AVX1-LABEL: fptoui_v4f64_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] +; AVX1-NEXT: vcmpltpd %ymm2, %ymm0, %ymm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vsubpd %ymm2, %ymm0, %ymm4 +; AVX1-NEXT: vcvttpd2dq %ymm4, %xmm4 +; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; AVX1-NEXT: vxorpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vsubpd %ymm2, %ymm1, %ymm2 +; AVX1-NEXT: vcvttpd2dq %ymm2, %xmm2 +; AVX1-NEXT: vxorpd %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1 +; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: fptoui_v4f64_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] +; AVX2-NEXT: vcmpltpd %ymm2, %ymm0, %ymm3 +; AVX2-NEXT: vpackssdw %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm4 +; AVX2-NEXT: vcvttpd2dq %ymm4, %xmm4 +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] +; AVX2-NEXT: vxorpd %xmm5, %xmm4, %xmm4 +; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 +; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm0 +; AVX2-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vpackssdw %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vsubpd %ymm2, %ymm1, %ymm2 +; AVX2-NEXT: vcvttpd2dq %ymm2, %xmm2 +; AVX2-NEXT: vxorpd %xmm5, %xmm2, %xmm2 +; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 +; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: fptoui_v4f64_v4i32: +; AVX512: # %bb.0: +; 
AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512-NEXT: vcvttpd2udq %zmm1, %ymm1 +; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %s0 = fptoui <2 x double> %x to <2 x i32> + %s1 = fptoui <2 x double> %y to <2 x i32> + %r = shufflevector <2 x i32> %s0, <2 x i32> %s1, <4 x i32> + ret <4 x i32> %r +} + +define <4 x float> @mismatch_tofp_v4i32_v4f32(<2 x i32> %x, <2 x i32> %y) { +; SSE2-LABEL: mismatch_tofp_v4i32_v4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: xorpd %xmm2, %xmm2 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movapd {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE2-NEXT: orpd %xmm2, %xmm0 +; SSE2-NEXT: subpd %xmm2, %xmm0 +; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE4-LABEL: mismatch_tofp_v4i32_v4f32: +; SSE4: # %bb.0: +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; SSE4-NEXT: por %xmm2, %xmm0 +; SSE4-NEXT: subpd %xmm2, %xmm0 +; SSE4-NEXT: cvtpd2ps %xmm0, %xmm0 +; SSE4-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE4-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE4-NEXT: retq +; +; AVX1-LABEL: mismatch_tofp_v4i32_v4f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: mismatch_tofp_v4i32_v4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] +; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 +; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: retq +; +; AVX512-LABEL: mismatch_tofp_v4i32_v4f32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %s0 = uitofp <2 x i32> %x to <2 x float> + %s1 = sitofp <2 x i32> %y to <2 x float> + %r = shufflevector <2 x float> %s0, <2 x float> %s1, <4 x i32> + ret <4 x float> %r +} + +define <4 x float> @sitofp_v4i32_v4f32_extra_use(<2 x i32> %x, <2 x i32> %y, <2 x float>* %p) { +; SSE-LABEL: sitofp_v4i32_v4f32_extra_use: +; SSE: # %bb.0: +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 +; SSE-NEXT: movlps %xmm1, (%rdi) +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; AVX-LABEL: sitofp_v4i32_v4f32_extra_use: +; AVX: # %bb.0: +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1 +; AVX-NEXT: vmovlps %xmm1, (%rdi) +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq + %s0 = sitofp <2 x i32> %x to <2 x float> + %s1 = sitofp <2 x i32> %y to <2 x float> + store <2 x float> %s1, <2 x float>* %p + %r = shufflevector <2 x float> %s0, <2 x float> %s1, 
<4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/X86/emit-big-cst.ll b/llvm/test/CodeGen/X86/emit-big-cst.ll index 51852d00f823b..930684cc8b1f6 100644 --- a/llvm/test/CodeGen/X86/emit-big-cst.ll +++ b/llvm/test/CodeGen/X86/emit-big-cst.ll @@ -3,7 +3,9 @@ ; CHECK: bigCst: ; CHECK-NEXT: .quad 12713950999227904 -; CHECK-NEXT: .quad 26220 +; CHECK-NEXT: .short 26220 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .zero 5 ; CHECK-NEXT: .size bigCst, 16 @bigCst = internal constant i82 483673642326615442599424 diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index 69dd3e44aa3a0..fbfb58d75b71e 100755 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -19,444 +19,444 @@ body: | bb.0: ; CHECK: VMOVAPDYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVAPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVAPDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVAPDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVAPDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVAPDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVAPDYrr $ymm0 $ymm0 = VMOVAPDZ256rr $ymm0 ; CHECK: $ymm0 = VMOVAPDYrr_REV $ymm0 $ymm0 = VMOVAPDZ256rr_REV $ymm0 ; CHECK: VMOVAPSYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVAPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVAPSYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVAPSZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVAPSYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVAPSZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVAPSYrr $ymm0 $ymm0 = VMOVAPSZ256rr $ymm0 ; CHECK: $ymm0 = VMOVAPSYrr_REV $ymm0 $ymm0 = VMOVAPSZ256rr_REV $ymm0 - ; CHECK: $ymm0 = VMOVDDUPYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDDUPZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDDUPYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDDUPZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDDUPYrr $ymm0 $ymm0 = VMOVDDUPZ256rr $ymm0 ; CHECK: VMOVDQAYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQA32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQAYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQA32Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDQAYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQA32Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQAYrr $ymm0 $ymm0 = VMOVDQA32Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQAYrr_REV $ymm0 $ymm0 = VMOVDQA32Z256rr_REV $ymm0 ; CHECK: VMOVDQAYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQA64Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQAYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQA64Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDQAYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQA64Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQAYrr $ymm0 $ymm0 = VMOVDQA64Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQAYrr_REV $ymm0 $ymm0 = VMOVDQA64Z256rr_REV $ymm0 ; CHECK: VMOVDQUYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQU16Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQU16Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQU16Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQUYrr $ymm0 $ymm0 = VMOVDQU16Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQUYrr_REV $ymm0 $ymm0 = VMOVDQU16Z256rr_REV $ymm0 ; CHECK: VMOVDQUYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQU32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQU32Z256rm $rip, 1, $rax, 0,
$noreg + ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQU32Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQUYrr $ymm0 $ymm0 = VMOVDQU32Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQUYrr_REV $ymm0 $ymm0 = VMOVDQU32Z256rr_REV $ymm0 ; CHECK: VMOVDQUYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQU64Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQU64Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQU64Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQUYrr $ymm0 $ymm0 = VMOVDQU64Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQUYrr_REV $ymm0 $ymm0 = VMOVDQU64Z256rr_REV $ymm0 ; CHECK: VMOVDQUYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVDQU8Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVDQU8Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVDQUYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVDQU8Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVDQUYrr $ymm0 $ymm0 = VMOVDQU8Z256rr $ymm0 ; CHECK: $ymm0 = VMOVDQUYrr_REV $ymm0 $ymm0 = VMOVDQU8Z256rr_REV $ymm0 - ; CHECK: $ymm0 = VMOVNTDQAYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVNTDQAZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVNTDQAYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVNTDQAZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: VMOVNTDQYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVNTDQZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 ; CHECK: VMOVNTPDYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVNTPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 ; CHECK: VMOVNTPSYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 - VMOVNTPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVSHDUPYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVSHDUPZ256rm $rip, 1, $rax, 0, $noreg + VMOVNTPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 + ; CHECK: $ymm0 = VMOVSHDUPYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVSHDUPZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVSHDUPYrr $ymm0 $ymm0 = VMOVSHDUPZ256rr $ymm0 - ; CHECK: $ymm0 = VMOVSLDUPYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVSLDUPZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVSLDUPYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVSLDUPZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVSLDUPYrr $ymm0 $ymm0 = VMOVSLDUPZ256rr $ymm0 ; CHECK: VMOVUPDYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVUPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VMOVUPDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VMOVUPDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VMOVUPDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VMOVUPDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VMOVUPDYrr $ymm0 $ymm0 = VMOVUPDZ256rr $ymm0 ; CHECK: $ymm0 = VMOVUPDYrr_REV $ymm0 $ymm0 = VMOVUPDZ256rr_REV $ymm0 ; CHECK: VMOVUPSYmr $rdi, 1, $noreg, 0, $noreg, $ymm0 VMOVUPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm0 - ; CHECK: $ymm0 = VPANDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPANDDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPANDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPANDDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPANDYrr $ymm0, $ymm1 $ymm0 = VPANDDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPANDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPANDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPANDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPANDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPANDYrr $ymm0, $ymm1 $ymm0 = VPANDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPANDNYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = 
VPANDNDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPANDNYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPANDNDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPANDNYrr $ymm0, $ymm1 $ymm0 = VPANDNDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPANDNYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPANDNQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPANDNYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPANDNQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPANDNYrr $ymm0, $ymm1 $ymm0 = VPANDNQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPAVGBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPAVGBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPAVGBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPAVGBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPAVGBYrr $ymm0, $ymm1 $ymm0 = VPAVGBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPAVGWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPAVGWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPAVGWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPAVGWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPAVGWYrr $ymm0, $ymm1 $ymm0 = VPAVGWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDBYrr $ymm0, $ymm1 $ymm0 = VPADDBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDDYrr $ymm0, $ymm1 $ymm0 = VPADDDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDQYrr $ymm0, $ymm1 $ymm0 = VPADDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDSBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDSBYrr $ymm0, $ymm1 $ymm0 = VPADDSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDSWYrr $ymm0, $ymm1 $ymm0 = VPADDSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDUSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDUSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDUSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDUSBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDUSBYrr $ymm0, $ymm1 $ymm0 = VPADDUSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDUSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDUSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDUSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPADDUSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDUSWYrr $ymm0, $ymm1 $ymm0 = VPADDUSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPADDWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPADDWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPADDWYrm $ymm0, $rip, 1, $noreg, 0, 
$noreg + $ymm0 = VPADDWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPADDWYrr $ymm0, $ymm1 $ymm0 = VPADDWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMULPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMULPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMULPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMULPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMULPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMULPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VORPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VORPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VORPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VORPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VORPDYrr $ymm0, $ymm1 $ymm0 = VORPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VORPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VORPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VORPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VORPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VORPSYrr $ymm0, $ymm1 $ymm0 = VORPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMADDUBSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMADDUBSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMADDUBSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMADDUBSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMADDUBSWYrr $ymm0, $ymm1 $ymm0 = VPMADDUBSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMADDWDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMADDWDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMADDWDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMADDWDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMADDWDYrr $ymm0, $ymm1 $ymm0 = VPMADDWDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMAXSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXSBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXSBYrr $ymm0, $ymm1 $ymm0 = VPMAXSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXSDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMAXSDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXSDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXSDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXSDYrr $ymm0, $ymm1 $ymm0 = VPMAXSDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMAXSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXSWYrr $ymm0, $ymm1 $ymm0 = VPMAXSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXUBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMAXUBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXUBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXUBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXUBYrr $ymm0, $ymm1 $ymm0 = VPMAXUBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXUDYrm $ymm0, $rip, 1, $rax, 0, $noreg - 
$ymm0 = VPMAXUDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXUDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXUDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXUDYrr $ymm0, $ymm1 $ymm0 = VPMAXUDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMAXUWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMAXUWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMAXUWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMAXUWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMAXUWYrr $ymm0, $ymm1 $ymm0 = VPMAXUWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINSBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINSBYrr $ymm0, $ymm1 $ymm0 = VPMINSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINSDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINSDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINSDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINSDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINSDYrr $ymm0, $ymm1 $ymm0 = VPMINSDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINSWYrr $ymm0, $ymm1 $ymm0 = VPMINSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINUBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINUBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINUBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINUBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINUBYrr $ymm0, $ymm1 $ymm0 = VPMINUBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINUDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINUDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINUDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINUDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINUDYrr $ymm0, $ymm1 $ymm0 = VPMINUDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMINUWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMINUWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMINUWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMINUWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMINUWYrr $ymm0, $ymm1 $ymm0 = VPMINUWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULDQYrr $ymm0, $ymm1 $ymm0 = VPMULDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULHRSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULHRSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULHRSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULHRSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULHRSWYrr $ymm0, $ymm1 $ymm0 = VPMULHRSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULHUWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULHUWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULHUWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULHUWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULHUWYrr $ymm0, $ymm1 $ymm0 = VPMULHUWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULHWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULHWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; 
CHECK: $ymm0 = VPMULHWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULHWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULHWYrr $ymm0, $ymm1 $ymm0 = VPMULHWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULLDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULLDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULLDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULLDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULLDYrr $ymm0, $ymm1 $ymm0 = VPMULLDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULLWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULLWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULLWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULLWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULLWYrr $ymm0, $ymm1 $ymm0 = VPMULLWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPMULUDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPMULUDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMULUDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMULUDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMULUDQYrr $ymm0, $ymm1 $ymm0 = VPMULUDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPORYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPORDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPORYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPORDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPORYrr $ymm0, $ymm1 $ymm0 = VPORDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPORYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPORQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPORYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPORQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPORYrr $ymm0, $ymm1 $ymm0 = VPORQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBBYrr $ymm0, $ymm1 $ymm0 = VPSUBBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBDYrr $ymm0, $ymm1 $ymm0 = VPSUBDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBQYrr $ymm0, $ymm1 $ymm0 = VPSUBQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBSBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBSBYrr $ymm0, $ymm1 $ymm0 = VPSUBSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBSWYrr $ymm0, $ymm1 $ymm0 = VPSUBSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBUSBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBUSBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBUSBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBUSBZ256rm $ymm0, $rip, 1, $noreg, 0, 
$noreg ; CHECK: $ymm0 = VPSUBUSBYrr $ymm0, $ymm1 $ymm0 = VPSUBUSBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBUSWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBUSWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBUSWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBUSWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBUSWYrr $ymm0, $ymm1 $ymm0 = VPSUBUSWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSUBWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSUBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSUBWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSUBWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSUBWYrr $ymm0, $ymm1 $ymm0 = VPSUBWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPXORYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPXORDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPXORYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPXORDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPXORYrr $ymm0, $ymm1 $ymm0 = VPXORDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPXORYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPXORQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPXORYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPXORQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPXORYrr $ymm0, $ymm1 $ymm0 = VPXORQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VADDPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VADDPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VADDPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VADDPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VADDPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VADDPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VANDNPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VANDNPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VANDNPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VANDNPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VANDNPDYrr $ymm0, $ymm1 $ymm0 = VANDNPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VANDNPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VANDNPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VANDNPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VANDNPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VANDNPSYrr $ymm0, $ymm1 $ymm0 = VANDNPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VANDPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VANDPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VANDPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VANDPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VANDPDYrr $ymm0, $ymm1 $ymm0 = VANDPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VANDPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VANDPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VANDPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VANDPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VANDPSYrr $ymm0, $ymm1 $ymm0 = VANDPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit 
$mxcsr + ; CHECK: $ymm0 = VDIVPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VDIVPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VDIVPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VDIVPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VDIVPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VDIVPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VDIVPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMAXCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMAXCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXCPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMAXCPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMAXCPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMAXCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMAXCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXCPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXCPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMAXCPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMAXCPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMAXPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMAXPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMAXPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMAXPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMAXPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMAXPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMAXPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMAXPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMAXPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMAXPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMINCPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMINCPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMINCPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMINCPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMINCPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMINCPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMINCPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINCPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMINCPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMINCPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMINCPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMINPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMINPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMINPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMINPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = 
VMINPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VMINPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VMINPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VMINPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VMINPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VMINPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VMINPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VXORPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VXORPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VXORPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VXORPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VXORPDYrr $ymm0, $ymm1 $ymm0 = VXORPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VXORPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VXORPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VXORPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VXORPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VXORPSYrr $ymm0, $ymm1 $ymm0 = VXORPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPACKSSDWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPACKSSDWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPACKSSDWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPACKSSDWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPACKSSDWYrr $ymm0, $ymm1 $ymm0 = VPACKSSDWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPACKSSWBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPACKSSWBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPACKSSWBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPACKSSWBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPACKSSWBYrr $ymm0, $ymm1 $ymm0 = VPACKSSWBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPACKUSDWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPACKUSDWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPACKUSDWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPACKUSDWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPACKUSDWYrr $ymm0, $ymm1 $ymm0 = VPACKUSDWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPACKUSWBYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPACKUSWBZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPACKUSWBYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPACKUSWBZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPACKUSWBYrr $ymm0, $ymm1 $ymm0 = VPACKUSWBZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VUNPCKHPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VUNPCKHPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VUNPCKHPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VUNPCKHPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VUNPCKHPDYrr $ymm0, $ymm1 $ymm0 = VUNPCKHPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VUNPCKHPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VUNPCKHPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VUNPCKHPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VUNPCKHPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VUNPCKHPSYrr $ymm0, $ymm1 $ymm0 = VUNPCKHPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VUNPCKLPDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VUNPCKLPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VUNPCKLPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VUNPCKLPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VUNPCKLPDYrr $ymm0, $ymm1 $ymm0 = VUNPCKLPDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VUNPCKLPSYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VUNPCKLPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VUNPCKLPSYrm $ymm0, $rip, 1, $noreg, 0, 
$noreg + $ymm0 = VUNPCKLPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VUNPCKLPSYrr $ymm0, $ymm1 $ymm0 = VUNPCKLPSZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPDYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VSUBPDYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VSUBPDZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm0 = VSUBPSZ256rm $ymm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm0 = VSUBPSYrm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm0 = VSUBPSZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm0 = VSUBPSYrr $ymm0, $ymm1, implicit $mxcsr $ymm0 = VSUBPSZ256rr $ymm0, $ymm1, implicit $mxcsr - ; CHECK: $ymm0 = VPUNPCKHBWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKHBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKHBWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKHBWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHBWYrr $ymm0, $ymm1 $ymm0 = VPUNPCKHBWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKHDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKHDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKHDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKHDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHDQYrr $ymm0, $ymm1 $ymm0 = VPUNPCKHDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKHQDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKHQDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKHQDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKHQDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHQDQYrr $ymm0, $ymm1 $ymm0 = VPUNPCKHQDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKHWDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKHWDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKHWDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKHWDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKHWDYrr $ymm0, $ymm1 $ymm0 = VPUNPCKHWDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKLBWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKLBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKLBWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKLBWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKLBWYrr $ymm0, $ymm1 $ymm0 = VPUNPCKLBWZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKLDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKLDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKLDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKLDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKLDQYrr $ymm0, $ymm1 $ymm0 = VPUNPCKLDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKLQDQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKLQDQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKLQDQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKLQDQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKLQDQYrr $ymm0, $ymm1 $ymm0 = VPUNPCKLQDQZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPUNPCKLWDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPUNPCKLWDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPUNPCKLWDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPUNPCKLWDZ256rm $ymm0, $rip, 1, 
$noreg, 0, $noreg ; CHECK: $ymm0 = VPUNPCKLWDYrr $ymm0, $ymm1 $ymm0 = VPUNPCKLWDZ256rr $ymm0, $ymm1 ; CHECK: $ymm0 = VFMADD132PDYm $ymm0, $ymm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr @@ -605,146 +605,146 @@ body: | $ymm0 = VFNMSUB231PSZ256r $ymm0, $ymm1, $ymm2, implicit $mxcsr ; CHECK: $ymm0 = VPSRADYri $ymm0, 7 $ymm0 = VPSRADZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSRADYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRADZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRADYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRADZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRADYrr $ymm0, $xmm1 $ymm0 = VPSRADZ256rr $ymm0, $xmm1 - ; CHECK: $ymm0 = VPSRAVDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRAVDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRAVDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRAVDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRAVDYrr $ymm0, $ymm1 $ymm0 = VPSRAVDZ256rr $ymm0, $ymm1 ; CHECK: $ymm0 = VPSRAWYri $ymm0, 7 $ymm0 = VPSRAWZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSRAWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRAWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRAWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRAWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRAWYrr $ymm0, $xmm1 $ymm0 = VPSRAWZ256rr $ymm0, $xmm1 ; CHECK: $ymm0 = VPSRLDQYri $ymm0, 7 $ymm0 = VPSRLDQZ256ri $ymm0, 7 ; CHECK: $ymm0 = VPSRLDYri $ymm0, 7 $ymm0 = VPSRLDZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSRLDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRLDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRLDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRLDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRLDYrr $ymm0, $xmm1 $ymm0 = VPSRLDZ256rr $ymm0, $xmm1 ; CHECK: $ymm0 = VPSRLQYri $ymm0, 7 $ymm0 = VPSRLQZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSRLQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRLQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRLQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRLQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRLQYrr $ymm0, $xmm1 $ymm0 = VPSRLQZ256rr $ymm0, $xmm1 - ; CHECK: $ymm0 = VPSRLVDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRLVDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRLVDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRLVDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRLVDYrr $ymm0, $ymm1 $ymm0 = VPSRLVDZ256rr $ymm0, $ymm1 - ; CHECK: $ymm0 = VPSRLVQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRLVQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRLVQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRLVQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRLVQYrr $ymm0, $ymm1 $ymm0 = VPSRLVQZ256rr $ymm0, $ymm1 ; CHECK: $ymm0 = VPSRLWYri $ymm0, 7 $ymm0 = VPSRLWZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSRLWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSRLWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSRLWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSRLWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSRLWYrr $ymm0, $xmm1 $ymm0 = VPSRLWZ256rr $ymm0, $xmm1 - ; CHECK: $ymm0 = VPMOVSXBDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXBDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXBDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXBDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXBDYrr $xmm0 $ymm0 = VPMOVSXBDZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVSXBQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXBQZ256rm $rip, 
1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXBQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXBQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXBQYrr $xmm0 $ymm0 = VPMOVSXBQZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVSXBWYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXBWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXBWYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXBWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXBWYrr $xmm0 $ymm0 = VPMOVSXBWZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVSXDQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXDQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXDQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXDQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXDQYrr $xmm0 $ymm0 = VPMOVSXDQZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVSXWDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXWDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXWDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXWDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXWDYrr $xmm0 $ymm0 = VPMOVSXWDZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVSXWQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVSXWQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVSXWQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVSXWQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVSXWQYrr $xmm0 $ymm0 = VPMOVSXWQZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXBDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXBDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXBDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXBDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXBDYrr $xmm0 $ymm0 = VPMOVZXBDZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXBQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXBQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXBQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXBQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXBQYrr $xmm0 $ymm0 = VPMOVZXBQZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXBWYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXBWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXBWYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXBWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXBWYrr $xmm0 $ymm0 = VPMOVZXBWZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXDQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXDQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXDQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXDQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXDQYrr $xmm0 $ymm0 = VPMOVZXDQZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXWDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXWDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXWDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXWDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXWDYrr $xmm0 $ymm0 = VPMOVZXWDZ256rr $xmm0 - ; CHECK: $ymm0 = VPMOVZXWQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPMOVZXWQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPMOVZXWQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPMOVZXWQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPMOVZXWQYrr $xmm0 $ymm0 = VPMOVZXWQZ256rr $xmm0 - ; CHECK: $ymm0 = VBROADCASTF128 $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTF32X4Z256rm $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VBROADCASTSDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTF32X2Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VBROADCASTF128 $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTF32X4Z256rm $rip, 1, $noreg, 0, $noreg + ; CHECK: $ymm0 = VBROADCASTSDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTF32X2Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = 
VBROADCASTSDYrr $xmm0 $ymm0 = VBROADCASTF32X2Z256rr $xmm0 - ; CHECK: $ymm0 = VBROADCASTSDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTSDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VBROADCASTSDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTSDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VBROADCASTSDYrr $xmm0 $ymm0 = VBROADCASTSDZ256rr $xmm0 - ; CHECK: $ymm0 = VBROADCASTSSYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTSSZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VBROADCASTSSYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTSSZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VBROADCASTSSYrr $xmm0 $ymm0 = VBROADCASTSSZ256rr $xmm0 - ; CHECK: $ymm0 = VPBROADCASTBYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPBROADCASTBZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPBROADCASTBYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPBROADCASTBZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPBROADCASTBYrr $xmm0 $ymm0 = VPBROADCASTBZ256rr $xmm0 - ; CHECK: $ymm0 = VPBROADCASTDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPBROADCASTDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPBROADCASTDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPBROADCASTDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPBROADCASTDYrr $xmm0 $ymm0 = VPBROADCASTDZ256rr $xmm0 - ; CHECK: $ymm0 = VPBROADCASTWYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPBROADCASTWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPBROADCASTWYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPBROADCASTWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPBROADCASTWYrr $xmm0 $ymm0 = VPBROADCASTWZ256rr $xmm0 - ; CHECK: $ymm0 = VBROADCASTI128 $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTI32X4Z256rm $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm0 = VPBROADCASTQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VBROADCASTI32X2Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VBROADCASTI128 $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTI32X4Z256rm $rip, 1, $noreg, 0, $noreg + ; CHECK: $ymm0 = VPBROADCASTQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VBROADCASTI32X2Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPBROADCASTQYrr $xmm0 $ymm0 = VBROADCASTI32X2Z256rr $xmm0 - ; CHECK: $ymm0 = VPBROADCASTQYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPBROADCASTQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPBROADCASTQYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPBROADCASTQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPBROADCASTQYrr $xmm0 $ymm0 = VPBROADCASTQZ256rr $xmm0 - ; CHECK: $ymm0 = VPABSBYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPABSBZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPABSBYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPABSBZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPABSBYrr $ymm0 $ymm0 = VPABSBZ256rr $ymm0 - ; CHECK: $ymm0 = VPABSDYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPABSDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPABSDYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPABSDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPABSDYrr $ymm0 $ymm0 = VPABSDZ256rr $ymm0 - ; CHECK: $ymm0 = VPABSWYrm $rip, 1, $rax, 0, $noreg - $ymm0 = VPABSWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPABSWYrm $rip, 1, $noreg, 0, $noreg + $ymm0 = VPABSWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPABSWYrr $ymm0 $ymm0 = VPABSWZ256rr $ymm0 - ; CHECK: $ymm0 = VPSADBWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSADBWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSADBWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSADBWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSADBWYrr $ymm0, $ymm1 $ymm0 = VPSADBWZ256rr $ymm0, $ymm1 ; 
CHECK: $ymm0 = VPERMDYrm $ymm0, $rdi, 1, $noreg, 0, $noreg @@ -783,28 +783,28 @@ body: | $ymm0 = VPSLLDQZ256ri $ymm0, 14 ; CHECK: $ymm0 = VPSLLDYri $ymm0, 7 $ymm0 = VPSLLDZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSLLDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSLLDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSLLDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSLLDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSLLDYrr $ymm0, $xmm0 $ymm0 = VPSLLDZ256rr $ymm0, $xmm0 ; CHECK: $ymm0 = VPSLLQYri $ymm0, 7 $ymm0 = VPSLLQZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSLLQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSLLQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSLLQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSLLQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSLLQYrr $ymm0, $xmm0 $ymm0 = VPSLLQZ256rr $ymm0, $xmm0 - ; CHECK: $ymm0 = VPSLLVDYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSLLVDZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSLLVDYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSLLVDZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSLLVDYrr $ymm0, $ymm0 $ymm0 = VPSLLVDZ256rr $ymm0, $ymm0 - ; CHECK: $ymm0 = VPSLLVQYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSLLVQZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSLLVQYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSLLVQZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSLLVQYrr $ymm0, $ymm0 $ymm0 = VPSLLVQZ256rr $ymm0, $ymm0 ; CHECK: $ymm0 = VPSLLWYri $ymm0, 7 $ymm0 = VPSLLWZ256ri $ymm0, 7 - ; CHECK: $ymm0 = VPSLLWYrm $ymm0, $rip, 1, $rax, 0, $noreg - $ymm0 = VPSLLWZ256rm $ymm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm0 = VPSLLWYrm $ymm0, $rip, 1, $noreg, 0, $noreg + $ymm0 = VPSLLWZ256rm $ymm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm0 = VPSLLWYrr $ymm0, $xmm0 $ymm0 = VPSLLWZ256rr $ymm0, $xmm0 ; CHECK: $ymm0 = VCVTDQ2PDYrm $rdi, 1, $noreg, 0, $noreg @@ -889,32 +889,32 @@ body: | $ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24 $ymm0 = VSHUFPSZ256rri $ymm0, $ymm1, -24 - ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr - ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $rax, 0, $noreg, 32 - $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 + $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm0 = VPERM2F128rr $ymm0, $ymm1, 32 $ymm0 = VSHUFF32X4Z256rri $ymm0, $ymm1, 228 - ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $rax, 0, $noreg, 32 - $ymm0 = VSHUFF64X2Z256rmi $ymm0, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 + $ymm0 = VSHUFF64X2Z256rmi $ymm0, $rip, 
1, $noreg, 0, $noreg, 228 ; CHECK: $ymm0 = VPERM2F128rr $ymm0, $ymm1, 32 $ymm0 = VSHUFF64X2Z256rri $ymm0, $ymm1, 228 - ; CHECK: $ymm0 = VPERM2I128rm $ymm0, $rip, 1, $rax, 0, $noreg, 32 - $ymm0 = VSHUFI32X4Z256rmi $ymm0, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm0 = VPERM2I128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 + $ymm0 = VSHUFI32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm0 = VPERM2I128rr $ymm0, $ymm1, 32 $ymm0 = VSHUFI32X4Z256rri $ymm0, $ymm1, 228 - ; CHECK: $ymm0 = VPERM2I128rm $ymm0, $rip, 1, $rax, 0, $noreg, 32 - $ymm0 = VSHUFI64X2Z256rmi $ymm0, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm0 = VPERM2I128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 + $ymm0 = VSHUFI64X2Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm0 = VPERM2I128rr $ymm0, $ymm1, 32 $ymm0 = VSHUFI64X2Z256rri $ymm0, $ymm1, 228 - RET 0, $zmm0, $zmm1 + RETQ ... --- # CHECK-LABEL: name: evex_z128_to_vex_test @@ -925,68 +925,68 @@ body: | bb.0: ; CHECK: VMOVAPDmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVAPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVAPDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVAPDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVAPDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVAPDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVAPDrr $xmm0 $xmm0 = VMOVAPDZ128rr $xmm0 ; CHECK: VMOVAPSmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVAPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVAPSrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVAPSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVAPSrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVAPSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVAPSrr $xmm0 $xmm0 = VMOVAPSZ128rr $xmm0 ; CHECK: VMOVDQAmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQA32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQArm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQA32Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQArm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQA32Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDQArr $xmm0 $xmm0 = VMOVDQA32Z128rr $xmm0 ; CHECK: VMOVDQAmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQA64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQArm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQA64Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQArm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQA64Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDQArr $xmm0 $xmm0 = VMOVDQA64Z128rr $xmm0 ; CHECK: VMOVDQUmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQU16Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQU16Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQU16Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDQUrr $xmm0 $xmm0 = VMOVDQU16Z128rr $xmm0 ; CHECK: VMOVDQUmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQU32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQU32Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQU32Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDQUrr $xmm0 $xmm0 = VMOVDQU32Z128rr $xmm0 ; CHECK: VMOVDQUmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQU64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQU64Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQU64Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = 
VMOVDQUrr $xmm0 $xmm0 = VMOVDQU64Z128rr $xmm0 ; CHECK: VMOVDQUmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVDQU8Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVDQU8Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVDQUrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVDQU8Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVDQUrr $xmm0 $xmm0 = VMOVDQU8Z128rr $xmm0 ; CHECK: $xmm0 = VMOVDQUrr_REV $xmm0 $xmm0 = VMOVDQU8Z128rr_REV $xmm0 - ; CHECK: $xmm0 = VMOVNTDQArm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVNTDQAZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVNTDQArm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVNTDQAZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: VMOVUPDmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVUPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVUPDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVUPDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVUPDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVUPDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVUPDrr $xmm0 $xmm0 = VMOVUPDZ128rr $xmm0 ; CHECK: $xmm0 = VMOVUPDrr_REV $xmm0 $xmm0 = VMOVUPDZ128rr_REV $xmm0 ; CHECK: VMOVUPSmr $rdi, 1, $noreg, 0, $noreg, $xmm0 VMOVUPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 - ; CHECK: $xmm0 = VMOVUPSrm $rip, 1, $rax, 0, $noreg - $xmm0 = VMOVUPSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VMOVUPSrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VMOVUPSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VMOVUPSrr $xmm0 $xmm0 = VMOVUPSZ128rr $xmm0 ; CHECK: $xmm0 = VMOVUPSrr_REV $xmm0 @@ -1011,52 +1011,52 @@ body: | $xmm0 = VMOVDQU32Z128rr_REV $xmm0 ; CHECK: $xmm0 = VMOVDQUrr_REV $xmm0 $xmm0 = VMOVDQU64Z128rr_REV $xmm0 - ; CHECK: $xmm0 = VPMOVSXBDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXBDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXBDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXBDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXBDrr $xmm0 $xmm0 = VPMOVSXBDZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVSXBQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXBQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXBQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXBQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXBQrr $xmm0 $xmm0 = VPMOVSXBQZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVSXBWrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXBWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXBWrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXBWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXBWrr $xmm0 $xmm0 = VPMOVSXBWZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVSXDQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXDQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXDQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXDQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXDQrr $xmm0 $xmm0 = VPMOVSXDQZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVSXWDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXWDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXWDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXWDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXWDrr $xmm0 $xmm0 = VPMOVSXWDZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVSXWQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVSXWQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVSXWQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVSXWQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVSXWQrr $xmm0 $xmm0 = VPMOVSXWQZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXBDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXBDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXBDrm $rip, 1, $noreg, 0, 
$noreg + $xmm0 = VPMOVZXBDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXBDrr $xmm0 $xmm0 = VPMOVZXBDZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXBQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXBQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXBQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVZXBQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXBQrr $xmm0 $xmm0 = VPMOVZXBQZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXBWrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXBWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXBWrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVZXBWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXBWrr $xmm0 $xmm0 = VPMOVZXBWZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXDQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXDQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXDQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVZXDQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXDQrr $xmm0 $xmm0 = VPMOVZXDQZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXWDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXWDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXWDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVZXWDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXWDrr $xmm0 $xmm0 = VPMOVZXWDZ128rr $xmm0 - ; CHECK: $xmm0 = VPMOVZXWQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPMOVZXWQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMOVZXWQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMOVZXWQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMOVZXWQrr $xmm0 $xmm0 = VPMOVZXWQZ128rr $xmm0 ; CHECK: VMOVHPDmr $rdi, 1, $noreg, 0, $noreg, $xmm0 @@ -1075,352 +1075,352 @@ body: | VMOVLPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0 ; CHECK: $xmm0 = VMOVLPSrm $xmm0, $rdi, 1, $noreg, 0, $noreg $xmm0 = VMOVLPSZ128rm $xmm0, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm0 = VMAXCPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXCPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXCPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXCPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXCPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXCPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXCPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINCPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINCPDZ128rm $xmm0, $rip, 
1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINCPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINCPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINCPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINCPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINCPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINCPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMULPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMULPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMULPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VORPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VORPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VORPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VORPDrr $xmm0, $xmm1 $xmm0 = VORPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VORPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VORPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VORPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VORPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VORPSrr $xmm0, $xmm1 $xmm0 = VORPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDBrr $xmm0, $xmm1 $xmm0 = VPADDBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDDrr $xmm0, $xmm1 $xmm0 = VPADDDZ128rr 
$xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDQrr $xmm0, $xmm1 $xmm0 = VPADDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDSBrr $xmm0, $xmm1 $xmm0 = VPADDSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDSWrr $xmm0, $xmm1 $xmm0 = VPADDSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDUSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDUSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDUSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDUSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDUSBrr $xmm0, $xmm1 $xmm0 = VPADDUSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDUSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDUSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDUSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDUSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDUSWrr $xmm0, $xmm1 $xmm0 = VPADDUSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPADDWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPADDWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPADDWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPADDWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPADDWrr $xmm0, $xmm1 $xmm0 = VPADDWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPANDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPANDDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPANDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPANDDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPANDrr $xmm0, $xmm1 $xmm0 = VPANDDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPANDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPANDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPANDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPANDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPANDrr $xmm0, $xmm1 $xmm0 = VPANDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPANDNrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPANDNDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPANDNrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPANDNDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPANDNrr $xmm0, $xmm1 $xmm0 = VPANDNDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPANDNrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPANDNQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPANDNrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPANDNQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPANDNrr $xmm0, $xmm1 $xmm0 = VPANDNQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPAVGBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPAVGBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPAVGBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPAVGBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPAVGBrr $xmm0, $xmm1 $xmm0 = VPAVGBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPAVGWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPAVGWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: 
$xmm0 = VPAVGWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPAVGWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPAVGWrr $xmm0, $xmm1 $xmm0 = VPAVGWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXSBrr $xmm0, $xmm1 $xmm0 = VPMAXSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXSDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXSDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXSDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXSDrr $xmm0, $xmm1 $xmm0 = VPMAXSDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXSWrr $xmm0, $xmm1 $xmm0 = VPMAXSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXUBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXUBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXUBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXUBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXUBrr $xmm0, $xmm1 $xmm0 = VPMAXUBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXUDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXUDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXUDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXUDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXUDrr $xmm0, $xmm1 $xmm0 = VPMAXUDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMAXUWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMAXUWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMAXUWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMAXUWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMAXUWrr $xmm0, $xmm1 $xmm0 = VPMAXUWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMINSBrr $xmm0, $xmm1 $xmm0 = VPMINSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINSDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINSDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINSDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINSDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMINSDrr $xmm0, $xmm1 $xmm0 = VPMINSDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMINSWrr $xmm0, $xmm1 $xmm0 = VPMINSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINUBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINUBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINUBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINUBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMINUBrr $xmm0, $xmm1 $xmm0 = VPMINUBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINUDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINUDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINUDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINUDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; 
CHECK: $xmm0 = VPMINUDrr $xmm0, $xmm1 $xmm0 = VPMINUDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMINUWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMINUWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMINUWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMINUWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMINUWrr $xmm0, $xmm1 $xmm0 = VPMINUWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULDQrr $xmm0, $xmm1 $xmm0 = VPMULDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULHRSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULHRSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULHRSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULHRSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULHRSWrr $xmm0, $xmm1 $xmm0 = VPMULHRSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULHUWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULHUWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULHUWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULHUWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULHUWrr $xmm0, $xmm1 $xmm0 = VPMULHUWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULHWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULHWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULHWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULHWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULHWrr $xmm0, $xmm1 $xmm0 = VPMULHWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULLDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULLDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULLDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULLDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULLDrr $xmm0, $xmm1 $xmm0 = VPMULLDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULLWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULLWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULLWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULLWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULLWrr $xmm0, $xmm1 $xmm0 = VPMULLWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMULUDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMULUDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMULUDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMULUDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMULUDQrr $xmm0, $xmm1 $xmm0 = VPMULUDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPORrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPORDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPORrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPORDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPORrr $xmm0, $xmm1 $xmm0 = VPORDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPORrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPORQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPORrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPORQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPORrr $xmm0, $xmm1 $xmm0 = VPORQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBBrr $xmm0, $xmm1 $xmm0 = VPSUBBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBDrm $xmm0, $rip, 1, $rax, 
0, $noreg - $xmm0 = VPSUBDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBDrr $xmm0, $xmm1 $xmm0 = VPSUBDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBQrr $xmm0, $xmm1 $xmm0 = VPSUBQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBSBrr $xmm0, $xmm1 $xmm0 = VPSUBSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBSWrr $xmm0, $xmm1 $xmm0 = VPSUBSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBUSBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBUSBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBUSBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBUSBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBUSBrr $xmm0, $xmm1 $xmm0 = VPSUBUSBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBUSWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBUSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBUSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBUSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBUSWrr $xmm0, $xmm1 $xmm0 = VPSUBUSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPSUBWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSUBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSUBWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSUBWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSUBWrr $xmm0, $xmm1 $xmm0 = VPSUBWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VADDPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VADDPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VADDPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VADDPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VADDPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VADDPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VADDPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VANDNPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VANDNPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VANDNPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VANDNPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VANDNPDrr $xmm0, $xmm1 $xmm0 = VANDNPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VANDNPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VANDNPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VANDNPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VANDNPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VANDNPSrr $xmm0, $xmm1 $xmm0 = 
VANDNPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VANDPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VANDPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VANDPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VANDPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VANDPDrr $xmm0, $xmm1 $xmm0 = VANDPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VANDPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VANDPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VANDPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VANDPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VANDPSrr $xmm0, $xmm1 $xmm0 = VANDPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VDIVPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VDIVPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VDIVPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VDIVPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VDIVPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VDIVPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPXORDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPXORDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 $xmm0 = VPXORDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPXORQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPXORrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPXORQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPXORrr $xmm0, $xmm1 $xmm0 = VPXORQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VSUBPDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VSUBPDZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBPSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VSUBPSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VSUBPSZ128rr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VXORPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VXORPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VXORPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VXORPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VXORPDrr $xmm0, $xmm1 $xmm0 = VXORPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VXORPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VXORPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VXORPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VXORPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VXORPSrr $xmm0, $xmm1 $xmm0 = VXORPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMADDUBSWrm $xmm0, $rip, 1, $rax, 
0, $noreg - $xmm0 = VPMADDUBSWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMADDUBSWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMADDUBSWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMADDUBSWrr $xmm0, $xmm1 $xmm0 = VPMADDUBSWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPMADDWDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPMADDWDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPMADDWDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPMADDWDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPMADDWDrr $xmm0, $xmm1 $xmm0 = VPMADDWDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPACKSSDWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPACKSSDWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPACKSSDWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPACKSSDWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPACKSSDWrr $xmm0, $xmm1 $xmm0 = VPACKSSDWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPACKSSWBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPACKSSWBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPACKSSWBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPACKSSWBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPACKSSWBrr $xmm0, $xmm1 $xmm0 = VPACKSSWBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPACKUSDWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPACKUSDWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPACKUSDWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPACKUSDWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPACKUSDWrr $xmm0, $xmm1 $xmm0 = VPACKUSDWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPACKUSWBrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPACKUSWBZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPACKUSWBrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPACKUSWBZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPACKUSWBrr $xmm0, $xmm1 $xmm0 = VPACKUSWBZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKHBWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKHBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKHBWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKHBWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKHBWrr $xmm0, $xmm1 $xmm0 = VPUNPCKHBWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKHDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKHDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKHDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKHDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKHDQrr $xmm0, $xmm1 $xmm0 = VPUNPCKHDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKHQDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKHQDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKHQDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKHQDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKHQDQrr $xmm0, $xmm1 $xmm0 = VPUNPCKHQDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKHWDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKHWDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKHWDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKHWDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKHWDrr $xmm0, $xmm1 $xmm0 = VPUNPCKHWDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKLBWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKLBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKLBWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKLBWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKLBWrr $xmm0, $xmm1 $xmm0 = VPUNPCKLBWZ128rr $xmm0, 
$xmm1 - ; CHECK: $xmm0 = VPUNPCKLDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKLDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKLDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKLDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKLDQrr $xmm0, $xmm1 $xmm0 = VPUNPCKLDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKLQDQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKLQDQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKLQDQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKLQDQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKLQDQrr $xmm0, $xmm1 $xmm0 = VPUNPCKLQDQZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VPUNPCKLWDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPUNPCKLWDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPUNPCKLWDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPUNPCKLWDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPUNPCKLWDrr $xmm0, $xmm1 $xmm0 = VPUNPCKLWDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VUNPCKHPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VUNPCKHPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VUNPCKHPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VUNPCKHPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VUNPCKHPDrr $xmm0, $xmm1 $xmm0 = VUNPCKHPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VUNPCKHPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VUNPCKHPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VUNPCKHPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VUNPCKHPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VUNPCKHPSrr $xmm0, $xmm1 $xmm0 = VUNPCKHPSZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VUNPCKLPDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VUNPCKLPDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VUNPCKLPDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VUNPCKLPDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VUNPCKLPDrr $xmm0, $xmm1 $xmm0 = VUNPCKLPDZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VUNPCKLPSrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VUNPCKLPSZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VUNPCKLPSrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VUNPCKLPSZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VUNPCKLPSrr $xmm0, $xmm1 $xmm0 = VUNPCKLPSZ128rr $xmm0, $xmm1 ; CHECK: $xmm0 = VFMADD132PDm $xmm0, $xmm0, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr @@ -1569,72 +1569,72 @@ body: | $xmm0 = VFNMSUB231PSZ128r $xmm0, $xmm1, $xmm2, implicit $mxcsr ; CHECK: $xmm0 = VPSLLDri $xmm0, 7 $xmm0 = VPSLLDZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSLLDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSLLDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSLLDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSLLDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSLLDrr $xmm0, $xmm0 $xmm0 = VPSLLDZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSLLQri $xmm0, 7 $xmm0 = VPSLLQZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSLLQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSLLQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSLLQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSLLQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSLLQrr $xmm0, $xmm0 $xmm0 = VPSLLQZ128rr $xmm0, $xmm0 - ; CHECK: $xmm0 = VPSLLVDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSLLVDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSLLVDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSLLVDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSLLVDrr $xmm0, $xmm0 $xmm0 = VPSLLVDZ128rr $xmm0, $xmm0 - ; 
CHECK: $xmm0 = VPSLLVQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSLLVQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSLLVQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSLLVQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSLLVQrr $xmm0, $xmm0 $xmm0 = VPSLLVQZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSLLWri $xmm0, 7 $xmm0 = VPSLLWZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSLLWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSLLWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSLLWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSLLWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSLLWrr $xmm0, $xmm0 $xmm0 = VPSLLWZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSRADri $xmm0, 7 $xmm0 = VPSRADZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSRADrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRADZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRADrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRADZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRADrr $xmm0, $xmm0 $xmm0 = VPSRADZ128rr $xmm0, $xmm0 - ; CHECK: $xmm0 = VPSRAVDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRAVDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRAVDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRAVDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRAVDrr $xmm0, $xmm0 $xmm0 = VPSRAVDZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSRAWri $xmm0, 7 $xmm0 = VPSRAWZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSRAWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRAWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRAWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRAWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRAWrr $xmm0, $xmm0 $xmm0 = VPSRAWZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSRLDQri $xmm0, 14 $xmm0 = VPSRLDQZ128ri $xmm0, 14 ; CHECK: $xmm0 = VPSRLDri $xmm0, 7 $xmm0 = VPSRLDZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSRLDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRLDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRLDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRLDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRLDrr $xmm0, $xmm0 $xmm0 = VPSRLDZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSRLQri $xmm0, 7 $xmm0 = VPSRLQZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSRLQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRLQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRLQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRLQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRLQrr $xmm0, $xmm0 $xmm0 = VPSRLQZ128rr $xmm0, $xmm0 - ; CHECK: $xmm0 = VPSRLVDrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRLVDZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRLVDrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRLVDZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRLVDrr $xmm0, $xmm0 $xmm0 = VPSRLVDZ128rr $xmm0, $xmm0 - ; CHECK: $xmm0 = VPSRLVQrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRLVQZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRLVQrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRLVQZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRLVQrr $xmm0, $xmm0 $xmm0 = VPSRLVQZ128rr $xmm0, $xmm0 ; CHECK: $xmm0 = VPSRLWri $xmm0, 7 $xmm0 = VPSRLWZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VPSRLWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSRLWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSRLWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSRLWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSRLWrr $xmm0, $xmm0 $xmm0 = VPSRLWZ128rr $xmm0, $xmm0 ; 
CHECK: $xmm0 = VPERMILPDmi $rdi, 1, $noreg, 0, $noreg, 9 @@ -1727,80 +1727,80 @@ body: | $xmm0 = VPSHUFLWZ128ri $xmm0, -24 ; CHECK: $xmm0 = VPSLLDQri $xmm0, 7 $xmm0 = VPSLLDQZ128ri $xmm0, 7 - ; CHECK: $xmm0 = VSHUFPDrmi $xmm0, $rip, 1, $rax, 0, $noreg, -24 - $xmm0 = VSHUFPDZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $xmm0 = VSHUFPDrmi $xmm0, $rip, 1, $noreg, 0, $noreg, -24 + $xmm0 = VSHUFPDZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $xmm0 = VSHUFPDrri $xmm0, $xmm1, -24 $xmm0 = VSHUFPDZ128rri $xmm0, $xmm1, -24 - ; CHECK: $xmm0 = VSHUFPSrmi $xmm0, $rip, 1, $rax, 0, $noreg, -24 - $xmm0 = VSHUFPSZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $xmm0 = VSHUFPSrmi $xmm0, $rip, 1, $noreg, 0, $noreg, -24 + $xmm0 = VSHUFPSZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $xmm0 = VSHUFPSrri $xmm0, $xmm1, -24 $xmm0 = VSHUFPSZ128rri $xmm0, $xmm1, -24 - ; CHECK: $xmm0 = VPSADBWrm $xmm0, $rip, 1, $rax, 0, $noreg - $xmm0 = VPSADBWZ128rm $xmm0, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPSADBWrm $xmm0, $rip, 1, $noreg, 0, $noreg + $xmm0 = VPSADBWZ128rm $xmm0, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPSADBWrr $xmm0, $xmm1 $xmm0 = VPSADBWZ128rr $xmm0, $xmm1 - ; CHECK: $xmm0 = VBROADCASTSSrm $rip, 1, $rax, 0, $noreg - $xmm0 = VBROADCASTSSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VBROADCASTSSrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VBROADCASTSSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VBROADCASTSSrr $xmm0 $xmm0 = VBROADCASTSSZ128rr $xmm0 - ; CHECK: $xmm0 = VPBROADCASTBrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPBROADCASTBZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPBROADCASTBrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPBROADCASTBZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTBrr $xmm0 $xmm0 = VPBROADCASTBZ128rr $xmm0 - ; CHECK: $xmm0 = VPBROADCASTDrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPBROADCASTDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPBROADCASTDrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPBROADCASTDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTDrr $xmm0 $xmm0 = VPBROADCASTDZ128rr $xmm0 - ; CHECK: $xmm0 = VPBROADCASTQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPBROADCASTQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPBROADCASTQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPBROADCASTQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTQrr $xmm0 $xmm0 = VPBROADCASTQZ128rr $xmm0 - ; CHECK: $xmm0 = VPBROADCASTWrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPBROADCASTWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPBROADCASTWrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPBROADCASTWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTWrr $xmm0 $xmm0 = VPBROADCASTWZ128rr $xmm0 - ; CHECK: $xmm0 = VPBROADCASTQrm $rip, 1, $rax, 0, $noreg - $xmm0 = VBROADCASTI32X2Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPBROADCASTQrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VBROADCASTI32X2Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPBROADCASTQrr $xmm0 $xmm0 = VBROADCASTI32X2Z128rr $xmm0 ; CHECK: $xmm0 = VCVTPS2PHrr $xmm0, 2, implicit $mxcsr $xmm0 = VCVTPS2PHZ128rr $xmm0, 2, implicit $mxcsr ; CHECK: VCVTPS2PHmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm0, 2, implicit $mxcsr - ; CHECK: $xmm0 = VPABSBrm $rip, 1, $rax, 0, $noreg - $xmm0 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm0 = VPABSBrm $rip, 1, $noreg, 0, $noreg + $xmm0 = VPABSBZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm0 = VPABSBrr $xmm0 $xmm0 = VPABSBZ128rr $xmm0 - ; CHECK: $xmm0 = 
VPABSDrm $rip, 1, $rax, 0, $noreg
- $xmm0 = VPABSDZ128rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $xmm0 = VPABSDrm $rip, 1, $noreg, 0, $noreg
+ $xmm0 = VPABSDZ128rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $xmm0 = VPABSDrr $xmm0
  $xmm0 = VPABSDZ128rr $xmm0
- ; CHECK: $xmm0 = VPABSWrm $rip, 1, $rax, 0, $noreg
- $xmm0 = VPABSWZ128rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $xmm0 = VPABSWrm $rip, 1, $noreg, 0, $noreg
+ $xmm0 = VPABSWZ128rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $xmm0 = VPABSWrr $xmm0
  $xmm0 = VPABSWZ128rr $xmm0
- ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $rax, 0, $noreg, 15
- $xmm0 = VPALIGNRZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, 15
+ ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15
+ $xmm0 = VPALIGNRZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15
  ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 15
  $xmm0 = VPALIGNRZ128rri $xmm0, $xmm1, 15
- ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $rax, 0, $noreg, 4
- $xmm0 = VALIGNDZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, 1
+ ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $noreg, 0, $noreg, 4
+ $xmm0 = VALIGNDZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1
  ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 4
  $xmm0 = VALIGNDZ128rri $xmm0, $xmm1, 1
- ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $rax, 0, $noreg, 8
- $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $rax, 0, $noreg, 1
+ ; CHECK: $xmm0 = VPALIGNRrmi $xmm0, $rip, 1, $noreg, 0, $noreg, 8
+ $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1
  ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8
  $xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1
- ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr
  $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr
  $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr
- RET 0, $zmm0, $zmm1
+ RETQ
 ...
 ---
 # CHECK-LABEL: name: evex_scalar_to_vex_test
@@ -1810,114 +1810,114 @@ name: evex_scalar_to_vex_test
 body: |
  bb.0:
- ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VADDSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VADDSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VADDSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VADDSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VADDSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VADDSDrr $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VADDSDZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VADDSDrr_Int $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VADDSDZrr_Int $xmm0, $xmm1, implicit $mxcsr
- ; CHECK: $xmm0 = VADDSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VADDSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VADDSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VADDSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VADDSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VADDSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VADDSSrr $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VADDSSZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VADDSSrr_Int $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VADDSSZrr_Int $xmm0, $xmm1, implicit $mxcsr
- ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VDIVSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VDIVSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VDIVSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VDIVSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VDIVSDrr $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VDIVSDZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VDIVSDrr_Int $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VDIVSDZrr_Int $xmm0, $xmm1, implicit $mxcsr
- ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VDIVSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VDIVSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VDIVSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VDIVSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VDIVSSrr $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VDIVSSZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VDIVSSrr_Int $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VDIVSSZrr_Int $xmm0, $xmm1, implicit $mxcsr
- ; CHECK: $xmm0 = VMAXCSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VMAXCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg,
implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXCSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXCSDZrr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXCSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXCSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXCSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXCSSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXCSSZrr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMAXSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXSDZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMAXSDrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXSDZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMAXSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMAXSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMAXSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMAXSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMAXSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMAXSSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXSSZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMAXSSrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMAXSSZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINCSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINCSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINCSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINCSDZrr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINCSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINCSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINCSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINCSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINCSSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINCSSZrr $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMINSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSDZrm $xmm0, $rip, 1, 
$noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINSDZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMINSDrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINSDZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMINSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMINSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMINSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMINSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMINSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMINSSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINSSZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMINSSrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMINSSZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMULSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULSDZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMULSDrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULSDZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VMULSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VMULSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VMULSSrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULSSZrr $xmm0, $xmm1, implicit $mxcsr ; CHECK: $xmm0 = VMULSSrr_Int $xmm0, $xmm1, implicit $mxcsr $xmm0 = VMULSSZrr_Int $xmm0, $xmm1, implicit $mxcsr - ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm0 = VSUBSDrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm0 = VSUBSDZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm0 = VSUBSDrr $xmm0, $xmm1, implicit $mxcsr $xmm0 = 
VSUBSDZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VSUBSDrr_Int $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VSUBSDZrr_Int $xmm0, $xmm1, implicit $mxcsr
- ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
- $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $rax, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VSUBSSrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VSUBSSZrm $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VSUBSSrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
+ $xmm0 = VSUBSSZrm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VSUBSSrr $xmm0, $xmm1, implicit $mxcsr
  $xmm0 = VSUBSSZrr $xmm0, $xmm1, implicit $mxcsr
  ; CHECK: $xmm0 = VSUBSSrr_Int $xmm0, $xmm1, implicit $mxcsr
@@ -2130,7 +2130,7 @@ body: |
  VPEXTRWZmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 3
  ; CHECK: $eax = VPEXTRWrr $xmm0, 1
  $eax = VPEXTRWZrr $xmm0, 1
- ; CHECK: $eax = VPEXTRWrr_REV $xmm0, 1
+ ; CHECK: $eax = VPEXTRWrr_REV $xmm0, 1
  $eax = VPEXTRWZrr_REV $xmm0, 1
  ; CHECK: $xmm0 = VPINSRBrm $xmm0, $rsi, 1, $noreg, 0, $noreg, 3
  $xmm0 = VPINSRBZrm $xmm0, $rsi, 1, $noreg, 0, $noreg, 3
@@ -2152,10 +2152,10 @@ body: |
  $xmm0 = VSQRTSDZm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VSQRTSDm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
  $xmm0 = VSQRTSDZm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VSQRTSDr $xmm0, $noreg, implicit $mxcsr
- $xmm0 = VSQRTSDZr $xmm0, $noreg, implicit $mxcsr
- ; CHECK: $xmm0 = VSQRTSDr_Int $xmm0, $noreg, implicit $mxcsr
- $xmm0 = VSQRTSDZr_Int $xmm0, $noreg, implicit $mxcsr
+ ; CHECK: $xmm0 = VSQRTSDr $xmm0, $xmm0, implicit $mxcsr
+ $xmm0 = VSQRTSDZr $xmm0, $xmm0, implicit $mxcsr
+ ; CHECK: $xmm0 = VSQRTSDr_Int $xmm0, $xmm0, implicit $mxcsr
+ $xmm0 = VSQRTSDZr_Int $xmm0, $xmm0, implicit $mxcsr
  ; CHECK: $xmm0 = VSQRTSSm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
  $xmm0 = VSQRTSSZm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
  ; CHECK: $xmm0 = VSQRTSSm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
@@ -2290,8 +2290,8 @@ body: |
  $xmm0 = VMOV64toPQIZrm $rdi, 1, $noreg, 0, $noreg
  ; CHECK: $xmm0 = VMOV64toSDrr $rdi
  $xmm0 = VMOV64toSDZrr $rdi
- ; CHECK: $xmm0 = VMOVDI2PDIrm $rip, 1, $rax, 0, $noreg
- $xmm0 = VMOVDI2PDIZrm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $xmm0 = VMOVDI2PDIrm $rip, 1, $noreg, 0, $noreg
+ $xmm0 = VMOVDI2PDIZrm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $xmm0 = VMOVDI2PDIrr $edi
  $xmm0 = VMOVDI2PDIZrr $edi
  ; CHECK: $xmm0 = VMOVLHPSrr $xmm0, $xmm1
@@ -2310,8 +2310,8 @@ body: |
  $rdi = VMOVPQIto64Zrr $xmm0
  ; CHECK: VMOVPQIto64mr $rdi, 1, $noreg, 0, $noreg, $xmm0
  VMOVPQIto64Zmr $rdi, 1, $noreg, 0, $noreg, $xmm0
- ; CHECK: $xmm0 = VMOVQI2PQIrm $rip, 1, $rax, 0, $noreg
- $xmm0 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $xmm0 = VMOVQI2PQIrm $rip, 1, $noreg, 0, $noreg
+ $xmm0 = VMOVQI2PQIZrm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $xmm0 = VMOVZPQILo2PQIrr $xmm0
  $xmm0 = VMOVZPQILo2PQIZrr $xmm0
  ; CHECK: VCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr
@@ -2354,24 +2354,24 @@ body: |
  $xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1
  ; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1
  $xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1
- ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr
  $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr
  $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr
  $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
- $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
  ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr
  $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr
- RET 0, $zmm0, $zmm1
+ RETQ
 ...
 ---
 # CHECK-LABEL: name: evex_z256_to_evex_test
@@ -2382,444 +2382,444 @@ body: |
  bb.0:
  ; CHECK: VMOVAPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
  VMOVAPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
- ; CHECK: $ymm16 = VMOVAPDZ256rm $rip, 1, $rax, 0, $noreg
- $ymm16 = VMOVAPDZ256rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $ymm16 = VMOVAPDZ256rm $rip, 1, $noreg, 0, $noreg
+ $ymm16 = VMOVAPDZ256rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $ymm16 = VMOVAPDZ256rr $ymm16
  $ymm16 = VMOVAPDZ256rr $ymm16
  ; CHECK: $ymm16 = VMOVAPDZ256rr_REV $ymm16
  $ymm16 = VMOVAPDZ256rr_REV $ymm16
  ; CHECK: VMOVAPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
  VMOVAPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
- ; CHECK: $ymm16 = VMOVAPSZ256rm $rip, 1, $rax, 0, $noreg
- $ymm16 = VMOVAPSZ256rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $ymm16 = VMOVAPSZ256rm $rip, 1, $noreg, 0, $noreg
+ $ymm16 = VMOVAPSZ256rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $ymm16 = VMOVAPSZ256rr $ymm16
  $ymm16 = VMOVAPSZ256rr $ymm16
  ; CHECK: $ymm16 = VMOVAPSZ256rr_REV $ymm16
  $ymm16 = VMOVAPSZ256rr_REV $ymm16
- ; CHECK: $ymm16 = VMOVDDUPZ256rm $rip, 1, $rax, 0, $noreg
- $ymm16 = VMOVDDUPZ256rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $ymm16 = VMOVDDUPZ256rm $rip, 1, $noreg, 0, $noreg
+ $ymm16 = VMOVDDUPZ256rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $ymm16 = VMOVDDUPZ256rr $ymm16
  $ymm16 = VMOVDDUPZ256rr $ymm16
  ; CHECK: VMOVDQA32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
  VMOVDQA32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16
- ; CHECK: $ymm16 = VMOVDQA32Z256rm $rip, 1, $rax, 0, $noreg
- $ymm16 = VMOVDQA32Z256rm $rip, 1, $rax, 0, $noreg
+ ; CHECK: $ymm16 = VMOVDQA32Z256rm $rip, 1, $noreg, 0, $noreg
+ $ymm16 = VMOVDQA32Z256rm $rip, 1, $noreg, 0, $noreg
  ; CHECK: $ymm16 = VMOVDQA32Z256rr $ymm16
  $ymm16 = VMOVDQA32Z256rr $ymm16
  ; CHECK: $ymm16 = VMOVDQA32Z256rr_REV $ymm16
  $ymm16 = VMOVDQA32Z256rr_REV $ymm16
  ; CHECK: VMOVDQA64Z256mr $rdi,
1, $noreg, 0, $noreg, $ymm16 VMOVDQA64Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVDQA64Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVDQA64Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVDQA64Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVDQA64Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVDQA64Z256rr $ymm16 $ymm16 = VMOVDQA64Z256rr $ymm16 ; CHECK: $ymm16 = VMOVDQA64Z256rr_REV $ymm16 $ymm16 = VMOVDQA64Z256rr_REV $ymm16 ; CHECK: VMOVDQU16Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVDQU16Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVDQU16Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVDQU16Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVDQU16Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVDQU16Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVDQU16Z256rr $ymm16 $ymm16 = VMOVDQU16Z256rr $ymm16 ; CHECK: $ymm16 = VMOVDQU16Z256rr_REV $ymm16 $ymm16 = VMOVDQU16Z256rr_REV $ymm16 ; CHECK: VMOVDQU32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVDQU32Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVDQU32Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVDQU32Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVDQU32Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVDQU32Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVDQU32Z256rr $ymm16 $ymm16 = VMOVDQU32Z256rr $ymm16 ; CHECK: $ymm16 = VMOVDQU32Z256rr_REV $ymm16 $ymm16 = VMOVDQU32Z256rr_REV $ymm16 ; CHECK: VMOVDQU64Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVDQU64Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVDQU64Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVDQU64Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVDQU64Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVDQU64Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVDQU64Z256rr $ymm16 $ymm16 = VMOVDQU64Z256rr $ymm16 ; CHECK: $ymm16 = VMOVDQU64Z256rr_REV $ymm16 $ymm16 = VMOVDQU64Z256rr_REV $ymm16 ; CHECK: VMOVDQU8Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVDQU8Z256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVDQU8Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVDQU8Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVDQU8Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVDQU8Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVDQU8Z256rr $ymm16 $ymm16 = VMOVDQU8Z256rr $ymm16 ; CHECK: $ymm16 = VMOVDQU8Z256rr_REV $ymm16 $ymm16 = VMOVDQU8Z256rr_REV $ymm16 - ; CHECK: $ymm16 = VMOVNTDQAZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVNTDQAZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVNTDQAZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVNTDQAZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: VMOVNTDQZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVNTDQZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 ; CHECK: VMOVNTPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVNTPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 ; CHECK: VMOVNTPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVNTPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVSHDUPZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVSHDUPZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVSHDUPZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVSHDUPZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVSHDUPZ256rr $ymm16 $ymm16 = VMOVSHDUPZ256rr $ymm16 - ; CHECK: $ymm16 = VMOVSLDUPZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVSLDUPZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVSLDUPZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVSLDUPZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = 
VMOVSLDUPZ256rr $ymm16 $ymm16 = VMOVSLDUPZ256rr $ymm16 ; CHECK: VMOVUPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVUPDZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VMOVUPDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VMOVUPDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VMOVUPDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VMOVUPDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VMOVUPDZ256rr $ymm16 $ymm16 = VMOVUPDZ256rr $ymm16 ; CHECK: $ymm16 = VMOVUPDZ256rr_REV $ymm16 $ymm16 = VMOVUPDZ256rr_REV $ymm16 ; CHECK: VMOVUPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 VMOVUPSZ256mr $rdi, 1, $noreg, 0, $noreg, $ymm16 - ; CHECK: $ymm16 = VPANDDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPANDDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPANDDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPANDDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPANDDZ256rr $ymm16, $ymm1 $ymm16 = VPANDDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPANDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPANDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPANDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPANDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPANDQZ256rr $ymm16, $ymm1 $ymm16 = VPANDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPANDNDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPANDNDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPANDNDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPANDNDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPANDNDZ256rr $ymm16, $ymm1 $ymm16 = VPANDNDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPANDNQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPANDNQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPANDNQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPANDNQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPANDNQZ256rr $ymm16, $ymm1 $ymm16 = VPANDNQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPAVGBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPAVGBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPAVGBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPAVGBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPAVGBZ256rr $ymm16, $ymm1 $ymm16 = VPAVGBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPAVGWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPAVGWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPAVGWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPAVGWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPAVGWZ256rr $ymm16, $ymm1 $ymm16 = VPAVGWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDBZ256rr $ymm16, $ymm1 $ymm16 = VPADDBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDDZ256rr $ymm16, $ymm1 $ymm16 = VPADDDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; 
CHECK: $ymm16 = VPADDQZ256rr $ymm16, $ymm1 $ymm16 = VPADDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDSBZ256rr $ymm16, $ymm1 $ymm16 = VPADDSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDSWZ256rr $ymm16, $ymm1 $ymm16 = VPADDSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDUSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDUSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDUSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDUSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDUSBZ256rr $ymm16, $ymm1 $ymm16 = VPADDUSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDUSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDUSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDUSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDUSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDUSWZ256rr $ymm16, $ymm1 $ymm16 = VPADDUSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPADDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPADDWZ256rr $ymm16, $ymm1 $ymm16 = VPADDWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMULPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMULPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMULPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VORPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VORPDZ256rr $ymm16, $ymm1 $ymm16 = VORPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VORPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VORPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VORPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VORPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VORPSZ256rr $ymm16, $ymm1 $ymm16 = VORPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMADDUBSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMADDUBSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMADDUBSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMADDUBSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; 
CHECK: $ymm16 = VPMADDUBSWZ256rr $ymm16, $ymm1 $ymm16 = VPMADDUBSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMADDWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMADDWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMADDWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMADDWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMADDWDZ256rr $ymm16, $ymm1 $ymm16 = VPMADDWDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXSBZ256rr $ymm16, $ymm1 $ymm16 = VPMAXSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXSDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXSDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXSDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXSDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXSDZ256rr $ymm16, $ymm1 $ymm16 = VPMAXSDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXSWZ256rr $ymm16, $ymm1 $ymm16 = VPMAXSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXUBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXUBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXUBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXUBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXUBZ256rr $ymm16, $ymm1 $ymm16 = VPMAXUBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXUDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXUDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXUDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXUDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXUDZ256rr $ymm16, $ymm1 $ymm16 = VPMAXUDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMAXUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMAXUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMAXUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMAXUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMAXUWZ256rr $ymm16, $ymm1 $ymm16 = VPMAXUWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINSBZ256rr $ymm16, $ymm1 $ymm16 = VPMINSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINSDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINSDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINSDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINSDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINSDZ256rr $ymm16, $ymm1 $ymm16 = VPMINSDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINSWZ256rr $ymm16, $ymm1 $ymm16 = VPMINSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINUBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINUBZ256rm 
$ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINUBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINUBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINUBZ256rr $ymm16, $ymm1 $ymm16 = VPMINUBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINUDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINUDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINUDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINUDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINUDZ256rr $ymm16, $ymm1 $ymm16 = VPMINUDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMINUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMINUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMINUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMINUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMINUWZ256rr $ymm16, $ymm1 $ymm16 = VPMINUWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULDQZ256rr $ymm16, $ymm1 $ymm16 = VPMULDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULHRSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULHRSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULHRSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULHRSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULHRSWZ256rr $ymm16, $ymm1 $ymm16 = VPMULHRSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULHUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULHUWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULHUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULHUWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULHUWZ256rr $ymm16, $ymm1 $ymm16 = VPMULHUWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULHWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULHWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULHWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULHWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULHWZ256rr $ymm16, $ymm1 $ymm16 = VPMULHWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULLDZ256rr $ymm16, $ymm1 $ymm16 = VPMULLDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULLWZ256rr $ymm16, $ymm1 $ymm16 = VPMULLWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPMULUDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPMULUDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMULUDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMULUDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMULUDQZ256rr $ymm16, $ymm1 $ymm16 = VPMULUDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPORDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPORDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPORDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPORDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; 
CHECK: $ymm16 = VPORDZ256rr $ymm16, $ymm1 $ymm16 = VPORDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPORQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPORQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPORQZ256rr $ymm16, $ymm1 $ymm16 = VPORQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBBZ256rr $ymm16, $ymm1 $ymm16 = VPSUBBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBDZ256rr $ymm16, $ymm1 $ymm16 = VPSUBDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBQZ256rr $ymm16, $ymm1 $ymm16 = VPSUBQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBSBZ256rr $ymm16, $ymm1 $ymm16 = VPSUBSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBSWZ256rr $ymm16, $ymm1 $ymm16 = VPSUBSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBUSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBUSBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBUSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBUSBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBUSBZ256rr $ymm16, $ymm1 $ymm16 = VPSUBUSBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBUSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBUSWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBUSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBUSWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBUSWZ256rr $ymm16, $ymm1 $ymm16 = VPSUBUSWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSUBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSUBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSUBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSUBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSUBWZ256rr $ymm16, $ymm1 $ymm16 = VPSUBWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPXORDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPXORDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPXORDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPXORDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPXORDZ256rr $ymm16, $ymm1 $ymm16 = VPXORDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: 
$ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPXORQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPXORQZ256rr $ymm16, $ymm1 $ymm16 = VPXORQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VADDPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VADDPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VADDPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VANDNPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VANDNPDZ256rr $ymm16, $ymm1 $ymm16 = VANDNPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VANDNPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VANDNPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VANDNPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VANDNPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VANDNPSZ256rr $ymm16, $ymm1 $ymm16 = VANDNPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VANDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VANDPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VANDPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VANDPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VANDPDZ256rr $ymm16, $ymm1 $ymm16 = VANDPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VANDPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VANDPSZ256rr $ymm16, $ymm1 $ymm16 = VANDPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VDIVPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VDIVPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VDIVPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXCPDZ256rm $ymm16, $rip, 1, 
$noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMAXCPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXCPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMAXCPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMAXPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMAXPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMAXPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMAXPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMAXPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMINCPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMINCPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMINCPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMINCPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMINCPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMINCPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMINPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMINPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMINPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VMINPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VMINPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VMINPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VXORPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VXORPDZ256rr $ymm16, $ymm1 $ymm16 = VXORPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = 
VXORPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VXORPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VXORPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VXORPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VXORPSZ256rr $ymm16, $ymm1 $ymm16 = VXORPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPACKSSDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPACKSSDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPACKSSDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPACKSSDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPACKSSDWZ256rr $ymm16, $ymm1 $ymm16 = VPACKSSDWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPACKSSWBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPACKSSWBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPACKSSWBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPACKSSWBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPACKSSWBZ256rr $ymm16, $ymm1 $ymm16 = VPACKSSWBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPACKUSDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPACKUSDWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPACKUSDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPACKUSDWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPACKUSDWZ256rr $ymm16, $ymm1 $ymm16 = VPACKUSDWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPACKUSWBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPACKUSWBZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPACKUSWBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPACKUSWBZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPACKUSWBZ256rr $ymm16, $ymm1 $ymm16 = VPACKUSWBZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VUNPCKHPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VUNPCKHPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VUNPCKHPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VUNPCKHPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VUNPCKHPDZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKHPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VUNPCKHPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VUNPCKHPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VUNPCKHPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VUNPCKHPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VUNPCKHPSZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKHPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VUNPCKLPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VUNPCKLPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VUNPCKLPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VUNPCKLPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VUNPCKLPDZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKLPDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VUNPCKLPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 $ymm16 = VUNPCKLPSZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VSUBPDZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 
= VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $ymm16 = VSUBPSZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr $ymm16 = VSUBPSZ256rr $ymm16, $ymm1, implicit $mxcsr - ; CHECK: $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKHBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHBWZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKHBWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKHDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKHDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKHDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKHDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHDQZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKHDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKHQDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKHQDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKHQDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKHQDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHQDQZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKHQDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKHWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKHWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKHWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKHWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKHWDZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKHWDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKLBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKLBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKLBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKLBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKLBWZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKLBWZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKLDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKLDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKLDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKLDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKLDQZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKLDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKLQDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKLQDQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKLQDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKLQDQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKLQDQZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKLQDQZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPUNPCKLWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPUNPCKLWDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPUNPCKLWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPUNPCKLWDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPUNPCKLWDZ256rr $ymm16, $ymm1 $ymm16 = VPUNPCKLWDZ256rr $ymm16, $ymm1 ; CHECK: $ymm16 = VFMADD132PDZ256m $ymm16, $ymm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr @@ -2968,146 +2968,146 @@ body: | $ymm16 = VFNMSUB231PSZ256r $ymm16, $ymm1, $ymm2, implicit $mxcsr ; CHECK: $ymm16 = VPSRADZ256ri $ymm16, 7 $ymm16 = VPSRADZ256ri $ymm16, 7 - ; CHECK: $ymm16 = 
VPSRADZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRADZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRADZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRADZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRADZ256rr $ymm16, $xmm1 $ymm16 = VPSRADZ256rr $ymm16, $xmm1 - ; CHECK: $ymm16 = VPSRAVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRAVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRAVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRAVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRAVDZ256rr $ymm16, $ymm1 $ymm16 = VPSRAVDZ256rr $ymm16, $ymm1 ; CHECK: $ymm16 = VPSRAWZ256ri $ymm16, 7 $ymm16 = VPSRAWZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSRAWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRAWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRAWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRAWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRAWZ256rr $ymm16, $xmm1 $ymm16 = VPSRAWZ256rr $ymm16, $xmm1 ; CHECK: $ymm16 = VPSRLDQZ256ri $ymm16, 7 $ymm16 = VPSRLDQZ256ri $ymm16, 7 ; CHECK: $ymm16 = VPSRLDZ256ri $ymm16, 7 $ymm16 = VPSRLDZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSRLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRLDZ256rr $ymm16, $xmm1 $ymm16 = VPSRLDZ256rr $ymm16, $xmm1 ; CHECK: $ymm16 = VPSRLQZ256ri $ymm16, 7 $ymm16 = VPSRLQZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSRLQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRLQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRLQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRLQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRLQZ256rr $ymm16, $xmm1 $ymm16 = VPSRLQZ256rr $ymm16, $xmm1 - ; CHECK: $ymm16 = VPSRLVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRLVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRLVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRLVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRLVDZ256rr $ymm16, $ymm1 $ymm16 = VPSRLVDZ256rr $ymm16, $ymm1 - ; CHECK: $ymm16 = VPSRLVQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRLVQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRLVQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRLVQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRLVQZ256rr $ymm16, $ymm1 $ymm16 = VPSRLVQZ256rr $ymm16, $ymm1 ; CHECK: $ymm16 = VPSRLWZ256ri $ymm16, 7 $ymm16 = VPSRLWZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSRLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSRLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSRLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSRLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSRLWZ256rr $ymm16, $xmm1 $ymm16 = VPSRLWZ256rr $ymm16, $xmm1 - ; CHECK: $ymm16 = VPMOVSXBDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXBDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXBDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXBDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVSXBDZ256rr $xmm0 $ymm16 = VPMOVSXBDZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVSXBQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXBQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXBQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXBQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: 
$ymm16 = VPMOVSXBQZ256rr $xmm0 $ymm16 = VPMOVSXBQZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVSXBWZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXBWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXBWZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXBWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVSXBWZ256rr $xmm0 $ymm16 = VPMOVSXBWZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVSXDQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXDQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXDQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXDQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVSXDQZ256rr $xmm0 $ymm16 = VPMOVSXDQZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVSXWDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXWDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXWDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXWDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVSXWDZ256rr $xmm0 $ymm16 = VPMOVSXWDZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVSXWQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVSXWQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVSXWQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVSXWQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVSXWQZ256rr $xmm0 $ymm16 = VPMOVSXWQZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXBDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXBDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXBDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXBDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXBDZ256rr $xmm0 $ymm16 = VPMOVZXBDZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXBQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXBQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXBQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXBQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXBQZ256rr $xmm0 $ymm16 = VPMOVZXBQZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXBWZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXBWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXBWZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXBWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXBWZ256rr $xmm0 $ymm16 = VPMOVZXBWZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXDQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXDQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXDQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXDQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXDQZ256rr $xmm0 $ymm16 = VPMOVZXDQZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXWDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXWDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXWDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXWDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXWDZ256rr $xmm0 $ymm16 = VPMOVZXWDZ256rr $xmm0 - ; CHECK: $ymm16 = VPMOVZXWQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPMOVZXWQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPMOVZXWQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPMOVZXWQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPMOVZXWQZ256rr $xmm0 $ymm16 = VPMOVZXWQZ256rr $xmm0 - ; CHECK: $ymm16 = VBROADCASTF32X2Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTF32X2Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTF32X2Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTF32X2Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VBROADCASTF32X2Z256rr $xmm16 $ymm16 = VBROADCASTF32X2Z256rr $xmm16 - ; CHECK: $ymm16 = VBROADCASTF32X4Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTF32X4Z256rm $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 
= VBROADCASTSDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTSDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTF32X4Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTF32X4Z256rm $rip, 1, $noreg, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTSDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTSDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VBROADCASTSDZ256rr $xmm0 $ymm16 = VBROADCASTSDZ256rr $xmm0 - ; CHECK: $ymm16 = VBROADCASTSSZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTSSZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTSSZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTSSZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VBROADCASTSSZ256rr $xmm0 $ymm16 = VBROADCASTSSZ256rr $xmm0 - ; CHECK: $ymm16 = VPBROADCASTBZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPBROADCASTBZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPBROADCASTBZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPBROADCASTBZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPBROADCASTBZ256rr $xmm0 $ymm16 = VPBROADCASTBZ256rr $xmm0 - ; CHECK: $ymm16 = VPBROADCASTDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPBROADCASTDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPBROADCASTDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPBROADCASTDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPBROADCASTDZ256rr $xmm0 $ymm16 = VPBROADCASTDZ256rr $xmm0 - ; CHECK: $ymm16 = VPBROADCASTWZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPBROADCASTWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPBROADCASTWZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPBROADCASTWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPBROADCASTWZ256rr $xmm0 $ymm16 = VPBROADCASTWZ256rr $xmm0 - ; CHECK: $ymm16 = VBROADCASTI32X4Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTI32X4Z256rm $rip, 1, $rax, 0, $noreg - ; CHECK: $ymm16 = VBROADCASTI32X2Z256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VBROADCASTI32X2Z256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTI32X4Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTI32X4Z256rm $rip, 1, $noreg, 0, $noreg + ; CHECK: $ymm16 = VBROADCASTI32X2Z256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VBROADCASTI32X2Z256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VBROADCASTI32X2Z256rr $xmm16 $ymm16 = VBROADCASTI32X2Z256rr $xmm16 - ; CHECK: $ymm16 = VPBROADCASTQZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPBROADCASTQZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPBROADCASTQZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPBROADCASTQZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPBROADCASTQZ256rr $xmm0 $ymm16 = VPBROADCASTQZ256rr $xmm0 - ; CHECK: $ymm16 = VPABSBZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPABSBZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPABSBZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPABSBZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPABSBZ256rr $ymm16 $ymm16 = VPABSBZ256rr $ymm16 - ; CHECK: $ymm16 = VPABSDZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPABSDZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPABSDZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPABSDZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPABSDZ256rr $ymm16 $ymm16 = VPABSDZ256rr $ymm16 - ; CHECK: $ymm16 = VPABSWZ256rm $rip, 1, $rax, 0, $noreg - $ymm16 = VPABSWZ256rm $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPABSWZ256rm $rip, 1, $noreg, 0, $noreg + $ymm16 = VPABSWZ256rm $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPABSWZ256rr $ymm16 $ymm16 = VPABSWZ256rr $ymm16 - ; CHECK: $ymm16 = VPSADBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = 
VPSADBWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSADBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSADBWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSADBWZ256rr $ymm16, $ymm1 $ymm16 = VPSADBWZ256rr $ymm16, $ymm1 ; CHECK: $ymm16 = VPERMDZ256rm $ymm16, $rdi, 1, $noreg, 0, $noreg @@ -3146,28 +3146,28 @@ body: | $ymm16 = VPSLLDQZ256ri $ymm16, 14 ; CHECK: $ymm16 = VPSLLDZ256ri $ymm16, 7 $ymm16 = VPSLLDZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSLLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSLLDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSLLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSLLDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSLLDZ256rr $ymm16, $xmm16 $ymm16 = VPSLLDZ256rr $ymm16, $xmm16 ; CHECK: $ymm16 = VPSLLQZ256ri $ymm16, 7 $ymm16 = VPSLLQZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSLLQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSLLQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSLLQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSLLQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSLLQZ256rr $ymm16, $xmm16 $ymm16 = VPSLLQZ256rr $ymm16, $xmm16 - ; CHECK: $ymm16 = VPSLLVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSLLVDZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSLLVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSLLVDZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSLLVDZ256rr $ymm16, $ymm16 $ymm16 = VPSLLVDZ256rr $ymm16, $ymm16 - ; CHECK: $ymm16 = VPSLLVQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSLLVQZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSLLVQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSLLVQZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSLLVQZ256rr $ymm16, $ymm16 $ymm16 = VPSLLVQZ256rr $ymm16, $ymm16 ; CHECK: $ymm16 = VPSLLWZ256ri $ymm16, 7 $ymm16 = VPSLLWZ256ri $ymm16, 7 - ; CHECK: $ymm16 = VPSLLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg - $ymm16 = VPSLLWZ256rm $ymm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $ymm16 = VPSLLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg + $ymm16 = VPSLLWZ256rm $ymm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $ymm16 = VPSLLWZ256rr $ymm16, $xmm16 $ymm16 = VPSLLWZ256rr $ymm16, $xmm16 ; CHECK: $ymm16 = VCVTDQ2PDZ256rm $rdi, 1, $noreg, 0, $noreg @@ -3244,48 +3244,48 @@ body: | $ymm16 = VPSHUFLWZ256mi $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm16 = VPSHUFLWZ256ri $ymm16, -24 $ymm16 = VPSHUFLWZ256ri $ymm16, -24 - ; CHECK: $ymm16 = VSHUFPDZ256rmi $ymm16, $rip, 1, $rax, 0, $noreg, -24 - $ymm16 = VSHUFPDZ256rmi $ymm16, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $ymm16 = VSHUFPDZ256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, -24 + $ymm16 = VSHUFPDZ256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm16 = VSHUFPDZ256rri $ymm16, $ymm1, -24 $ymm16 = VSHUFPDZ256rri $ymm16, $ymm1, -24 - ; CHECK: $ymm16 = VSHUFPSZ256rmi $ymm16, $rip, 1, $rax, 0, $noreg, -24 - $ymm16 = VSHUFPSZ256rmi $ymm16, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $ymm16 = VSHUFPSZ256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, -24 + $ymm16 = VSHUFPSZ256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm16 = VSHUFPSZ256rri $ymm16, $ymm1, -24 $ymm16 = VSHUFPSZ256rri $ymm16, $ymm1, -24 - ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPDZ256rmi $rip, 1, 
$noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15, implicit $mxcsr $ymm16 = VRNDSCALEPDZ256rri $ymm16, 15, implicit $mxcsr - ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + $ymm16 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15, implicit $mxcsr $ymm16 = VRNDSCALEPSZ256rri $ymm16, 15, implicit $mxcsr - ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr - $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr + $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr ; CHECK: $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rri $ymm0, 31, implicit $mxcsr - ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr - $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr + $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr ; CHECK: $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rri $ymm0, 31, implicit $mxcsr - ; CHECK: $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 - $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 + $ymm16 = VSHUFF32X4Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm16 = VSHUFF32X4Z256rri $ymm16, $ymm1, 228 $ymm16 = VSHUFF32X4Z256rri $ymm16, $ymm1, 228 - ; CHECK: $ymm16 = VSHUFF64X2Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 - $ymm16 = VSHUFF64X2Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm16 = VSHUFF64X2Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 + $ymm16 = VSHUFF64X2Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm16 = VSHUFF64X2Z256rri $ymm16, $ymm1, 228 $ymm16 = VSHUFF64X2Z256rri $ymm16, $ymm1, 228 - ; CHECK: $ymm16 = VSHUFI32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 - $ymm16 = VSHUFI32X4Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm16 = VSHUFI32X4Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 + $ymm16 = VSHUFI32X4Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm16 = VSHUFI32X4Z256rri $ymm16, $ymm1, 228 $ymm16 = VSHUFI32X4Z256rri $ymm16, $ymm1, 228 - ; CHECK: $ymm16 = VSHUFI64X2Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 - $ymm16 = VSHUFI64X2Z256rmi $ymm16, $rip, 1, $rax, 0, $noreg, 228 + ; CHECK: $ymm16 = VSHUFI64X2Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 + $ymm16 = VSHUFI64X2Z256rmi $ymm16, $rip, 1, $noreg, 0, $noreg, 228 ; CHECK: $ymm16 = VSHUFI64X2Z256rri $ymm16, $ymm1, 228 $ymm16 = VSHUFI64X2Z256rri $ymm16, $ymm1, 228 - RET 0, $zmm0, $zmm1 + RETQ ... 
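# Aside on what these hunks exercise: this test drives X86's EVEX-to-VEX
# compression, which rewrites an EVEX-encoded instruction such as
# VRNDSCALESDZm to its shorter VEX form VROUNDSDm only when nothing about
# the instruction requires EVEX. Registers $xmm16-$xmm31 / $ymm16-$ymm31
# have no VEX encoding, which is why the $ymm16 CHECK lines above expect
# the EVEX opcode to survive unchanged, and why the imm-31 VRNDSCALE cases
# stay EVEX even on $ymm0. Below is a minimal C++ sketch of that decision;
# the string table, helper names, and operand representation are
# illustrative assumptions, not the pass's actual tablegen-generated
# tables.

#include <cassert>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>

// Hypothetical EVEX->VEX opcode table; the real pass uses
// tablegen-generated tables, not string maps. Entries mirror the
// CHECK lines above.
static const std::map<std::string, std::string> CompressTable = {
    {"VRNDSCALESDZm", "VROUNDSDm"},
    {"VRNDSCALESDZr", "VROUNDSDr"},
    {"VMOVAPDZ256rm", "VMOVAPDYrm"},
};

// EVEX-only registers xmm16-xmm31 / ymm16-ymm31 cannot be encoded in
// VEX, so their presence blocks compression.
static bool usesEvexOnlyRegister(const std::vector<std::string> &Ops) {
  for (const std::string &Op : Ops) {
    if (Op.rfind("$xmm", 0) == 0 || Op.rfind("$ymm", 0) == 0)
      if (std::atoi(Op.c_str() + 4) >= 16)
        return true;
  }
  return false;
}

// Returns the VEX name when compression is legal, otherwise the
// original EVEX name (the behavior the $ymm16 CHECK lines pin down).
// The real pass also checks masking, rounding/SAE state, and immediate
// ranges (e.g. VRNDSCALE imm 31 has no VROUND equivalent).
static std::string compress(const std::string &Opc,
                            const std::vector<std::string> &Ops) {
  auto It = CompressTable.find(Opc);
  if (It == CompressTable.end() || usesEvexOnlyRegister(Ops))
    return Opc;
  return It->second;
}

int main() {
  // Plain $xmm0/$xmm1 operands: compresses, matching the VROUNDSDr
  // CHECK earlier in this file.
  assert(compress("VRNDSCALESDZr", {"$xmm0", "$xmm1"}) == "VROUNDSDr");
  // $ymm16 operand: must stay EVEX, matching the VMOVAPDZ256rm CHECK.
  assert(compress("VMOVAPDZ256rm", {"$ymm16", "$rip"}) ==
         "VMOVAPDZ256rm");
  return 0;
}

# The sketch gates only on extended registers; in the real pass the
# same shape of check is what makes the $rax -> $noreg operand updates
# in these hunks behavior-neutral, since the index register of the
# memory operand never affects whether the opcode can be compressed.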
--- # CHECK-LABEL: name: evex_z128_to_evex_test @@ -3296,68 +3296,68 @@ body: | bb.0: ; CHECK: VMOVAPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVAPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVAPDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVAPDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVAPDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVAPDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVAPDZ128rr $xmm16 $xmm16 = VMOVAPDZ128rr $xmm16 ; CHECK: VMOVAPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVAPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVAPSZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVAPSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVAPSZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVAPSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVAPSZ128rr $xmm16 $xmm16 = VMOVAPSZ128rr $xmm16 ; CHECK: VMOVDQA32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQA32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQA32Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQA32Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQA32Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQA32Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQA32Z128rr $xmm16 $xmm16 = VMOVDQA32Z128rr $xmm16 ; CHECK: VMOVDQA64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQA64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQA64Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQA64Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQA64Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQA64Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQA64Z128rr $xmm16 $xmm16 = VMOVDQA64Z128rr $xmm16 ; CHECK: VMOVDQU16Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQU16Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQU16Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQU16Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQU16Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQU16Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQU16Z128rr $xmm16 $xmm16 = VMOVDQU16Z128rr $xmm16 ; CHECK: VMOVDQU32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQU32Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQU32Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQU32Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQU32Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQU32Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQU32Z128rr $xmm16 $xmm16 = VMOVDQU32Z128rr $xmm16 ; CHECK: VMOVDQU64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQU64Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQU64Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQU64Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQU64Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQU64Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQU64Z128rr $xmm16 $xmm16 = VMOVDQU64Z128rr $xmm16 ; CHECK: VMOVDQU8Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVDQU8Z128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVDQU8Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVDQU8Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVDQU8Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVDQU8Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVDQU8Z128rr $xmm16 $xmm16 = VMOVDQU8Z128rr $xmm16 ; CHECK: $xmm16 = VMOVDQU8Z128rr_REV $xmm16 $xmm16 = VMOVDQU8Z128rr_REV $xmm16 - ; CHECK: $xmm16 = VMOVNTDQAZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVNTDQAZ128rm $rip, 1, $rax, 0, $noreg + ; 
CHECK: $xmm16 = VMOVNTDQAZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVNTDQAZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: VMOVUPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVUPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVUPDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVUPDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVUPDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVUPDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVUPDZ128rr $xmm16 $xmm16 = VMOVUPDZ128rr $xmm16 ; CHECK: $xmm16 = VMOVUPDZ128rr_REV $xmm16 $xmm16 = VMOVUPDZ128rr_REV $xmm16 ; CHECK: VMOVUPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 VMOVUPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 - ; CHECK: $xmm16 = VMOVUPSZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VMOVUPSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VMOVUPSZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VMOVUPSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VMOVUPSZ128rr $xmm16 $xmm16 = VMOVUPSZ128rr $xmm16 ; CHECK: $xmm16 = VMOVUPSZ128rr_REV $xmm16 @@ -3382,52 +3382,52 @@ body: | $xmm16 = VMOVDQU32Z128rr_REV $xmm16 ; CHECK: $xmm16 = VMOVDQU64Z128rr_REV $xmm16 $xmm16 = VMOVDQU64Z128rr_REV $xmm16 - ; CHECK: $xmm16 = VPMOVSXBDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXBDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXBDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXBDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXBDZ128rr $xmm16 $xmm16 = VPMOVSXBDZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVSXBQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXBQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXBQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXBQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXBQZ128rr $xmm16 $xmm16 = VPMOVSXBQZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVSXBWZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXBWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXBWZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXBWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXBWZ128rr $xmm16 $xmm16 = VPMOVSXBWZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVSXDQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXDQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXDQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXDQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXDQZ128rr $xmm16 $xmm16 = VPMOVSXDQZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVSXWDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXWDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXWDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXWDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXWDZ128rr $xmm16 $xmm16 = VPMOVSXWDZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVSXWQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVSXWQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVSXWQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVSXWQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVSXWQZ128rr $xmm16 $xmm16 = VPMOVSXWQZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXBDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXBDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXBDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXBDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVZXBDZ128rr $xmm16 $xmm16 = VPMOVZXBDZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXBQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXBQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXBQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXBQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = 
VPMOVZXBQZ128rr $xmm16 $xmm16 = VPMOVZXBQZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXBWZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXBWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXBWZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXBWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVZXBWZ128rr $xmm16 $xmm16 = VPMOVZXBWZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXDQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXDQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXDQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXDQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVZXDQZ128rr $xmm16 $xmm16 = VPMOVZXDQZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXWDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXWDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXWDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXWDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVZXWDZ128rr $xmm16 $xmm16 = VPMOVZXWDZ128rr $xmm16 - ; CHECK: $xmm16 = VPMOVZXWQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPMOVZXWQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMOVZXWQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMOVZXWQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMOVZXWQZ128rr $xmm16 $xmm16 = VPMOVZXWQZ128rr $xmm16 ; CHECK: VMOVHPDZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 @@ -3446,352 +3446,352 @@ body: | VMOVLPSZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16 ; CHECK: $xmm16 = VMOVLPSZ128rm $xmm16, $rdi, 1, $noreg, 0, $noreg $xmm16 = VMOVLPSZ128rm $xmm16, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXCPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXCPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCPDZ128rm $xmm16, 
$rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINCPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINCPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINCPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINCPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VORPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VORPDZ128rr $xmm16, $xmm1 $xmm16 = VORPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VORPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VORPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VORPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VORPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VORPSZ128rr $xmm16, $xmm1 $xmm16 = VORPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDBZ128rr $xmm16, $xmm1 $xmm16 = VPADDBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDDZ128rm $xmm16, $rip, 1, $noreg, 
0, $noreg ; CHECK: $xmm16 = VPADDDZ128rr $xmm16, $xmm1 $xmm16 = VPADDDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDQZ128rr $xmm16, $xmm1 $xmm16 = VPADDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDSBZ128rr $xmm16, $xmm1 $xmm16 = VPADDSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDSWZ128rr $xmm16, $xmm1 $xmm16 = VPADDSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDUSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDUSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDUSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDUSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDUSBZ128rr $xmm16, $xmm1 $xmm16 = VPADDUSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDUSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDUSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDUSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDUSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDUSWZ128rr $xmm16, $xmm1 $xmm16 = VPADDUSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPADDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPADDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPADDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPADDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPADDWZ128rr $xmm16, $xmm1 $xmm16 = VPADDWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPANDDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPANDDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPANDDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPANDDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPANDDZ128rr $xmm16, $xmm1 $xmm16 = VPANDDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPANDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPANDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPANDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPANDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPANDQZ128rr $xmm16, $xmm1 $xmm16 = VPANDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPANDNDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPANDNDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPANDNDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPANDNDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPANDNDZ128rr $xmm16, $xmm1 $xmm16 = VPANDNDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPANDNQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPANDNQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPANDNQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPANDNQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPANDNQZ128rr $xmm16, $xmm1 $xmm16 = VPANDNQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPAVGBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPAVGBZ128rm $xmm16, $rip, 
1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPAVGBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPAVGBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPAVGBZ128rr $xmm16, $xmm1 $xmm16 = VPAVGBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPAVGWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPAVGWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPAVGWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPAVGWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPAVGWZ128rr $xmm16, $xmm1 $xmm16 = VPAVGWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXSBZ128rr $xmm16, $xmm1 $xmm16 = VPMAXSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXSDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXSDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXSDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXSDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXSDZ128rr $xmm16, $xmm1 $xmm16 = VPMAXSDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXSWZ128rr $xmm16, $xmm1 $xmm16 = VPMAXSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXUBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXUBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXUBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXUBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXUBZ128rr $xmm16, $xmm1 $xmm16 = VPMAXUBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXUDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXUDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXUDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXUDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXUDZ128rr $xmm16, $xmm1 $xmm16 = VPMAXUDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMAXUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMAXUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMAXUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMAXUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMAXUWZ128rr $xmm16, $xmm1 $xmm16 = VPMAXUWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINSBZ128rr $xmm16, $xmm1 $xmm16 = VPMINSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINSDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINSDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINSDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINSDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINSDZ128rr $xmm16, $xmm1 $xmm16 = VPMINSDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINSWZ128rr $xmm16, $xmm1 
$xmm16 = VPMINSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINUBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINUBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINUBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINUBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINUBZ128rr $xmm16, $xmm1 $xmm16 = VPMINUBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINUDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINUDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINUDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINUDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINUDZ128rr $xmm16, $xmm1 $xmm16 = VPMINUDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMINUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMINUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMINUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMINUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMINUWZ128rr $xmm16, $xmm1 $xmm16 = VPMINUWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULDQZ128rr $xmm16, $xmm1 $xmm16 = VPMULDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULHRSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULHRSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULHRSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULHRSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULHRSWZ128rr $xmm16, $xmm1 $xmm16 = VPMULHRSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULHUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULHUWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULHUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULHUWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULHUWZ128rr $xmm16, $xmm1 $xmm16 = VPMULHUWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULHWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULHWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULHWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULHWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULHWZ128rr $xmm16, $xmm1 $xmm16 = VPMULHWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULLDZ128rr $xmm16, $xmm1 $xmm16 = VPMULLDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULLWZ128rr $xmm16, $xmm1 $xmm16 = VPMULLWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMULUDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMULUDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMULUDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMULUDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMULUDQZ128rr $xmm16, $xmm1 $xmm16 = VPMULUDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + 
; CHECK: $xmm16 = VPORDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPORDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPORDZ128rr $xmm16, $xmm1 $xmm16 = VPORDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPORQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPORQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPORQZ128rr $xmm16, $xmm1 $xmm16 = VPORQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBBZ128rr $xmm16, $xmm1 $xmm16 = VPSUBBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBDZ128rr $xmm16, $xmm1 $xmm16 = VPSUBDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBQZ128rr $xmm16, $xmm1 $xmm16 = VPSUBQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBSBZ128rr $xmm16, $xmm1 $xmm16 = VPSUBSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBSWZ128rr $xmm16, $xmm1 $xmm16 = VPSUBSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBUSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBUSBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBUSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBUSBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBUSBZ128rr $xmm16, $xmm1 $xmm16 = VPSUBUSBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBUSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBUSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBUSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBUSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBUSWZ128rr $xmm16, $xmm1 $xmm16 = VPSUBUSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSUBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 $xmm16 = VPSUBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = 
VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VANDNPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VANDNPDZ128rr $xmm16, $xmm1 $xmm16 = VANDNPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VANDNPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VANDNPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VANDNPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VANDNPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VANDNPSZ128rr $xmm16, $xmm1 $xmm16 = VANDNPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VANDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VANDPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VANDPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VANDPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VANDPDZ128rr $xmm16, $xmm1 $xmm16 = VANDPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VANDPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VANDPSZ128rr $xmm16, $xmm1 $xmm16 = VANDPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPXORDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPXORDZ128rr $xmm16, $xmm1 $xmm16 = VPXORDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPXORQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPXORQZ128rr $xmm16, $xmm1 $xmm16 = VPXORQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; 
CHECK: $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VSUBPDZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VSUBPSZ128rr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VXORPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VXORPDZ128rr $xmm16, $xmm1 $xmm16 = VXORPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VXORPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VXORPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VXORPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VXORPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VXORPSZ128rr $xmm16, $xmm1 $xmm16 = VXORPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMADDUBSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMADDUBSWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMADDUBSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMADDUBSWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMADDUBSWZ128rr $xmm16, $xmm1 $xmm16 = VPMADDUBSWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPMADDWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPMADDWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPMADDWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPMADDWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPMADDWDZ128rr $xmm16, $xmm1 $xmm16 = VPMADDWDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPACKSSDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPACKSSDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPACKSSDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPACKSSDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPACKSSDWZ128rr $xmm16, $xmm1 $xmm16 = VPACKSSDWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPACKSSWBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPACKSSWBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPACKSSWBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPACKSSWBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPACKSSWBZ128rr $xmm16, $xmm1 $xmm16 = VPACKSSWBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPACKUSDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPACKUSDWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPACKUSDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPACKUSDWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPACKUSDWZ128rr $xmm16, $xmm1 $xmm16 = VPACKUSDWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPACKUSWBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPACKUSWBZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPACKUSWBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPACKUSWBZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPACKUSWBZ128rr $xmm16, $xmm1 $xmm16 = VPACKUSWBZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKHBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = 
VPUNPCKHBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKHBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKHBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKHBWZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKHBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKHDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKHDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKHDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKHDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKHDQZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKHDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKHQDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKHQDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKHQDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKHQDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKHQDQZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKHQDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKHWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKHWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKHWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKHWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKHWDZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKHWDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKLBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKLBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKLBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKLBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKLBWZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKLBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKLDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKLDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKLDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKLDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKLDQZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKLDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKLQDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKLQDQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKLQDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKLQDQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKLQDQZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKLQDQZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VPUNPCKLWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPUNPCKLWDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPUNPCKLWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPUNPCKLWDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPUNPCKLWDZ128rr $xmm16, $xmm1 $xmm16 = VPUNPCKLWDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VUNPCKHPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VUNPCKHPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VUNPCKHPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VUNPCKHPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VUNPCKHPDZ128rr $xmm16, $xmm1 $xmm16 = VUNPCKHPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VUNPCKHPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VUNPCKHPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VUNPCKHPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VUNPCKHPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VUNPCKHPSZ128rr $xmm16, $xmm1 $xmm16 = VUNPCKHPSZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VUNPCKLPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = 
VUNPCKLPDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VUNPCKLPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VUNPCKLPDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VUNPCKLPDZ128rr $xmm16, $xmm1 $xmm16 = VUNPCKLPDZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VUNPCKLPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VUNPCKLPSZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VUNPCKLPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VUNPCKLPSZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VUNPCKLPSZ128rr $xmm16, $xmm1 $xmm16 = VUNPCKLPSZ128rr $xmm16, $xmm1 ; CHECK: $xmm16 = VFMADD132PDZ128m $xmm16, $xmm16, $rsi, 1, $noreg, 0, $noreg, implicit $mxcsr @@ -3940,72 +3940,72 @@ body: | $xmm16 = VFNMSUB231PSZ128r $xmm16, $xmm1, $xmm2, implicit $mxcsr ; CHECK: $xmm16 = VPSLLDZ128ri $xmm16, 7 $xmm16 = VPSLLDZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSLLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSLLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSLLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSLLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSLLDZ128rr $xmm16, $xmm16 $xmm16 = VPSLLDZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSLLQZ128ri $xmm16, 7 $xmm16 = VPSLLQZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSLLQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSLLQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSLLQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSLLQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSLLQZ128rr $xmm16, $xmm16 $xmm16 = VPSLLQZ128rr $xmm16, $xmm16 - ; CHECK: $xmm16 = VPSLLVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSLLVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSLLVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSLLVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSLLVDZ128rr $xmm16, $xmm16 $xmm16 = VPSLLVDZ128rr $xmm16, $xmm16 - ; CHECK: $xmm16 = VPSLLVQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSLLVQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSLLVQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSLLVQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSLLVQZ128rr $xmm16, $xmm16 $xmm16 = VPSLLVQZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSLLWZ128ri $xmm16, 7 $xmm16 = VPSLLWZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSLLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSLLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSLLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSLLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSLLWZ128rr $xmm16, $xmm16 $xmm16 = VPSLLWZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSRADZ128ri $xmm16, 7 $xmm16 = VPSRADZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSRADZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRADZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRADZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRADZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRADZ128rr $xmm16, $xmm16 $xmm16 = VPSRADZ128rr $xmm16, $xmm16 - ; CHECK: $xmm16 = VPSRAVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRAVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRAVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRAVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRAVDZ128rr $xmm16, $xmm16 $xmm16 = VPSRAVDZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSRAWZ128ri $xmm16, 7 $xmm16 = VPSRAWZ128ri $xmm16, 7 - ; CHECK: $xmm16 = 
VPSRAWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRAWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRAWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRAWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRAWZ128rr $xmm16, $xmm16 $xmm16 = VPSRAWZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSRLDQZ128ri $xmm16, 14 $xmm16 = VPSRLDQZ128ri $xmm16, 14 ; CHECK: $xmm16 = VPSRLDZ128ri $xmm16, 7 $xmm16 = VPSRLDZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSRLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRLDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRLDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRLDZ128rr $xmm16, $xmm16 $xmm16 = VPSRLDZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSRLQZ128ri $xmm16, 7 $xmm16 = VPSRLQZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSRLQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRLQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRLQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRLQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRLQZ128rr $xmm16, $xmm16 $xmm16 = VPSRLQZ128rr $xmm16, $xmm16 - ; CHECK: $xmm16 = VPSRLVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRLVDZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRLVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRLVDZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRLVDZ128rr $xmm16, $xmm16 $xmm16 = VPSRLVDZ128rr $xmm16, $xmm16 - ; CHECK: $xmm16 = VPSRLVQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRLVQZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRLVQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRLVQZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRLVQZ128rr $xmm16, $xmm16 $xmm16 = VPSRLVQZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPSRLWZ128ri $xmm16, 7 $xmm16 = VPSRLWZ128ri $xmm16, 7 - ; CHECK: $xmm16 = VPSRLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSRLWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSRLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSRLWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSRLWZ128rr $xmm16, $xmm16 $xmm16 = VPSRLWZ128rr $xmm16, $xmm16 ; CHECK: $xmm16 = VPERMILPDZ128mi $rdi, 1, $noreg, 0, $noreg, 9 @@ -4098,56 +4098,56 @@ body: | $xmm16 = VPSHUFLWZ128ri $xmm16, -24 ; CHECK: $xmm16 = VPSLLDQZ128ri $xmm16, 1 $xmm16 = VPSLLDQZ128ri $xmm16, 1 - ; CHECK: $xmm16 = VSHUFPDZ128rmi $xmm16, $rip, 1, $rax, 0, $noreg, -24 - $xmm16 = VSHUFPDZ128rmi $xmm16, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $xmm16 = VSHUFPDZ128rmi $xmm16, $rip, 1, $noreg, 0, $noreg, -24 + $xmm16 = VSHUFPDZ128rmi $xmm16, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $xmm16 = VSHUFPDZ128rri $xmm16, $xmm1, -24 $xmm16 = VSHUFPDZ128rri $xmm16, $xmm1, -24 - ; CHECK: $xmm16 = VSHUFPSZ128rmi $xmm16, $rip, 1, $rax, 0, $noreg, -24 - $xmm16 = VSHUFPSZ128rmi $xmm16, $rip, 1, $rax, 0, $noreg, -24 + ; CHECK: $xmm16 = VSHUFPSZ128rmi $xmm16, $rip, 1, $noreg, 0, $noreg, -24 + $xmm16 = VSHUFPSZ128rmi $xmm16, $rip, 1, $noreg, 0, $noreg, -24 ; CHECK: $xmm16 = VSHUFPSZ128rri $xmm16, $xmm1, -24 $xmm16 = VSHUFPSZ128rri $xmm16, $xmm1, -24 - ; CHECK: $xmm16 = VPSADBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg - $xmm16 = VPSADBWZ128rm $xmm16, $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPSADBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg + $xmm16 = VPSADBWZ128rm $xmm16, $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPSADBWZ128rr $xmm16, $xmm1 $xmm16 = 
VPSADBWZ128rr $xmm16, $xmm1 - ; CHECK: $xmm16 = VBROADCASTSSZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VBROADCASTSSZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VBROADCASTSSZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VBROADCASTSSZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VBROADCASTSSZ128rr $xmm16 $xmm16 = VBROADCASTSSZ128rr $xmm16 - ; CHECK: $xmm16 = VPBROADCASTBZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPBROADCASTBZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPBROADCASTBZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPBROADCASTBZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPBROADCASTBZ128rr $xmm16 $xmm16 = VPBROADCASTBZ128rr $xmm16 - ; CHECK: $xmm16 = VPBROADCASTDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPBROADCASTDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPBROADCASTDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPBROADCASTDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPBROADCASTDZ128rr $xmm16 $xmm16 = VPBROADCASTDZ128rr $xmm16 - ; CHECK: $xmm16 = VPBROADCASTQZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPBROADCASTQZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPBROADCASTQZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPBROADCASTQZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPBROADCASTQZ128rr $xmm16 $xmm16 = VPBROADCASTQZ128rr $xmm16 - ; CHECK: $xmm16 = VPBROADCASTWZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPBROADCASTWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPBROADCASTWZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPBROADCASTWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPBROADCASTWZ128rr $xmm16 $xmm16 = VPBROADCASTWZ128rr $xmm16 - ; CHECK: $xmm16 = VBROADCASTI32X2Z128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VBROADCASTI32X2Z128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VBROADCASTI32X2Z128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VBROADCASTI32X2Z128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VBROADCASTI32X2Z128rr $xmm0 $xmm16 = VBROADCASTI32X2Z128rr $xmm0 ; CHECK: $xmm16 = VCVTPS2PHZ128rr $xmm16, 2, implicit $mxcsr $xmm16 = VCVTPS2PHZ128rr $xmm16, 2, implicit $mxcsr ; CHECK: VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr VCVTPS2PHZ128mr $rdi, 1, $noreg, 0, $noreg, $xmm16, 2, implicit $mxcsr - ; CHECK: $xmm16 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPABSBZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPABSBZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPABSBZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPABSBZ128rr $xmm16 $xmm16 = VPABSBZ128rr $xmm16 - ; CHECK: $xmm16 = VPABSDZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPABSDZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPABSDZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPABSDZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPABSDZ128rr $xmm16 $xmm16 = VPABSDZ128rr $xmm16 - ; CHECK: $xmm16 = VPABSWZ128rm $rip, 1, $rax, 0, $noreg - $xmm16 = VPABSWZ128rm $rip, 1, $rax, 0, $noreg + ; CHECK: $xmm16 = VPABSWZ128rm $rip, 1, $noreg, 0, $noreg + $xmm16 = VPABSWZ128rm $rip, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VPABSWZ128rr $xmm16 $xmm16 = VPABSWZ128rr $xmm16 ; CHECK: $xmm16 = VPALIGNRZ128rmi $xmm16, $rdi, 1, $noreg, 0, $noreg, 15 @@ -4162,24 +4162,24 @@ body: | $xmm16 = VINSERTPSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm16 = VINSERTPSZrr $xmm16, $xmm16, 1 $xmm16 = VINSERTPSZrr $xmm16, $xmm16, 1 - ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, 
$noreg, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15, implicit $mxcsr $xmm16 = VRNDSCALEPDZ128rri $xmm16, 15, implicit $mxcsr - ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr - $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + $xmm16 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr ; CHECK: $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15, implicit $mxcsr $xmm16 = VRNDSCALEPSZ128rri $xmm16, 15, implicit $mxcsr - ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr - $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr ; CHECK: $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rri $xmm0, 31, implicit $mxcsr - ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr - $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr + ; CHECK: $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr + $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr ; CHECK: $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr - RET 0, $zmm0, $zmm1 + RETQ ... --- # CHECK-LABEL: name: evex_scalar_to_evex_test @@ -4188,114 +4188,114 @@ body: | name: evex_scalar_to_evex_test body: | bb.0: - ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VADDSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VADDSSZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVSDZrm $xmm16, $rip, 1, 
$rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VDIVSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VDIVSSZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXCSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXCSDZrr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXCSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXCSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXCSSZrr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMAXSSZrm_Int $xmm16, 
$rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMAXSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMAXSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMAXSSZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINCSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINCSDZrr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINCSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINCSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINCSSZrr $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMINSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMINSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMINSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMINSSZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMINSSZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSDZrm 
$xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VMULSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VMULSSZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSDZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSDZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VSUBSDZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr $xmm16 = VSUBSDZrr_Int $xmm16, $xmm1, implicit $mxcsr - ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr - $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $rax, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSSZrm $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr + $xmm16 = VSUBSSZrm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr $xmm16 = VSUBSSZrr $xmm16, $xmm1, implicit $mxcsr ; CHECK: $xmm16 = VSUBSSZrr_Int $xmm16, $xmm1, implicit $mxcsr @@ -4554,50 +4554,50 @@ body: | $xmm16 = VCVTSD2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSD2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSD2SSZrr $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSD2SSZrr $xmm16, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSD2SSZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSD2SSZrr $xmm16, $xmm16, implicit $mxcsr + $xmm16 = 
VCVTSD2SSZrr $xmm16, $xmm16, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSD2SSZrr_Int $xmm16, $xmm16, implicit $mxcsr + $xmm16 = VCVTSD2SSZrr_Int $xmm16, $xmm16, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg $xmm16 = VCVTSI2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg ; CHECK: $xmm16 = VCVTSI2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg $xmm16 = VCVTSI2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg - ; CHECK: $xmm16 = VCVTSI2SDZrr $xmm16, $noreg - $xmm16 = VCVTSI2SDZrr $xmm16, $noreg - ; CHECK: $xmm16 = VCVTSI2SDZrr_Int $xmm16, $noreg - $xmm16 = VCVTSI2SDZrr_Int $xmm16, $noreg + ; CHECK: $xmm16 = VCVTSI2SDZrr $xmm16, $edi + $xmm16 = VCVTSI2SDZrr $xmm16, $edi + ; CHECK: $xmm16 = VCVTSI2SDZrr_Int $xmm16, $edi + $xmm16 = VCVTSI2SDZrr_Int $xmm16, $edi ; CHECK: $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI2SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI2SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI2SSZrr $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI2SSZrr $xmm16, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI2SSZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI2SSZrr $xmm16, $edi, implicit $mxcsr + $xmm16 = VCVTSI2SSZrr $xmm16, $edi, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI2SSZrr_Int $xmm16, $edi, implicit $mxcsr + $xmm16 = VCVTSI2SSZrr_Int $xmm16, $edi, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI642SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI642SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI642SDZrr $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI642SDZrr $xmm16, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI642SDZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrr $xmm16, $rdi, implicit $mxcsr + $xmm16 = VCVTSI642SDZrr $xmm16, $rdi, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SDZrr_Int $xmm16, $rdi, implicit $mxcsr + $xmm16 = VCVTSI642SDZrr_Int $xmm16, $rdi, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI642SSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSI642SSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI642SSZrr $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI642SSZrr $xmm16, $noreg, implicit $mxcsr - ; CHECK: $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg, implicit $mxcsr - $xmm16 = VCVTSI642SSZrr_Int $xmm16, $noreg, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrr $xmm16, $rdi, implicit $mxcsr + $xmm16 = VCVTSI642SSZrr $xmm16, $rdi, implicit $mxcsr + ; CHECK: $xmm16 = VCVTSI642SSZrr_Int $xmm16, $rdi, implicit $mxcsr + $xmm16 = VCVTSI642SSZrr_Int $xmm16, $rdi, implicit $mxcsr ; CHECK: $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSS2SDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr ; CHECK: $xmm16 = VCVTSS2SDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr $xmm16 = VCVTSS2SDZrm_Int $xmm16, 
$rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
-  ; CHECK: $xmm16 = VCVTSS2SDZrr $xmm16, $noreg, implicit $mxcsr
-  $xmm16 = VCVTSS2SDZrr $xmm16, $noreg, implicit $mxcsr
-  ; CHECK: $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg, implicit $mxcsr
-  $xmm16 = VCVTSS2SDZrr_Int $xmm16, $noreg, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrr $xmm16, $xmm16, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrr $xmm16, $xmm16, implicit $mxcsr
+  ; CHECK: $xmm16 = VCVTSS2SDZrr_Int $xmm16, $xmm16, implicit $mxcsr
+  $xmm16 = VCVTSS2SDZrr_Int $xmm16, $xmm16, implicit $mxcsr
   ; CHECK: $rdi = VCVTSS2SI64rm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
   $rdi = VCVTSS2SI64Zrm_Int $rdi, 1, $noreg, 0, $noreg, implicit $mxcsr
   ; CHECK: $rdi = VCVTSS2SI64Zrr_Int $xmm16, implicit $mxcsr
@@ -4644,10 +4644,10 @@ body: |
   $xmm16 = VMOVDI2SSZrr $eax
   ; CHECK: VMOVSDZmr $rdi, 1, $noreg, 0, $noreg, $xmm16
   VMOVSDZmr $rdi, 1, $noreg, 0, $noreg, $xmm16
-  ; CHECK: $xmm16 = VMOVSDZrm $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVSDZrm $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMOVSDZrm_alt $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVSDZrm_alt $rip, 1, $rax, 0, $noreg
+  ; CHECK: $xmm16 = VMOVSDZrm $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVSDZrm $rip, 1, $noreg, 0, $noreg
+  ; CHECK: $xmm16 = VMOVSDZrm_alt $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVSDZrm_alt $rip, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VMOVSDZrr $xmm16, $xmm1
   $xmm16 = VMOVSDZrr $xmm16, $xmm1
   ; CHECK: $xmm16 = VMOVSDZrr_REV $xmm16, $xmm1
@@ -4656,10 +4656,10 @@ body: |
   $rax = VMOVSDto64Zrr $xmm16
   ; CHECK: VMOVSSZmr $rdi, 1, $noreg, 0, $noreg, $xmm16
   VMOVSSZmr $rdi, 1, $noreg, 0, $noreg, $xmm16
-  ; CHECK: $xmm16 = VMOVSSZrm $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVSSZrm $rip, 1, $rax, 0, $noreg
-  ; CHECK: $xmm16 = VMOVSSZrm_alt $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVSSZrm_alt $rip, 1, $rax, 0, $noreg
+  ; CHECK: $xmm16 = VMOVSSZrm $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVSSZrm $rip, 1, $noreg, 0, $noreg
+  ; CHECK: $xmm16 = VMOVSSZrm_alt $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVSSZrm_alt $rip, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VMOVSSZrr $xmm16, $xmm1
   $xmm16 = VMOVSSZrr $xmm16, $xmm1
   ; CHECK: $xmm16 = VMOVSSZrr_REV $xmm16, $xmm1
@@ -4672,8 +4672,8 @@ body: |
   $xmm16 = VMOV64toPQIZrm $rdi, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VMOV64toSDZrr $rdi
   $xmm16 = VMOV64toSDZrr $rdi
-  ; CHECK: $xmm16 = VMOVDI2PDIZrm $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVDI2PDIZrm $rip, 1, $rax, 0, $noreg
+  ; CHECK: $xmm16 = VMOVDI2PDIZrm $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVDI2PDIZrm $rip, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VMOVDI2PDIZrr $edi
   $xmm16 = VMOVDI2PDIZrr $edi
   ; CHECK: $xmm16 = VMOVLHPSZrr $xmm16, $xmm1
@@ -4692,8 +4692,8 @@ body: |
   $rdi = VMOVPQIto64Zrr $xmm16
   ; CHECK: VMOVPQIto64Zmr $rdi, 1, $noreg, 0, $noreg, $xmm16
   VMOVPQIto64Zmr $rdi, 1, $noreg, 0, $noreg, $xmm16
-  ; CHECK: $xmm16 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg
-  $xmm16 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg
+  ; CHECK: $xmm16 = VMOVQI2PQIZrm $rip, 1, $noreg, 0, $noreg
+  $xmm16 = VMOVQI2PQIZrm $rip, 1, $noreg, 0, $noreg
   ; CHECK: $xmm16 = VMOVZPQILo2PQIZrr $xmm16
   $xmm16 = VMOVZPQILo2PQIZrr $xmm16
   ; CHECK: VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr
@@ -4728,38 +4728,38 @@ body: |
   VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr
   ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr
   VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr
-  ; CHECK: $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
-  $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
   ; CHECK: $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15, implicit $mxcsr
   $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15, implicit $mxcsr
-  ; CHECK: $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
-  $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZm $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
   ; CHECK: $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15, implicit $mxcsr
   $xmm16 = VRNDSCALESSZr $xmm16, $xmm1, 15, implicit $mxcsr
-  ; CHECK: $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
-  $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESDZm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
   ; CHECK: $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15, implicit $mxcsr
   $xmm16 = VRNDSCALESDZr_Int $xmm16, $xmm1, 15, implicit $mxcsr
-  ; CHECK: $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
-  $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $rax, 0, $noreg, 15, implicit $mxcsr
+  ; CHECK: $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+  $xmm16 = VRNDSCALESSZm_Int $xmm16, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
   ; CHECK: $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15, implicit $mxcsr
   $xmm16 = VRNDSCALESSZr_Int $xmm16, $xmm1, 15, implicit $mxcsr
-  ; CHECK: $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
-  $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
   ; CHECK: $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31, implicit $mxcsr
   $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 31, implicit $mxcsr
-  ; CHECK: $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
-  $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
   ; CHECK: $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31, implicit $mxcsr
   $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 31, implicit $mxcsr
-  ; CHECK: $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
-  $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
   ; CHECK: $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31, implicit $mxcsr
   $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 31, implicit $mxcsr
-  ; CHECK: $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
-  $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $rax, 0, $noreg, 31, implicit $mxcsr
+  ; CHECK: $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
+  $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 31, implicit $mxcsr
   ; CHECK: $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr
   $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr
-  RET 0, $zmm0, $zmm1
+  RETQ
 ...
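[Editorial note, not part of the patch: in the global-fill.ll update below, the i24 elements are now emitted field by field instead of as one `.long`. Each i24 occupies four bytes of storage, and the new directives encode exactly the same bytes: i24 -1 is 0xFFFFFF, so `.short 65535` (bytes FF FF), `.byte 255` (FF), and `.space 1` (00) reproduce the little-endian sequence FF FF FF 00 that the old `.long 16777215` produced, with the padding byte now explicit.]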
diff --git a/llvm/test/CodeGen/X86/global-fill.ll b/llvm/test/CodeGen/X86/global-fill.ll
index 656c8ca2c323a..41493b2d917fb 100644
--- a/llvm/test/CodeGen/X86/global-fill.ll
+++ b/llvm/test/CodeGen/X86/global-fill.ll
@@ -2,8 +2,12 @@
 @test1 = global [2 x i24] [i24 -1, i24 -1]
 ; CHECK-LABEL: test1:
-; CHECK-NEXT: .long 16777215
-; CHECK-NEXT: .long 16777215
+; CHECK-NEXT: .short 65535
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .space 1
+; CHECK-NEXT: .short 65535
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .space 1
 @test2 = global [2 x i7] [i7 1, i7 1]
 ; CHECK-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 7867b5306bf70..987da0f68082c 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -1118,14 +1118,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ;
 ; X86-AVX1-LABEL: test_reduce_v8i64:
 ; X86-AVX1: ## %bb.0:
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5
+; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
+; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5
-; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1
-; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
@@ -1236,14 +1236,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ;
 ; X64-AVX1-LABEL: test_reduce_v8i64:
 ; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4
+; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5
+; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
+; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5
-; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1
-; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/inline-asm-bad-modifier.ll b/llvm/test/CodeGen/X86/inline-asm-bad-modifier.ll
index 819b9fcfa7946..9f6e84f2f89c6 100644
--- a/llvm/test/CodeGen/X86/inline-asm-bad-modifier.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-bad-modifier.ll
@@ -1,8 +1,8 @@
 ; RUN: not llc -mtriple=x86_64-- < %s 2>&1 | FileCheck %s
-;CHECK: error: invalid operand in inline asm: 'vmovd ${1:x}, $0'
+;CHECK: error: invalid operand in inline asm: 'vmovd ${1:k}, $0'
 define i32 @foo() {
 entry:
-  %0 = tail call i32 asm sideeffect "vmovd ${1:x}, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(<2 x i64> )
+  %0 = tail call i32 asm sideeffect "vmovd ${1:k}, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(<2 x i64> )
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 1d0a631280fc3..8eb242d7c1fb0 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -55,8 +55,8 @@ define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext
   ret <4 x float> %9
 }
-define <4 x double> @signbits_ashr_sitofp(<4 x i64> %a0) nounwind {
-; X86-LABEL: signbits_ashr_sitofp:
+define <4 x double> @signbits_ashr_sitofp_0(<4 x i64> %a0) nounwind {
+; X86-LABEL: signbits_ashr_sitofp_0:
 ; X86: # %bb.0:
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: vpsrlq $36, %xmm1, %xmm2
@@ -75,7 +75,7 @@ define <4 x double> @signbits_ashr_sitofp(<4 x i64> %a0) nounwind {
 ; X86-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; X86-NEXT: retl
 ;
-; X64-AVX1-LABEL: signbits_ashr_sitofp:
+; X64-AVX1-LABEL: signbits_ashr_sitofp_0:
 ; X64-AVX1: # %bb.0:
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT: vpsrlq $36, %xmm1, %xmm2
@@ -94,7 +94,7 @@ define <4 x double> @signbits_ashr_sitofp(<4 x i64> %a0) nounwind {
 ; X64-AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
-; X64-AVX2-LABEL: signbits_ashr_sitofp:
+; X64-AVX2-LABEL: signbits_ashr_sitofp_0:
 ; X64-AVX2: # %bb.0:
 ; X64-AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1073741824,536870912,268435456,134217728]
@@ -109,6 +109,41 @@ define <4 x double> @signbits_ashr_sitofp(<4 x i64> %a0) nounwind {
   ret <4 x double> %2
 }
+; PR45794
+define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
+; X86-LABEL: signbits_ashr_sitofp_1:
+; X86: # %bb.0:
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vpsrad $16, %xmm1, %xmm1
+; X86-NEXT: vpsrad $16, %xmm0, %xmm0
+; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X64-AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; X64-AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
+; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; X64-AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+  %1 = ashr <4 x i64> %a0,
+  %2 = sitofp <4 x i64> %1 to <4 x float>
+  ret <4 x float> %2
+}
+
 define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
 ; X86-LABEL: signbits_ashr_extract_sitofp_0:
 ; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll
index da432cb1ab34d..f471c637a5923 100644
--- a/llvm/test/CodeGen/X86/load-local-v3i1.ll
+++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll
@@ -98,20 +98,23 @@ define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x
 ; CHECK-NEXT: movq %rdi, %r14
 ; CHECK-NEXT: movb (%rdx), %al
 ; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shrb $2, %cl
-; CHECK-NEXT: movzbl %al, %r15d
-; CHECK-NEXT: shrb %al
-; CHECK-NEXT: movzbl %al, %ebx
-; CHECK-NEXT: movzbl %cl, %ebp
+; CHECK-NEXT: shrb %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: shrb $2, %dl
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movzbl %al, %ebp
+; CHECK-NEXT: movzbl %dl, %r15d
+; CHECK-NEXT: movzbl %cl, %ebx
 ; CHECK-NEXT: movq %rsi, %rdi
-; CHECK-NEXT: movl %r15d, %esi
+; CHECK-NEXT: movl %ebp, %esi
 ; CHECK-NEXT: movl %ebx, %edx
-; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: movl %r15d, %ecx
 ; CHECK-NEXT: callq masked_load_v3
 ; CHECK-NEXT: movq %r14, %rdi
-; CHECK-NEXT: movl %r15d, %esi
+; CHECK-NEXT: movl %ebp, %esi
 ; CHECK-NEXT: movl %ebx, %edx
-; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: movl %r15d, %ecx
 ; CHECK-NEXT: callq masked_store4_v3
 ; CHECK-NEXT: addq $8, %rsp
 ; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/load-local-v4i5.ll b/llvm/test/CodeGen/X86/load-local-v4i5.ll
new file mode 100644
index 0000000000000..cb382a59436e7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/load-local-v4i5.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+@0 = internal unnamed_addr constant [4 x i5] [i5 2, i5 0, i5 2, i5 -1], align 1
+
+; Function Attrs: nobuiltin nounwind
+define void @_start() {
+; CHECK-LABEL: _start:
+; CHECK: # %bb.0: # %Entry
+; CHECK-NEXT: movl {{.*}}(%rip), %eax
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %cl
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT: andl $31, %eax
+; CHECK-NEXT: andl $31, %esi
+; CHECK-NEXT: shll $5, %esi
+; CHECK-NEXT: orl %eax, %esi
+; CHECK-NEXT: andl $31, %edx
+; CHECK-NEXT: shll $10, %edx
+; CHECK-NEXT: orl %esi, %edx
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $15, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: shrl $16, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpb $31, %al
+; CHECK-NEXT: je .LBB0_2
+; CHECK-NEXT: # %bb.1: # %Then
+; CHECK-NEXT: int3
+; CHECK-NEXT: .LBB0_2: # %EndIf
+; CHECK-NEXT: retq
+Entry:
+  %x = alloca [4 x i5], align 1
+  %y = alloca <4 x i5>, align 4
+  %z = alloca i5, align 1
+  %0 = bitcast [4 x i5]* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 bitcast ([4 x i5]* @0 to i8*), i64 4, i1 false)
+  %1 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 0
+  %2 = load i5, i5* %1
+  %3 = insertelement <4 x i5> undef, i5 %2, i32 0
+  %4 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 1
+  %5 = load i5, i5* %4
+  %6 = insertelement <4 x i5> %3, i5 %5, i32 1
+  %7 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 2
+  %8 = load i5, i5* %7
+  %9 = insertelement <4 x i5> %6, i5 %8, i32 2
+  %10 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 3
+  %11 = load i5, i5* %10
+  %12 = insertelement <4 x i5> %9, i5 %11, i32 3
+  store <4 x i5> %12, <4 x i5>* %y, align 4
+  %13 = load <4 x i5>, <4 x i5>* %y
+  %14 = extractelement <4 x i5> %13, i32 3
+  store i5 %14, i5* %z, align 1
+  %15 = load i5, i5* %z, align 1
+  %16 = icmp ne i5 %15, -1
+  br i1 %16, label %Then, label %Else
+
+Then: ; preds = %Entry
+  call void @llvm.debugtrap()
+  br label %EndIf
+
+Else: ; preds = %Entry
+  br label %EndIf
+
+EndIf: ; preds = %Else, %Then
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+
+; Function Attrs: nounwind
+declare void @llvm.debugtrap()
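[Editorial note, not part of the patch: the masked_compressstore/expandload/gather/load/store updates that follow all show one recurring codegen change: `packsswb %xmm0, %xmmN` (and the `packuswb`/`packusdw`/`vpacksswb` variants) becomes a self-pack, `packsswb %xmmN, %xmmN`. Only the low eight bytes of the packed result feed the subsequent `pmovmskb`/`testb $1` mask tests, so the second pack input is a don't-care; packing the mask register with itself avoids reading whatever happens to live in %xmm0. A minimal C sketch of the idiom using SSE2 intrinsics (a hypothetical helper, not code from the patch):

#include <emmintrin.h>

/* Collapse an 8 x i16 mask (each lane 0 or -1) to its low 8 mask bits.
   Packing `m` with itself mirrors the new codegen: the high half of the
   pack result is unused, so no unrelated register has to be read. */
static inline int mask8_from_epi16(__m128i m) {
  __m128i packed = _mm_packs_epi16(m, m);   /* packsswb %xmmN, %xmmN */
  return _mm_movemask_epi8(packed) & 0xff;  /* pmovmskb; keep low byte */
}

End of note; the patch resumes below.]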
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index c275b4c9a20c2..24ae0c77af2fe 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -15,7 +15,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
 ; SSE-LABEL: compressstore_v8f64_v8i1:
 ; SSE: ## %bb.0:
 ; SSE-NEXT: psllw $15, %xmm4
-; SSE-NEXT: packsswb %xmm0, %xmm4
+; SSE-NEXT: packsswb %xmm4, %xmm4
 ; SSE-NEXT: pmovmskb %xmm4, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: jne LBB0_1
@@ -84,7 +84,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
 ; AVX1-LABEL: compressstore_v8f64_v8i1:
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: je LBB0_2
@@ -152,7 +152,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
 ; AVX2-LABEL: compressstore_v8f64_v8i1:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovmskb %xmm2, %eax
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: je LBB0_2
@@ -845,7 +845,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
 ; SSE2-LABEL: compressstore_v8f32_v8i1:
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: psllw $15, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB4_1
@@ -924,7 +924,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
 ; SSE42-LABEL: compressstore_v8f32_v8i1:
 ; SSE42: ## %bb.0:
 ; SSE42-NEXT: psllw $15, %xmm2
-; SSE42-NEXT: packsswb %xmm0, %xmm2
+; SSE42-NEXT: packsswb %xmm2, %xmm2
 ; SSE42-NEXT: pmovmskb %xmm2, %eax
 ; SSE42-NEXT: testb $1, %al
 ; SSE42-NEXT: jne LBB4_1
@@ -993,7 +993,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
 ; AVX1-LABEL: compressstore_v8f32_v8i1:
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: jne LBB4_1
@@ -1064,7 +1064,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
 ; AVX2-LABEL: compressstore_v8f32_v8i1:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vpsllw $15, %xmm1, %xmm1
-; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: vpmovmskb %xmm1, %eax
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: jne LBB4_1
@@ -2729,7 +2729,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; SSE2-LABEL: compressstore_v8i64_v8i1:
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: psllw $15, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB9_1
@@ -2802,7 +2802,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; SSE42-LABEL: compressstore_v8i64_v8i1:
 ; SSE42: ## %bb.0:
 ; SSE42-NEXT: psllw $15, %xmm4
-; SSE42-NEXT: packsswb %xmm0, %xmm4
+; SSE42-NEXT: packsswb %xmm4, %xmm4
 ; SSE42-NEXT: pmovmskb %xmm4, %eax
 ; SSE42-NEXT: testb $1, %al
 ; SSE42-NEXT: jne LBB9_1
@@ -2871,7 +2871,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX1-LABEL: compressstore_v8i64_v8i1:
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: je LBB9_2
@@ -2939,7 +2939,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX2-LABEL: compressstore_v8i64_v8i1:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovmskb %xmm2, %eax
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: je LBB9_2
@@ -3182,7 +3182,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: pxor %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB11_1
@@ -3260,7 +3260,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
 ; SSE42: ## %bb.0:
 ; SSE42-NEXT: pxor %xmm2, %xmm2
 ; SSE42-NEXT: pcmpeqw %xmm1, %xmm2
-; SSE42-NEXT: packsswb %xmm0, %xmm2
+; SSE42-NEXT: packsswb %xmm2, %xmm2
 ; SSE42-NEXT: pmovmskb %xmm2, %eax
 ; SSE42-NEXT: testb $1, %al
 ; SSE42-NEXT: jne LBB11_1
@@ -3330,7 +3330,7 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
 ; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX1OR2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax
 ; AVX1OR2-NEXT: testb $1, %al
 ; AVX1OR2-NEXT: jne LBB11_1
diff --git a/llvm/test/CodeGen/X86/masked_expandload.ll b/llvm/test/CodeGen/X86/masked_expandload.ll
index 12cfb932a3c08..a66f5a3c2aa9a 100644
--- a/llvm/test/CodeGen/X86/masked_expandload.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload.ll
@@ -296,7 +296,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
 ; SSE-LABEL: expandload_v8f64_v8i1:
 ; SSE: ## %bb.0:
 ; SSE-NEXT: psllw $15, %xmm4
-; SSE-NEXT: packsswb %xmm0, %xmm4
+; SSE-NEXT: packsswb %xmm4, %xmm4
 ; SSE-NEXT: pmovmskb %xmm4, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: jne LBB2_1
@@ -365,7 +365,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
 ; AVX1-LABEL: expandload_v8f64_v8i1:
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
 ; AVX1-NEXT: testb $1, %al
 ; AVX1-NEXT: jne LBB2_1
@@ -446,7 +446,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
 ; AVX2-LABEL: expandload_v8f64_v8i1:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovmskb %xmm2, %eax
 ; AVX2-NEXT: testb $1, %al
 ; AVX2-NEXT: jne LBB2_1
@@ -1311,21 +1311,17 @@ define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
 ;
 ; AVX1OR2-LABEL: expandload_v16f32_const:
 ; AVX1OR2: ## %bb.0:
-; AVX1OR2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
-; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
-; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX1OR2-NEXT: vmovups (%rdi), %xmm0
 ; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1OR2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1OR2-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
-; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm1[0],mem[0],xmm1[2,3]
+; AVX1OR2-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3]
+; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
-; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm1
 ; AVX1OR2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm1
 ; AVX1OR2-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
@@ -2954,7 +2950,7 @@ define <8 x i16> @expandload_v8i16_v8i16(i16* %base, <8 x i16> %src0, <8 x i16>
 ; SSE: ## %bb.0:
 ; SSE-NEXT: pxor %xmm2, %xmm2
 ; SSE-NEXT: pcmpeqw %xmm1, %xmm2
-; SSE-NEXT: packsswb %xmm0, %xmm2
+; SSE-NEXT: packsswb %xmm2, %xmm2
 ; SSE-NEXT: pmovmskb %xmm2, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: jne LBB11_1
@@ -3024,7 +3020,7 @@ define <8 x i16> @expandload_v8i16_v8i16(i16* %base, <8 x i16> %src0, <8 x i16>
 ; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX1OR2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX1OR2-NEXT: vpmovmskb %xmm1, %eax
 ; AVX1OR2-NEXT: testb $1, %al
 ; AVX1OR2-NEXT: jne LBB11_1
diff --git a/llvm/test/CodeGen/X86/masked_gather.ll b/llvm/test/CodeGen/X86/masked_gather.ll
index eb214368056c9..dbd95213a60de 100644
--- a/llvm/test/CodeGen/X86/masked_gather.ll
+++ b/llvm/test/CodeGen/X86/masked_gather.ll
@@ -1099,7 +1099,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
 ; SSE-NEXT: pcmpeqd %xmm5, %xmm6
 ; SSE-NEXT: pcmpeqd %xmm3, %xmm5
 ; SSE-NEXT: packssdw %xmm5, %xmm6
-; SSE-NEXT: packsswb %xmm0, %xmm6
+; SSE-NEXT: packsswb %xmm6, %xmm6
 ; SSE-NEXT: pmovmskb %xmm6, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: je .LBB4_19
@@ -1168,7 +1168,7 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) {
 ; SSE-NEXT: pcmpeqd %xmm7, %xmm2
 ; SSE-NEXT: pcmpeqd %xmm7, %xmm3
 ; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: packsswb %xmm0, %xmm2
+; SSE-NEXT: packsswb %xmm2, %xmm2
 ; SSE-NEXT: pmovmskb %xmm2, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: je .LBB4_37
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index e72840965b76f..a21dc8bbf0585 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -393,7 +393,7 @@ define <8 x double> @load_v8f64_v8i16(<8 x i16> %trigger, <8 x double>* %addr, <
 ; SSE: ## %bb.0:
 ; SSE-NEXT: pxor %xmm5, %xmm5
 ; SSE-NEXT: pcmpeqw %xmm0, %xmm5
-; SSE-NEXT: packsswb %xmm0, %xmm5
+; SSE-NEXT: packsswb %xmm5, %xmm5
 ; SSE-NEXT: pmovmskb %xmm5, %eax
 ; SSE-NEXT: testb $1, %al
 ; SSE-NEXT: jne LBB5_1
@@ -541,7 +541,7 @@ define <8 x double> @load_v8f64_v8i64(<8 x i64> %trigger, <8 x double>* %addr, <
 ; SSE2-NEXT: pand %xmm0, %xmm1
 ; SSE2-NEXT: packssdw %xmm2, %xmm1
 ; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: packsswb %xmm0, %xmm1
+; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB6_1
@@ -1645,7 +1645,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: pxor %xmm5, %xmm5
 ; SSE2-NEXT: pcmpeqw %xmm0, %xmm5
-; SSE2-NEXT: packsswb %xmm0, %xmm5
+; SSE2-NEXT: packsswb %xmm5, %xmm5
 ; SSE2-NEXT: pmovmskb %xmm5, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB15_1
@@ -1716,7 +1716,7 @@ define <8 x i64> @load_v8i64_v8i16(<8 x i16> %trigger, <8 x i64>* %addr, <8 x i6
 ; SSE42: ## %bb.0:
 ; SSE42-NEXT: pxor %xmm5, %xmm5
 ; SSE42-NEXT: pcmpeqw %xmm0, %xmm5
-; SSE42-NEXT: packsswb %xmm0, %xmm5
+; SSE42-NEXT: packsswb %xmm5, %xmm5
 ; SSE42-NEXT: pmovmskb %xmm5, %eax
 ; SSE42-NEXT: testb $1, %al
 ; SSE42-NEXT: jne LBB15_1
@@ -1864,7 +1864,7 @@ define <8 x i64> @load_v8i64_v8i64(<8 x i64> %trigger, <8 x i64>* %addr, <8 x i6
 ; SSE2-NEXT: pand %xmm0, %xmm1
 ; SSE2-NEXT: packssdw %xmm2, %xmm1
 ; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: packsswb %xmm0, %xmm1
+; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB16_1
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index dc780060ae917..29678e8495c22 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -353,7 +353,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, <8 x float>* %ptr, <8 x float> %y
 ; SSE2-LABEL: store_v8f32_v8i32:
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB5_1
@@ -425,7 +425,7 @@ define void @store_v8f32_v8i32(<8 x float> %x, <8 x float>* %ptr, <8 x float> %y
 ; SSE4-LABEL: store_v8f32_v8i32:
 ; SSE4: ## %bb.0:
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne LBB5_1
@@ -1423,7 +1423,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, <8 x i16>* %addr, <8 x i16> %
 ; SSE2: ## %bb.0:
 ; SSE2-NEXT: pxor %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqw %xmm0, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne LBB13_1
@@ -1494,7 +1494,7 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, <8 x i16>* %addr, <8 x i16> %
 ; SSE4: ## %bb.0:
 ; SSE4-NEXT: pxor %xmm2, %xmm2
 ; SSE4-NEXT: pcmpeqw %xmm0, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne LBB13_1
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index fbf1c6a422442..494db752ca525 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -19,7 +19,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB0_1
@@ -94,7 +94,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE4-NEXT: pxor %xmm1, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB0_1
@@ -232,7 +232,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB1_1
@@ -315,7 +315,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE4-NEXT: pxor %xmm1, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB1_1
@@ -624,7 +624,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -702,7 +702,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: pcmpeqd %xmm6, %xmm4
 ; SSE4-NEXT: pxor %xmm1, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB2_1
@@ -3331,7 +3331,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE2-NEXT: pxor %xmm1, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB11_1
@@ -3410,7 +3410,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB11_1
@@ -3714,7 +3714,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE2-NEXT: pxor %xmm1, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -3787,7 +3787,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB12_1
@@ -6269,7 +6269,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: packsswb %xmm0, %xmm1
+; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -6336,7 +6336,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE4-NEXT: pxor %xmm2, %xmm1
-; SSE4-NEXT: packsswb %xmm0, %xmm1
+; SSE4-NEXT: packsswb %xmm1, %xmm1
 ; SSE4-NEXT: pmovmskb %xmm1, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB17_1
@@ -6402,7 +6402,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpmovmskb %xmm1, %eax
 ; AVX-NEXT: testb $1, %al
 ; AVX-NEXT: jne .LBB17_1
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index f6fe9359d206c..51a9d923c247c 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -119,7 +119,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm7, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: je .LBB0_2
@@ -228,7 +228,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB0_1
@@ -506,7 +506,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB1_1
@@ -618,7 +618,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB1_1
@@ -1062,7 +1062,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -1161,14 +1161,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: blendvpd %xmm0, %xmm10, %xmm1
 ; SSE4-NEXT: packssdw %xmm3, %xmm1
 ; SSE4-NEXT: packssdw %xmm1, %xmm7
-; SSE4-NEXT: packsswb %xmm0, %xmm7
+; SSE4-NEXT: packsswb %xmm7, %xmm7
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
 ; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
 ; SSE4-NEXT: pxor %xmm0, %xmm5
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB2_1
@@ -1842,7 +1842,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask
 ; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
 ; SSE4-NEXT: blendvpd %xmm0, %xmm6, %xmm1
 ; SSE4-NEXT: packssdw %xmm3, %xmm1
-; SSE4-NEXT: packssdw %xmm0, %xmm1
+; SSE4-NEXT: packssdw %xmm1, %xmm1
 ; SSE4-NEXT: pcmpeqd %xmm2, %xmm4
 ; SSE4-NEXT: movmskps %xmm4, %eax
 ; SSE4-NEXT: xorl $15, %eax
@@ -4238,7 +4238,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE2-NEXT: pxor %xmm1, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB11_1
@@ -4315,7 +4315,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB11_1
@@ -4619,7 +4619,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE2-NEXT: pxor %xmm1, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -4690,7 +4690,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB12_1
@@ -7234,7 +7234,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: packsswb %xmm0, %xmm1
+; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -7301,7 +7301,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE4-NEXT: pxor %xmm2, %xmm1
-; SSE4-NEXT: packsswb %xmm0, %xmm1
+; SSE4-NEXT: packsswb %xmm1, %xmm1
 ; SSE4-NEXT: pmovmskb %xmm1, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB17_1
@@ -7367,7 +7367,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpmovmskb %xmm1, %eax
 ; AVX-NEXT: testb $1, %al
 ; AVX-NEXT: jne .LBB17_1
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index e365da8d3a3d8..f90d001ab4171 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -65,7 +65,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: je .LBB0_2
@@ -167,7 +167,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm9, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB0_1
@@ -390,7 +390,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB1_1
@@ -495,7 +495,7 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB1_1
@@ -870,14 +870,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: packuswb %xmm1, %xmm0
 ; SSE2-NEXT: packuswb %xmm0, %xmm7
-; SSE2-NEXT: packuswb %xmm0, %xmm7
+; SSE2-NEXT: packuswb %xmm7, %xmm7
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT: pxor %xmm0, %xmm5
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE2-NEXT: pxor %xmm0, %xmm4
 ; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm0, %xmm4
+; SSE2-NEXT: packsswb %xmm4, %xmm4
 ; SSE2-NEXT: pmovmskb %xmm4, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm7, %ecx
@@ -969,14 +969,14 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
 ; SSE4-NEXT: packusdw %xmm7, %xmm6
 ; SSE4-NEXT: packusdw %xmm6, %xmm1
-; SSE4-NEXT: packuswb %xmm0, %xmm1
+; SSE4-NEXT: packuswb %xmm1, %xmm1
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm5
 ; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
 ; SSE4-NEXT: pxor %xmm0, %xmm5
 ; SSE4-NEXT: pcmpeqd %xmm8, %xmm4
 ; SSE4-NEXT: pxor %xmm0, %xmm4
 ; SSE4-NEXT: packssdw %xmm5, %xmm4
-; SSE4-NEXT: packsswb %xmm0, %xmm4
+; SSE4-NEXT: packsswb %xmm4, %xmm4
 ; SSE4-NEXT: pmovmskb %xmm4, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB2_1
@@ -1579,7 +1579,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask
 ; SSE4-NEXT: movdqa %xmm4, %xmm0
 ; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm5
 ; SSE4-NEXT: packusdw %xmm3, %xmm5
-; SSE4-NEXT: packusdw %xmm0, %xmm5
+; SSE4-NEXT: packusdw %xmm5, %xmm5
 ; SSE4-NEXT: pcmpeqd %xmm2, %xmm6
 ; SSE4-NEXT: movmskps %xmm6, %eax
 ; SSE4-NEXT: xorl $15, %eax
@@ -3958,7 +3958,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
 ; SSE2-NEXT: pxor %xmm6, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: jne .LBB11_1
@@ -4038,7 +4038,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB11_1
@@ -4352,14 +4352,14 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT: pandn %xmm9, %xmm6
 ; SSE2-NEXT: por %xmm0, %xmm6
 ; SSE2-NEXT: packuswb %xmm4, %xmm6
-; SSE2-NEXT: packuswb %xmm0, %xmm6
+; SSE2-NEXT: packuswb %xmm6, %xmm6
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm3
 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT: pxor %xmm0, %xmm3
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
 ; SSE2-NEXT: pxor %xmm0, %xmm2
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm0, %xmm2
+; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm6, %ecx
@@ -4433,7 +4433,7 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE4-NEXT: pcmpeqd %xmm4, %xmm2
 ; SSE4-NEXT: pxor %xmm1, %xmm2
 ; SSE4-NEXT: packssdw %xmm3, %xmm2
-; SSE4-NEXT: packsswb %xmm0, %xmm2
+; SSE4-NEXT: packsswb %xmm2, %xmm2
 ; SSE4-NEXT: pmovmskb %xmm2, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB12_1
@@ -7057,7 +7057,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: packsswb %xmm0, %xmm1
+; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
 ; SSE2-NEXT: testb $1, %al
 ; SSE2-NEXT: movd %xmm0, %ecx
@@ -7125,7 +7125,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE4-NEXT: pcmpeqw %xmm1, %xmm2
 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
 ; SSE4-NEXT: pxor %xmm2, %xmm1
-; SSE4-NEXT: packsswb %xmm0, %xmm1
+; SSE4-NEXT: packsswb %xmm1, %xmm1
 ; SSE4-NEXT: pmovmskb %xmm1, %eax
 ; SSE4-NEXT: testb $1, %al
 ; SSE4-NEXT: jne .LBB17_1
@@ -7192,7 +7192,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; AVX-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpacksswb %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpmovmskb %xmm1, %eax
 ; AVX-NEXT: testb $1, %al
 ; AVX-NEXT: jne .LBB17_1
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 443d57a1ad54c..d548639782dd7 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -905,10 +905,11 @@ define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal-
 define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v8i64_v8i32_sign:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpsraq $48, 32(%rdi), %ymm1
-; CHECK-NEXT: vpsraq $48, (%rdi), %ymm2
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
-; CHECK-NEXT: vpermi2w %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: vpsraq $48, 32(%rdi), %ymm0
+; CHECK-NEXT: vpsraq $48, (%rdi), %ymm1
+; CHECK-NEXT: vpmovqd %ymm1, %xmm1
+; CHECK-NEXT: vpmovqd %ymm0, %xmm0
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; CHECK-NEXT: retq
   %a = load <8 x i64>, <8 x i64>* %x
   %b = ashr <8 x i64> %a,
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index aab15c55411df..6bdc4e3d44785 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -556,7 +556,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
 ; AVX1-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[0,u,u,1,5,u,u,6]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2,3,4,5],ymm2[6],ymm3[7]
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm3
 ; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
@@ -602,7 +602,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
 ; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
 ; XOP-NEXT: vpermil2ps {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[u,1,5,u],ymm2[6],ymm0[6]
 ; XOP-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm3
 ; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
 ; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
 ; XOP-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3]
@@ -696,7 +696,7 @@ define void @interleave_24i8_out(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,3,2,1,4,5,6,7]
 ; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,6,5,4,7]
-; SSE2-NEXT: packuswb %xmm0, %xmm4
+; SSE2-NEXT: packuswb %xmm4, %xmm4
 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,255,255,0,255,255,0,255,255,255,255,255,255,255,255,255]
 ; SSE2-NEXT: movdqa %xmm0, %xmm6
 ; SSE2-NEXT: pand %xmm5, %xmm6
@@ -712,7 +712,7 @@ define void @interleave_24i8_out(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,3,2,1]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[1,2,3,0,4,5,6,7]
 ; SSE2-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,5,6,7,4]
-; SSE2-NEXT: packuswb %xmm0, %xmm5
+; SSE2-NEXT: packuswb %xmm5, %xmm5
 ; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
 ; SSE2-NEXT: pand %xmm6, %xmm0
 ; SSE2-NEXT: pandn %xmm1, %xmm6
@@ -833,7 +833,7 @@ define void @interleave_24i8_in(<24 x i8>* %p, <8 x i8>* %q1, <8 x i8>* %q2, <8
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,2,3]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,1,0,4,5,6,7]
 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: packuswb %xmm0, %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm1
 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,255,0,255,255,0,255,255,255,255,255,255,255,255]
 ; SSE2-NEXT: pand %xmm2, %xmm1
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,3,3,4,5,6,7]
@@ -1011,24 +1011,21 @@ define void @interleave_24i16_out(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2
 ;
 ; AVX2-LABEL: interleave_24i16_out:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vmovdqu 32(%rdi), %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7,8,9],ymm1[10],ymm0[11,12],ymm1[13],ymm0[14,15]
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3],xmm3[4],xmm2[5,6],xmm3[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,12,13,2,3,8,9,14,15,4,5,10,11]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10],ymm1[11],ymm0[12,13],ymm1[14],ymm0[15]
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4],xmm4[5],xmm3[6,7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8],ymm1[9],ymm0[10,11],ymm1[12],ymm0[13,14],ymm1[15]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vmovdqu 16(%rdi), %xmm1
+; AVX2-NEXT: vmovdqu 32(%rdi), %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2],xmm0[3,4],xmm2[5],xmm0[6,7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm1[1],xmm3[2,3],xmm1[4],xmm3[5,6],xmm1[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[0,1,6,7,12,13,2,3,8,9,14,15,4,5,10,11]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm0[1,2],xmm2[3],xmm0[4,5],xmm2[6],xmm0[7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2],xmm4[3,4],xmm1[5],xmm4[6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3],xmm2[4],xmm0[5,6],xmm2[7]
 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3],xmm0[4,5],xmm1[6],xmm0[7]
 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15]
-; AVX2-NEXT: vmovdqu %xmm2, (%rsi)
-; AVX2-NEXT: vmovdqu %xmm3, (%rdx)
+; AVX2-NEXT: vmovdqu %xmm3, (%rsi)
+; AVX2-NEXT: vmovdqu %xmm4, (%rdx)
 ; AVX2-NEXT: vmovdqu %xmm0, (%rcx)
-; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: interleave_24i16_out:
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index 42c3137a905f7..b42578eafdfd8 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -190,7 +190,7 @@ define void @PR42833() {
 ; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
 ; SSE2-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: addl {{.*}}(%rip), %eax
+; SSE2-NEXT: addl .Lb${{.*}}(%rip), %eax
 ; SSE2-NEXT: movd %eax, %xmm2
 ; SSE2-NEXT: movaps {{.*#+}} xmm3 =
 ; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3]
@@ -237,7 +237,7 @@ define void @PR42833() {
 ; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm1
 ; SSE42-NEXT: movdqa .Lc$local+{{.*}}(%rip), %xmm0
 ; SSE42-NEXT: movd %xmm0, %eax
-; SSE42-NEXT: addl {{.*}}(%rip), %eax
+; SSE42-NEXT: addl .Lb${{.*}}(%rip), %eax
 ; SSE42-NEXT: movdqa {{.*#+}} xmm2 =
 ; SSE42-NEXT: pinsrd $0, %eax, %xmm2
 ; SSE42-NEXT: movdqa %xmm0, %xmm3
@@ -275,7 +275,7 @@ define void @PR42833() {
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
 ; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: addl {{.*}}(%rip), %eax
+; AVX1-NEXT: addl .Lb${{.*}}(%rip), %eax
 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 =
 ; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
@@ -312,7 +312,7 @@ define void @PR42833() {
 ;
 ; AVX2-LABEL: PR42833:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: movl {{.*}}(%rip), %eax
+; AVX2-NEXT: movl .Lb${{.*}}(%rip), %eax
 ; AVX2-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
 ; AVX2-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
 ; AVX2-NEXT: vmovd %eax, %xmm1
@@ -336,7 +336,7 @@ define void @PR42833() {
 ;
 ; AVX512-LABEL: PR42833:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: movl {{.*}}(%rip), %eax
+; AVX512-NEXT: movl .Lb${{.*}}(%rip), %eax
 ; AVX512-NEXT: vmovdqu .Lc$local+{{.*}}(%rip), %ymm0
 ; AVX512-NEXT: vmovdqu64 .Lc$local+{{.*}}(%rip), %zmm1
 ; AVX512-NEXT: addl .Lc$local+{{.*}}(%rip), %eax
@@ -363,7 +363,7 @@ define void @PR42833() {
 ; XOP: # %bb.0:
 ; XOP-NEXT: vmovdqa .Lc$local+{{.*}}(%rip), %xmm0
 ; XOP-NEXT: vmovd %xmm0, %eax
-; XOP-NEXT: addl {{.*}}(%rip), %eax
+; XOP-NEXT: addl .Lb${{.*}}(%rip), %eax
 ; XOP-NEXT: vmovdqa {{.*#+}} xmm1 =
 ; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
 ; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
index b515fe8c4863e..73acb76ce55fc 100644
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -7,14 +7,18 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind {
 ; CHECK-NEXT: movzwl (%rdi), %eax
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $3, %ecx
-; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: andl $7, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $7, %edx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $6, %ecx
-; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: andl $7, %ecx
+; CHECK-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: shrl $9, %eax
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: andl $7, %eax
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %ret = load <4 x i3>, <4 x i3>* %in, align 1
   ret <4 x i3> %ret
@@ -23,17 +27,24 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movzbl (%rdi), %eax
+; CHECK-NEXT: movb (%rdi), %al
 ; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shrb %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andb $1, %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: shrl $3, %eax
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: shrb $2, %cl
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shrb $3, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %ret = load <4 x i1>, <4 x i1>* %in, align 1
   ret <4 x i1> %ret
@@ -42,21 +53,26 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
 define <4 x i64> @test3(<4 x i1>* %in) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movzbl (%rdi), %eax
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shlq $62, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: movq %rax, %rdx
-; CHECK-NEXT: shlq $63, %rdx
-; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: shrb %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: negl %edx
 ; CHECK-NEXT: vmovd %edx, %xmm0
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shlq $61, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: shlq $60, %rax
-; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: shrb $2, %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT: shrb $3, %cl
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: negl %eax
 ; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
diff --git a/llvm/test/CodeGen/X86/pr32282.ll b/llvm/test/CodeGen/X86/pr32282.ll
index 358095df4b2b1..8dfaaa5e6a68f 100644
--- a/llvm/test/CodeGen/X86/pr32282.ll
+++ b/llvm/test/CodeGen/X86/pr32282.ll
@@ -13,18 +13,18 @@ define void @foo(i64 %x) nounwind {
 ; X86-LABEL: foo:
 ; X86: # %bb.0:
 ; X86-NEXT: pushl %eax
-; X86-NEXT: movl d+4, %eax
+; X86-NEXT: movl d, %eax
 ; X86-NEXT: notl %eax
-; X86-NEXT: movl d, %ecx
+; X86-NEXT: movl d+4, %ecx
 ; X86-NEXT: notl %ecx
-; X86-NEXT: andl $-566231040, %ecx # imm = 0xDE400000
-; X86-NEXT: andl $701685459, %eax # imm = 0x29D2DED3
-; X86-NEXT: shrdl $21, %eax, %ecx
-; X86-NEXT: shrl $21, %eax
-; X86-NEXT: addl $7, %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: pushl %eax
+; X86-NEXT: andl $701685459, %ecx # imm = 0x29D2DED3
+; X86-NEXT: andl $-566231040, %eax # imm = 0xDE400000
+; X86-NEXT: shrdl $21, %ecx, %eax
+; X86-NEXT: shrl $21, %ecx
+; X86-NEXT: addl $7, %eax
+; X86-NEXT: adcl $0, %ecx
 ; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %eax
 ; X86-NEXT: pushl {{[0-9]+}}(%esp)
 ; X86-NEXT: pushl {{[0-9]+}}(%esp)
 ; X86-NEXT: calll __divdi3
diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll
index 9a0b56a90cb7e..2f63ac311f2eb 100644
--- a/llvm/test/CodeGen/X86/pr34653.ll
+++ b/llvm/test/CodeGen/X86/pr34653.ll
@@ -15,28 +15,22 @@ define void @pr34653() {
 ; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
 ; CHECK-NEXT: callq test
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm2
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm3
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm4
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm5
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm6
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm7
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm8
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm9
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm10
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm11
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm12
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm13
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm14
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm15
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero
@@ -60,17 +54,11 @@ define void @pr34653() {
 ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
-; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: .cfi_def_cfa %rsp, 8
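[Editorial note, not part of the patch: in the sad.ll changes that follow, the vectorized loops' induction-variable step is corrected to match the number of elements each iteration actually processes, and the CHECK lines for the corresponding `addq` immediates are regenerated. In @sad_16i8, for instance, the IR now advances with `%index.next = add i64 %index, 16` (compiled to `addq $16, %rax`), so the loop runs 1024 / 16 = 64 iterations and each 16-byte chunk is visited exactly once; the 32-, 64- and 2-element variants are adjusted the same way. A scalar C model of the pattern the tests exercise (illustrative only, not code from the patch):

/* With the step equal to the chunk width, every byte is summed once. */
static unsigned sad16(const unsigned char a[1024], const unsigned char b[1024]) {
  unsigned sum = 0;
  for (unsigned i = 0; i < 1024; i += 16)   /* step now matches the chunk */
    for (unsigned j = 0; j < 16; ++j)
      sum += a[i + j] > b[i + j] ? a[i + j] - b[i + j]
                                 : b[i + j] - a[i + j];
  return sum;
}

End of note; the patch resumes below.]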
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 011f1db9d2c34..5f0f225a4494d 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -21,7 +21,7 @@ define i32 @sad_16i8() nounwind { ; SSE2-NEXT: movdqu b+1024(%rax), %xmm3 ; SSE2-NEXT: psadbw %xmm2, %xmm3 ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $4, %rax +; SSE2-NEXT: addq $16, %rax ; SSE2-NEXT: jne .LBB0_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: paddd %xmm0, %xmm1 @@ -46,7 +46,7 @@ define i32 @sad_16i8() nounwind { ; AVX1-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2 ; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX1-NEXT: addq $4, %rax +; AVX1-NEXT: addq $16, %rax ; AVX1-NEXT: jne .LBB0_1 ; AVX1-NEXT: # %bb.2: # %middle.block ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 @@ -73,7 +73,7 @@ define i32 @sad_16i8() nounwind { ; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2 ; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: addq $4, %rax +; AVX2-NEXT: addq $16, %rax ; AVX2-NEXT: jne .LBB0_1 ; AVX2-NEXT: # %bb.2: # %middle.block ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -97,7 +97,7 @@ define i32 @sad_16i8() nounwind { ; AVX512-NEXT: vmovdqu a+1024(%rax), %xmm1 ; AVX512-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1 ; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0 -; AVX512-NEXT: addq $4, %rax +; AVX512-NEXT: addq $16, %rax ; AVX512-NEXT: jne .LBB0_1 ; AVX512-NEXT: # %bb.2: # %middle.block ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 @@ -130,7 +130,7 @@ vector.body: %8 = sub nsw <16 x i32> zeroinitializer, %6 %9 = select <16 x i1> %7, <16 x i32> %6, <16 x i32> %8 %10 = add nsw <16 x i32> %9, %vec.phi - %index.next = add i64 %index, 4 + %index.next = add i64 %index, 16 %11 = icmp eq i64 %index.next, 1024 br i1 %11, label %middle.block, label %vector.body @@ -163,7 +163,7 @@ define i32 @sad_32i8() nounwind { ; SSE2-NEXT: movdqa a+1040(%rax), %xmm3 ; SSE2-NEXT: psadbw b+1040(%rax), %xmm3 ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $4, %rax +; SSE2-NEXT: addq $32, %rax ; SSE2-NEXT: jne .LBB1_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: paddd %xmm0, %xmm1 @@ -195,7 +195,7 @@ define i32 @sad_32i8() nounwind { ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 -; AVX1-NEXT: addq $4, %rax +; AVX1-NEXT: addq $32, %rax ; AVX1-NEXT: jne .LBB1_1 ; AVX1-NEXT: # %bb.2: # %middle.block ; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2 @@ -226,7 +226,7 @@ define i32 @sad_32i8() nounwind { ; AVX2-NEXT: vmovdqa a+1024(%rax), %ymm2 ; AVX2-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: addq $4, %rax +; AVX2-NEXT: addq $32, %rax ; AVX2-NEXT: jne .LBB1_1 ; 
AVX2-NEXT: # %bb.2: # %middle.block ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm1 @@ -253,7 +253,7 @@ define i32 @sad_32i8() nounwind { ; AVX512-NEXT: vmovdqa a+1024(%rax), %ymm2 ; AVX512-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2 ; AVX512-NEXT: vpaddd %zmm1, %zmm2, %zmm1 -; AVX512-NEXT: addq $4, %rax +; AVX512-NEXT: addq $32, %rax ; AVX512-NEXT: jne .LBB1_1 ; AVX512-NEXT: # %bb.2: # %middle.block ; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0 @@ -287,7 +287,7 @@ vector.body: %8 = sub nsw <32 x i32> zeroinitializer, %6 %9 = select <32 x i1> %7, <32 x i32> %6, <32 x i32> %8 %10 = add nsw <32 x i32> %9, %vec.phi - %index.next = add i64 %index, 4 + %index.next = add i64 %index, 32 %11 = icmp eq i64 %index.next, 1024 br i1 %11, label %middle.block, label %vector.body @@ -330,7 +330,7 @@ define i32 @sad_avx64i8() nounwind { ; SSE2-NEXT: movdqa a+1072(%rax), %xmm5 ; SSE2-NEXT: psadbw b+1072(%rax), %xmm5 ; SSE2-NEXT: paddd %xmm5, %xmm1 -; SSE2-NEXT: addq $4, %rax +; SSE2-NEXT: addq $64, %rax ; SSE2-NEXT: jne .LBB2_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: paddd %xmm4, %xmm2 @@ -378,7 +378,7 @@ define i32 @sad_avx64i8() nounwind { ; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: addq $4, %rax +; AVX1-NEXT: addq $64, %rax ; AVX1-NEXT: jne .LBB2_1 ; AVX1-NEXT: # %bb.2: # %middle.block ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 @@ -420,7 +420,7 @@ define i32 @sad_avx64i8() nounwind { ; AVX2-NEXT: vmovdqa a+1056(%rax), %ymm3 ; AVX2-NEXT: vpsadbw b+1056(%rax), %ymm3, %ymm3 ; AVX2-NEXT: vpaddd %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: addq $4, %rax +; AVX2-NEXT: addq $64, %rax ; AVX2-NEXT: jne .LBB2_1 ; AVX2-NEXT: # %bb.2: # %middle.block ; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm2 @@ -453,7 +453,7 @@ define i32 @sad_avx64i8() nounwind { ; AVX512F-NEXT: vpsadbw b+1056(%rax), %ymm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 ; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1 -; AVX512F-NEXT: addq $4, %rax +; AVX512F-NEXT: addq $64, %rax ; AVX512F-NEXT: jne .LBB2_1 ; AVX512F-NEXT: # %bb.2: # %middle.block ; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm1 @@ -482,7 +482,7 @@ define i32 @sad_avx64i8() nounwind { ; AVX512BW-NEXT: vmovdqa64 a+1024(%rax), %zmm2 ; AVX512BW-NEXT: vpsadbw b+1024(%rax), %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1 -; AVX512BW-NEXT: addq $4, %rax +; AVX512BW-NEXT: addq $64, %rax ; AVX512BW-NEXT: jne .LBB2_1 ; AVX512BW-NEXT: # %bb.2: # %middle.block ; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1 @@ -518,7 +518,7 @@ vector.body: %8 = sub nsw <64 x i32> zeroinitializer, %6 %9 = select <64 x i1> %7, <64 x i32> %6, <64 x i32> %8 %10 = add nsw <64 x i32> %9, %vec.phi - %index.next = add i64 %index, 4 + %index.next = add i64 %index, 64 %11 = icmp eq i64 %index.next, 1024 br i1 %11, label %middle.block, label %vector.body @@ -555,7 +555,7 @@ define i32 @sad_2i8() nounwind { ; SSE2-NEXT: pand %xmm1, %xmm3 ; SSE2-NEXT: psadbw %xmm2, %xmm3 ; SSE2-NEXT: paddd %xmm3, %xmm0 -; SSE2-NEXT: addq $4, %rax +; SSE2-NEXT: addq $2, %rax ; SSE2-NEXT: jne .LBB3_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -577,7 +577,7 @@ define i32 @sad_2i8() nounwind { ; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm0[1,2,3,4,5,6,7] ; AVX-NEXT: vpsadbw %xmm3, %xmm2, %xmm2 ; AVX-NEXT: vpaddd %xmm1, %xmm2, %xmm1 -; AVX-NEXT: addq $4, %rax +; AVX-NEXT: addq $2, %rax ; AVX-NEXT: jne .LBB3_1 ; AVX-NEXT: # %bb.2: # %middle.block ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] @@ -603,7 
+603,7 @@ vector.body: %8 = sub nsw <2 x i32> zeroinitializer, %6 %9 = select <2 x i1> %7, <2 x i32> %6, <2 x i32> %8 %10 = add nsw <2 x i32> %9, %vec.phi - %index.next = add i64 %index, 4 + %index.next = add i64 %index, 2 %11 = icmp eq i64 %index.next, 1024 br i1 %11, label %middle.block, label %vector.body diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll index cc8127cbea230..b3dae629ba808 100644 --- a/llvm/test/CodeGen/X86/sbb.ll +++ b/llvm/test/CodeGen/X86/sbb.ll @@ -245,3 +245,107 @@ end: ret void } +; Cases for PR45700 +define i32 @ult_zext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ult_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: retq + %4 = icmp ult i32 %1, %2 + %5 = zext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @ule_zext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ule_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: sbbl $-1, %eax +; CHECK-NEXT: retq + %4 = icmp ule i32 %1, %2 + %5 = zext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @ugt_zext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ugt_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: retq + %4 = icmp ugt i32 %1, %2 + %5 = zext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @uge_zext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: uge_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: sbbl $-1, %eax +; CHECK-NEXT: retq + %4 = icmp uge i32 %1, %2 + %5 = zext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @ult_sext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ult_sext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: sbbl $0, %eax +; CHECK-NEXT: retq + %4 = icmp ult i32 %1, %2 + %5 = sext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @ule_sext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ule_sext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: adcl $-1, %eax +; CHECK-NEXT: retq + %4 = icmp ule i32 %1, %2 + %5 = sext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @ugt_sext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: ugt_sext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %esi, %edx +; CHECK-NEXT: sbbl $0, %eax +; CHECK-NEXT: retq + %4 = icmp ugt i32 %1, %2 + %5 = sext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} + +define i32 @uge_sext_add(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: uge_sext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: adcl $-1, %eax +; CHECK-NEXT: retq + %4 = icmp uge i32 %1, %2 + %5 = sext i1 %4 to i32 + %6 = add nsw i32 %5, %0 + ret i32 %6 +} diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index be6baaf42ae2f..c683c3e0a345c 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -700,43 +700,37 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; ILP-NEXT: movl $0, %edx ; ILP-NEXT: sbbq %rdx, %rdx ; ILP-NEXT: sbbq %rcx, %rcx -; ILP-NEXT: setae %cl -; ILP-NEXT: movzbl %cl, %ecx -; ILP-NEXT: subq %rcx, %rax +; ILP-NEXT: adcq $-1, %rax ; ILP-NEXT: 
retq ; ; HYBRID-LABEL: test4: ; HYBRID: # %bb.0: -; HYBRID-NEXT: xorl %eax, %eax ; HYBRID-NEXT: xorl %ecx, %ecx +; HYBRID-NEXT: xorl %edx, %edx ; HYBRID-NEXT: incq %rsi -; HYBRID-NEXT: sete %cl +; HYBRID-NEXT: sete %dl +; HYBRID-NEXT: movl $2, %eax ; HYBRID-NEXT: cmpq %rdi, %rsi -; HYBRID-NEXT: sbbq $0, %rcx -; HYBRID-NEXT: movl $0, %ecx +; HYBRID-NEXT: sbbq $0, %rdx +; HYBRID-NEXT: movl $0, %edx +; HYBRID-NEXT: sbbq %rdx, %rdx ; HYBRID-NEXT: sbbq %rcx, %rcx -; HYBRID-NEXT: sbbq %rax, %rax -; HYBRID-NEXT: setae %al -; HYBRID-NEXT: movzbl %al, %ecx -; HYBRID-NEXT: movl $2, %eax -; HYBRID-NEXT: subq %rcx, %rax +; HYBRID-NEXT: adcq $-1, %rax ; HYBRID-NEXT: retq ; ; BURR-LABEL: test4: ; BURR: # %bb.0: -; BURR-NEXT: xorl %eax, %eax ; BURR-NEXT: xorl %ecx, %ecx +; BURR-NEXT: xorl %edx, %edx ; BURR-NEXT: incq %rsi -; BURR-NEXT: sete %cl +; BURR-NEXT: sete %dl +; BURR-NEXT: movl $2, %eax ; BURR-NEXT: cmpq %rdi, %rsi -; BURR-NEXT: sbbq $0, %rcx -; BURR-NEXT: movl $0, %ecx +; BURR-NEXT: sbbq $0, %rdx +; BURR-NEXT: movl $0, %edx +; BURR-NEXT: sbbq %rdx, %rdx ; BURR-NEXT: sbbq %rcx, %rcx -; BURR-NEXT: sbbq %rax, %rax -; BURR-NEXT: setae %al -; BURR-NEXT: movzbl %al, %ecx -; BURR-NEXT: movl $2, %eax -; BURR-NEXT: subq %rcx, %rax +; BURR-NEXT: adcq $-1, %rax ; BURR-NEXT: retq ; ; SRC-LABEL: test4: @@ -750,10 +744,8 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; SRC-NEXT: movl $0, %eax ; SRC-NEXT: sbbq %rax, %rax ; SRC-NEXT: sbbq %rcx, %rcx -; SRC-NEXT: setae %al -; SRC-NEXT: movzbl %al, %ecx ; SRC-NEXT: movl $2, %eax -; SRC-NEXT: subq %rcx, %rax +; SRC-NEXT: adcq $-1, %rax ; SRC-NEXT: retq ; ; LIN-LABEL: test4: @@ -768,9 +760,7 @@ define i64 @test4(i64 %a, i64 %b) nounwind { ; LIN-NEXT: movl $0, %edx ; LIN-NEXT: sbbq %rdx, %rdx ; LIN-NEXT: sbbq %rcx, %rcx -; LIN-NEXT: setae %cl -; LIN-NEXT: movzbl %cl, %ecx -; LIN-NEXT: subq %rcx, %rax +; LIN-NEXT: adcq $-1, %rax ; LIN-NEXT: retq %r = zext i64 %b to i256 %u = add i256 %r, 1 diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 7344d38b585f3..f73a608e096b7 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -904,29 +904,35 @@ define i32 @test13(i32 %a, i32 %b) nounwind { } define i32 @test14(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: test14: -; CHECK: ## %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: setae %al -; CHECK-NEXT: negl %eax -; CHECK-NEXT: retq +; GENERIC-LABEL: test14: +; GENERIC: ## %bb.0: +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: adcl $-1, %eax +; GENERIC-NEXT: retq +; +; ATOM-LABEL: test14: +; ATOM: ## %bb.0: +; ATOM-NEXT: xorl %eax, %eax +; ATOM-NEXT: cmpl %esi, %edi +; ATOM-NEXT: adcl $-1, %eax +; ATOM-NEXT: nop +; ATOM-NEXT: nop +; ATOM-NEXT: retq ; ; ATHLON-LABEL: test14: ; ATHLON: ## %bb.0: ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx ; ATHLON-NEXT: xorl %eax, %eax ; ATHLON-NEXT: cmpl {{[0-9]+}}(%esp), %ecx -; ATHLON-NEXT: setae %al -; ATHLON-NEXT: negl %eax +; ATHLON-NEXT: adcl $-1, %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test14: ; MCU: # %bb.0: ; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: cmpl %edx, %eax -; MCU-NEXT: setae %cl -; MCU-NEXT: negl %ecx +; MCU-NEXT: adcl $-1, %ecx ; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %c = icmp uge i32 %a, %b diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll index f5673c7e8bd38..1582544b507ce 100644 --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -226,13 +226,8 @@ define <4 
x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind { ; ; X64-LABEL: ashr_add_shl_v4i8: ; X64: # %bb.0: -; X64-NEXT: pand {{.*}}(%rip), %xmm0 -; X64-NEXT: packuswb %xmm0, %xmm0 -; X64-NEXT: packuswb %xmm0, %xmm0 -; X64-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-NEXT: psubb %xmm1, %xmm0 -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X64-NEXT: pslld $24, %xmm0 +; X64-NEXT: paddd {{.*}}(%rip), %xmm0 ; X64-NEXT: psrad $24, %xmm0 ; X64-NEXT: retq %conv = shl <4 x i32> %r, diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index e42742fd008de..4722355cc89c4 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -348,7 +348,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] ; AVX1-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 -; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vpackssdw %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm3 ; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 @@ -363,7 +363,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] ; AVX2-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpackssdw %xmm0, %xmm2, %xmm2 +; AVX2-NEXT: vpackssdw %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll index 46347dff1fdaa..1112681d75b97 100644 --- a/llvm/test/CodeGen/X86/vec_saddo.ll +++ b/llvm/test/CodeGen/X86/vec_saddo.ll @@ -546,7 +546,7 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll index dc72917238af9..7fe21744ef28a 100644 --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -1237,7 +1237,7 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll index 1c99eff751165..e0dd06181eed9 100644 --- a/llvm/test/CodeGen/X86/vec_ssubo.ll +++ b/llvm/test/CodeGen/X86/vec_ssubo.ll @@ -551,7 +551,7 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm1, 
%xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll index 7539667cd1547..081fd13dd7c29 100644 --- a/llvm/test/CodeGen/X86/vec_uaddo.ll +++ b/llvm/test/CodeGen/X86/vec_uaddo.ll @@ -635,7 +635,7 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll index 10ffdcaeb5c67..f101ea2bef813 100644 --- a/llvm/test/CodeGen/X86/vec_umulo.ll +++ b/llvm/test/CodeGen/X86/vec_umulo.ll @@ -1050,11 +1050,11 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5 ; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 ; AVX2-NEXT: vpackssdw %xmm6, %xmm5, %xmm5 -; AVX2-NEXT: vpacksswb %xmm0, %xmm5, %xmm5 +; AVX2-NEXT: vpacksswb %xmm5, %xmm5, %xmm5 ; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm3 ; AVX2-NEXT: vpmovsxbd %xmm5, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm4, %xmm1 +; AVX2-NEXT: vpacksswb %xmm4, %xmm4, %xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll index 18623f4660bf8..449121a863df0 100644 --- a/llvm/test/CodeGen/X86/vec_usubo.ll +++ b/llvm/test/CodeGen/X86/vec_usubo.ll @@ -678,7 +678,7 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2) ; AVX2-NEXT: vpackssdw %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX2-NEXT: vpacksswb %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 ; AVX2-NEXT: vmovdqa %ymm3, 32(%rdi) ; AVX2-NEXT: vmovdqa %ymm2, (%rdi) diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index 4348edf5b7c26..b4edae1a75941 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1100,7 +1100,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE-LABEL: bool_reduction_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm0, %xmm1 -; SSE-NEXT: packsswb %xmm0, %xmm1 +; SSE-NEXT: packsswb %xmm1, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax ; SSE-NEXT: cmpb $-1, %al ; SSE-NEXT: sete %al @@ -1227,7 +1227,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { ; SSE-NEXT: pminud %xmm0, %xmm2 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: packsswb %xmm0, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax ; SSE-NEXT: cmpb $-1, %al ; SSE-NEXT: sete %al diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index b3443b9707b7f..48e374c36c066 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -996,7 +996,7 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE-LABEL: bool_reduction_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm0, %xmm1 -; SSE-NEXT: 
packsswb %xmm0, %xmm1 +; SSE-NEXT: packsswb %xmm1, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al @@ -1123,7 +1123,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { ; SSE-NEXT: pminud %xmm0, %xmm2 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: packsswb %xmm0, %xmm2 +; SSE-NEXT: packsswb %xmm2, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax ; SSE-NEXT: testb %al, %al ; SSE-NEXT: setne %al diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index 47b7d157ddad9..16be83c84fb3c 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -317,7 +317,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm4 ; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7] ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: packuswb %xmm0, %xmm4 +; SSE41-NEXT: packuswb %xmm4, %xmm4 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2 ; SSE41-NEXT: psrlw $8, %xmm2 @@ -816,7 +816,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm4 ; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7] ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: packuswb %xmm0, %xmm4 +; SSE41-NEXT: packuswb %xmm4, %xmm4 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll index 63f8ee88b642b..bb39831efe736 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll @@ -511,7 +511,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: psllw $15, %xmm2 -; SSE2-NEXT: packsswb %xmm0, %xmm2 +; SSE2-NEXT: packsswb %xmm2, %xmm2 ; SSE2-NEXT: pmovmskb %xmm2, %eax ; SSE2-NEXT: cmpb $-1, %al ; SSE2-NEXT: sete %al @@ -1404,7 +1404,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: packssdw %xmm2, %xmm1 ; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: packsswb %xmm0, %xmm1 +; SSE2-NEXT: packsswb %xmm1, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax ; SSE2-NEXT: cmpb $-1, %al ; SSE2-NEXT: sete %al diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll index fec54a6caf8ad..64bba07ac9598 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll @@ -505,7 +505,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: psllw $15, %xmm2 -; SSE2-NEXT: packsswb %xmm0, %xmm2 +; SSE2-NEXT: packsswb %xmm2, %xmm2 ; SSE2-NEXT: pmovmskb %xmm2, %eax ; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setne %al @@ -1393,7 +1393,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: packssdw %xmm2, %xmm1 ; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: packsswb %xmm0, %xmm1 +; SSE2-NEXT: packsswb %xmm1, %xmm1 ; SSE2-NEXT: 
pmovmskb %xmm1, %eax ; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setne %al diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll index 47a6ac4ce45df..e8259e147133c 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll @@ -352,14 +352,14 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; ; AVX1-LABEL: test_v8i64: ; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 +; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 -; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 -; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -663,24 +663,24 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; ; AVX1-LABEL: test_v16i64: ; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm8 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm9 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm11 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5 -; AVX1-NEXT: vpcmpgtq %xmm11, %xmm5, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 -; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm9 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm10 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 -; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vblendvpd %xmm10, %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 +; AVX1-NEXT: vpcmpgtq %xmm11, %xmm7, %xmm10 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm5, %xmm4, %xmm6 +; AVX1-NEXT: vblendvpd %xmm6, %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vblendvpd %xmm10, %xmm11, %xmm7, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm6 +; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vblendvpd %xmm8, %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendvpd %xmm6, %xmm4, %xmm5, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vblendvpd %xmm9, %xmm7, %xmm6, %xmm3 -; AVX1-NEXT: vblendvpd %xmm8, %xmm11, %xmm5, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 -; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 -; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index d22cf8e219656..ee6d0198c2d08 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -554,7 +554,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] ; SSE2-NEXT: psllw $15, %xmm2 -; SSE2-NEXT: packsswb %xmm0, %xmm2 +; SSE2-NEXT: packsswb %xmm2, %xmm2 ; 
SSE2-NEXT: pmovmskb %xmm2, %eax ; SSE2-NEXT: xorb $0, %al ; SSE2-NEXT: setnp %al @@ -1582,7 +1582,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: packssdw %xmm2, %xmm1 ; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: packsswb %xmm0, %xmm1 +; SSE2-NEXT: packsswb %xmm1, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax ; SSE2-NEXT: xorb $0, %al ; SSE2-NEXT: setnp %al diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index aa123c0afce6b..1d0de82fd551d 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -507,9 +507,8 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; AVX1-LABEL: splatvar_rotate_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64] -; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] +; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpsllq %xmm1, %xmm3, %xmm4 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 999fe4a731d91..d162f5c4a97ab 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1342,41 +1342,44 @@ entry: define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; SSE-LABEL: load_sext_2i1_to_2i64: ; SSE: # %bb.0: # %entry -; SSE-NEXT: movzbl (%rdi), %eax -; SSE-NEXT: movq %rax, %rcx -; SSE-NEXT: shlq $62, %rcx -; SSE-NEXT: movq %rcx, %xmm0 -; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: movb (%rdi), %al +; SSE-NEXT: movzbl %al, %ecx +; SSE-NEXT: shrb %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: negq %rax ; SSE-NEXT: movq %rax, %xmm1 -; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] +; SSE-NEXT: andl $1, %ecx +; SSE-NEXT: negq %rcx +; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: load_sext_2i1_to_2i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movzbl (%rdi), %eax -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shlq $62, %rcx -; AVX1-NEXT: vmovq %rcx, %xmm0 -; AVX1-NEXT: shlq $63, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: movb (%rdi), %al +; AVX1-NEXT: movzbl %al, %ecx +; AVX1-NEXT: shrb %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: negq %rax +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: negq %rcx +; AVX1-NEXT: vmovq %rcx, %xmm1 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: load_sext_2i1_to_2i64: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movzbl (%rdi), %eax -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $62, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm0 -; AVX2-NEXT: shlq $63, %rax -; AVX2-NEXT: vmovq %rax, %xmm1 +; AVX2-NEXT: movb (%rdi), %al +; AVX2-NEXT: movzbl %al, %ecx +; AVX2-NEXT: shrb %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: negq %rax +; AVX2-NEXT: vmovq %rax, %xmm0 +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: negq %rcx +; AVX2-NEXT: vmovq %rcx, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: load_sext_2i1_to_2i64: 
@@ -1390,30 +1393,34 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { ; X32-SSE2-LABEL: load_sext_2i1_to_2i64: ; X32-SSE2: # %bb.0: # %entry ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movzbl (%eax), %eax -; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shll $30, %ecx -; X32-SSE2-NEXT: movd %ecx, %xmm0 -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] -; X32-SSE2-NEXT: shll $31, %eax +; X32-SSE2-NEXT: movb (%eax), %al +; X32-SSE2-NEXT: movzbl %al, %ecx +; X32-SSE2-NEXT: shrb %al +; X32-SSE2-NEXT: movzbl %al, %eax +; X32-SSE2-NEXT: negl %eax ; X32-SSE2-NEXT: movd %eax, %xmm0 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; X32-SSE2-NEXT: andl $1, %ecx +; X32-SSE2-NEXT: negl %ecx +; X32-SSE2-NEXT: movd %ecx, %xmm0 ; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X32-SSE2-NEXT: psrad $31, %xmm0 ; X32-SSE2-NEXT: retl ; ; X32-SSE41-LABEL: load_sext_2i1_to_2i64: ; X32-SSE41: # %bb.0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE41-NEXT: movzbl (%eax), %eax -; X32-SSE41-NEXT: movl %eax, %ecx -; X32-SSE41-NEXT: shll $31, %ecx +; X32-SSE41-NEXT: movb (%eax), %al +; X32-SSE41-NEXT: movzbl %al, %ecx +; X32-SSE41-NEXT: andl $1, %ecx +; X32-SSE41-NEXT: negl %ecx ; X32-SSE41-NEXT: movd %ecx, %xmm0 ; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 -; X32-SSE41-NEXT: shll $30, %eax +; X32-SSE41-NEXT: shrb %al +; X32-SSE41-NEXT: movzbl %al, %eax +; X32-SSE41-NEXT: negl %eax ; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0 ; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 -; X32-SSE41-NEXT: psrad $31, %xmm0 ; X32-SSE41-NEXT: retl entry: %X = load <2 x i1>, <2 x i1>* %ptr @@ -1483,107 +1490,132 @@ entry: define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; SSE2-LABEL: load_sext_4i1_to_4i32: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movl (%rdi), %eax -; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $60, %rcx -; SSE2-NEXT: sarq $63, %rcx +; SSE2-NEXT: movb (%rdi), %al +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shrb $3, %cl +; SSE2-NEXT: movzbl %cl, %ecx +; SSE2-NEXT: negl %ecx ; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $61, %rcx -; SSE2-NEXT: sarq $63, %rcx -; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movzbl %al, %ecx +; SSE2-NEXT: shrb $2, %al +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $62, %rcx -; SSE2-NEXT: sarq $63, %rcx -; SSE2-NEXT: movd %ecx, %xmm2 -; SSE2-NEXT: shlq $63, %rax -; SSE2-NEXT: sarq $63, %rax +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: negl %eax ; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: shrb %cl +; SSE2-NEXT: movzbl %cl, %eax +; SSE2-NEXT: andl $1, %eax +; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: load_sext_4i1_to_4i32: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movl (%rdi), %eax -; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $60, %rcx -; SSSE3-NEXT: sarq $63, %rcx +; SSSE3-NEXT: movb (%rdi), %al +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shrb $3, %cl +; SSSE3-NEXT: movzbl %cl, %ecx +; SSSE3-NEXT: negl %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 -; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $61, %rcx -; SSSE3-NEXT: sarq $63, %rcx -; SSSE3-NEXT: movd 
%ecx, %xmm1 +; SSSE3-NEXT: movzbl %al, %ecx +; SSSE3-NEXT: shrb $2, %al +; SSSE3-NEXT: movzbl %al, %eax +; SSSE3-NEXT: andl $1, %eax +; SSSE3-NEXT: negl %eax +; SSSE3-NEXT: movd %eax, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $62, %rcx -; SSSE3-NEXT: sarq $63, %rcx -; SSSE3-NEXT: movd %ecx, %xmm2 -; SSSE3-NEXT: shlq $63, %rax -; SSSE3-NEXT: sarq $63, %rax +; SSSE3-NEXT: movl %ecx, %eax +; SSSE3-NEXT: andl $1, %eax +; SSSE3-NEXT: negl %eax ; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: shrb %cl +; SSSE3-NEXT: movzbl %cl, %eax +; SSSE3-NEXT: andl $1, %eax +; SSSE3-NEXT: negl %eax +; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: load_sext_4i1_to_4i32: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: movl (%rdi), %eax -; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shlq $62, %rcx -; SSE41-NEXT: sarq $63, %rcx -; SSE41-NEXT: movq %rax, %rdx -; SSE41-NEXT: shlq $63, %rdx -; SSE41-NEXT: sarq $63, %rdx +; SSE41-NEXT: movb (%rdi), %al +; SSE41-NEXT: movzbl %al, %ecx +; SSE41-NEXT: shrb %al +; SSE41-NEXT: movzbl %al, %eax +; SSE41-NEXT: andl $1, %eax +; SSE41-NEXT: negl %eax +; SSE41-NEXT: movl %ecx, %edx +; SSE41-NEXT: andl $1, %edx +; SSE41-NEXT: negl %edx ; SSE41-NEXT: movd %edx, %xmm0 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 -; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shlq $61, %rcx -; SSE41-NEXT: sarq $63, %rcx -; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 -; SSE41-NEXT: shlq $60, %rax -; SSE41-NEXT: sarq $63, %rax +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movl %ecx, %eax +; SSE41-NEXT: shrb $2, %al +; SSE41-NEXT: movzbl %al, %eax +; SSE41-NEXT: andl $1, %eax +; SSE41-NEXT: negl %eax +; SSE41-NEXT: pinsrd $2, %eax, %xmm0 +; SSE41-NEXT: shrb $3, %cl +; SSE41-NEXT: movzbl %cl, %eax +; SSE41-NEXT: negl %eax ; SSE41-NEXT: pinsrd $3, %eax, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: load_sext_4i1_to_4i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movl (%rdi), %eax -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shlq $62, %rcx -; AVX1-NEXT: sarq $63, %rcx -; AVX1-NEXT: movq %rax, %rdx -; AVX1-NEXT: shlq $63, %rdx -; AVX1-NEXT: sarq $63, %rdx +; AVX1-NEXT: movb (%rdi), %al +; AVX1-NEXT: movzbl %al, %ecx +; AVX1-NEXT: shrb %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: negl %edx ; AVX1-NEXT: vmovd %edx, %xmm0 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shlq $61, %rcx -; AVX1-NEXT: sarq $63, %rcx -; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: shlq $60, %rax -; AVX1-NEXT: sarq $63, %rax +; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX1-NEXT: movl %ecx, %eax +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX1-NEXT: shrb $3, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: negl %eax ; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: load_sext_4i1_to_4i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movl (%rdi), %eax -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $62, %rcx -; AVX2-NEXT: sarq $63, %rcx -; AVX2-NEXT: movq %rax, %rdx -; AVX2-NEXT: shlq $63, %rdx -; AVX2-NEXT: sarq $63, %rdx +; AVX2-NEXT: movb (%rdi), %al +; AVX2-NEXT: movzbl %al, %ecx +; AVX2-NEXT: shrb %al +; AVX2-NEXT: movzbl 
%al, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: movl %ecx, %edx +; AVX2-NEXT: andl $1, %edx +; AVX2-NEXT: negl %edx ; AVX2-NEXT: vmovd %edx, %xmm0 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $61, %rcx -; AVX2-NEXT: sarq $63, %rcx -; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: shlq $60, %rax -; AVX2-NEXT: sarq $63, %rax +; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX2-NEXT: movl %ecx, %eax +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX2-NEXT: shrb $3, %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: negl %eax ; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX2-NEXT: retq ; @@ -1598,40 +1630,56 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; X32-SSE2-LABEL: load_sext_4i1_to_4i32: ; X32-SSE2: # %bb.0: # %entry ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl (%eax), %eax +; X32-SSE2-NEXT: movb (%eax), %al ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shll $28, %ecx +; X32-SSE2-NEXT: shrb $3, %cl +; X32-SSE2-NEXT: movzbl %cl, %ecx +; X32-SSE2-NEXT: negl %ecx ; X32-SSE2-NEXT: movd %ecx, %xmm0 ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shll $29, %ecx +; X32-SSE2-NEXT: shrb $2, %cl +; X32-SSE2-NEXT: movzbl %cl, %ecx +; X32-SSE2-NEXT: andl $1, %ecx +; X32-SSE2-NEXT: negl %ecx ; X32-SSE2-NEXT: movd %ecx, %xmm1 ; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shll $30, %ecx -; X32-SSE2-NEXT: movd %ecx, %xmm2 -; X32-SSE2-NEXT: shll $31, %eax -; X32-SSE2-NEXT: movd %eax, %xmm0 +; X32-SSE2-NEXT: movzbl %al, %ecx +; X32-SSE2-NEXT: andl $1, %ecx +; X32-SSE2-NEXT: negl %ecx +; X32-SSE2-NEXT: movd %ecx, %xmm0 +; X32-SSE2-NEXT: shrb %al +; X32-SSE2-NEXT: movzbl %al, %eax +; X32-SSE2-NEXT: andl $1, %eax +; X32-SSE2-NEXT: negl %eax +; X32-SSE2-NEXT: movd %eax, %xmm2 ; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X32-SSE2-NEXT: psrad $31, %xmm0 ; X32-SSE2-NEXT: retl ; ; X32-SSE41-LABEL: load_sext_4i1_to_4i32: ; X32-SSE41: # %bb.0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE41-NEXT: movl (%eax), %eax +; X32-SSE41-NEXT: movb (%eax), %al ; X32-SSE41-NEXT: movl %eax, %ecx -; X32-SSE41-NEXT: shll $30, %ecx -; X32-SSE41-NEXT: movl %eax, %edx -; X32-SSE41-NEXT: shll $31, %edx +; X32-SSE41-NEXT: shrb %cl +; X32-SSE41-NEXT: movzbl %cl, %ecx +; X32-SSE41-NEXT: andl $1, %ecx +; X32-SSE41-NEXT: negl %ecx +; X32-SSE41-NEXT: movzbl %al, %edx +; X32-SSE41-NEXT: andl $1, %edx +; X32-SSE41-NEXT: negl %edx ; X32-SSE41-NEXT: movd %edx, %xmm0 ; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 ; X32-SSE41-NEXT: movl %eax, %ecx -; X32-SSE41-NEXT: shll $29, %ecx +; X32-SSE41-NEXT: shrb $2, %cl +; X32-SSE41-NEXT: movzbl %cl, %ecx +; X32-SSE41-NEXT: andl $1, %ecx +; X32-SSE41-NEXT: negl %ecx ; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 -; X32-SSE41-NEXT: shll $28, %eax +; X32-SSE41-NEXT: shrb $3, %al +; X32-SSE41-NEXT: movzbl %al, %eax +; X32-SSE41-NEXT: negl %eax ; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 -; X32-SSE41-NEXT: psrad $31, %xmm0 ; X32-SSE41-NEXT: retl entry: %X = load <4 x i1>, <4 x i1>* %ptr @@ -1689,25 +1737,29 @@ entry: define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; SSE2-LABEL: load_sext_4i1_to_4i64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movl (%rdi), %eax +; SSE2-NEXT: movb (%rdi), 
%al ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $3, %ecx -; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: shrb %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movzbl %cl, %ecx +; SSE2-NEXT: movl %eax, %edx +; SSE2-NEXT: andb $1, %dl +; SSE2-NEXT: movzbl %dl, %edx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: pinsrw $2, %ecx, %xmm1 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $2, %ecx -; SSE2-NEXT: movd %ecx, %xmm1 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: shrl %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] +; SSE2-NEXT: shrb $2, %cl +; SSE2-NEXT: andb $1, %cl +; SSE2-NEXT: movzbl %cl, %ecx +; SSE2-NEXT: pinsrw $4, %ecx, %xmm1 +; SSE2-NEXT: shrb $3, %al +; SSE2-NEXT: movzbl %al, %eax +; SSE2-NEXT: pinsrw $6, %eax, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] ; SSE2-NEXT: psllq $63, %xmm0 ; SSE2-NEXT: psrad $31, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] ; SSE2-NEXT: psllq $63, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] @@ -1715,25 +1767,29 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; ; SSSE3-LABEL: load_sext_4i1_to_4i64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movl (%rdi), %eax +; SSSE3-NEXT: movb (%rdi), %al ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $3, %ecx -; SSSE3-NEXT: movd %ecx, %xmm0 +; SSSE3-NEXT: shrb %cl +; SSSE3-NEXT: andb $1, %cl +; SSSE3-NEXT: movzbl %cl, %ecx +; SSSE3-NEXT: movl %eax, %edx +; SSSE3-NEXT: andb $1, %dl +; SSSE3-NEXT: movzbl %dl, %edx +; SSSE3-NEXT: movd %edx, %xmm1 +; SSSE3-NEXT: pinsrw $2, %ecx, %xmm1 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $2, %ecx -; SSSE3-NEXT: movd %ecx, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: movd %eax, %xmm2 -; SSSE3-NEXT: shrl %eax -; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] +; SSSE3-NEXT: shrb $2, %cl +; SSSE3-NEXT: andb $1, %cl +; SSSE3-NEXT: movzbl %cl, %ecx +; SSSE3-NEXT: pinsrw $4, %ecx, %xmm1 +; SSSE3-NEXT: shrb $3, %al +; SSSE3-NEXT: movzbl %al, %eax +; SSSE3-NEXT: pinsrw $6, %eax, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] ; SSSE3-NEXT: psllq $63, %xmm0 ; SSSE3-NEXT: psrad $31, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] ; SSSE3-NEXT: psllq $63, %xmm1 ; SSSE3-NEXT: psrad $31, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] @@ -1741,18 +1797,25 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; ; SSE41-LABEL: load_sext_4i1_to_4i64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: movl (%rdi), %eax +; SSE41-NEXT: movb (%rdi), %al ; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: shrl %ecx -; SSE41-NEXT: movd %eax, %xmm1 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 +; SSE41-NEXT: shrb %cl +; SSE41-NEXT: andb $1, %cl +; SSE41-NEXT: movzbl %cl, %ecx +; SSE41-NEXT: movl %eax, %edx +; SSE41-NEXT: andb $1, %dl +; SSE41-NEXT: movzbl %dl, %edx +; SSE41-NEXT: movd 
%edx, %xmm1 +; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 ; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: shrl $2, %ecx +; SSE41-NEXT: shrb $2, %cl +; SSE41-NEXT: andb $1, %cl +; SSE41-NEXT: movzbl %cl, %ecx ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero -; SSE41-NEXT: pinsrd $2, %ecx, %xmm1 -; SSE41-NEXT: shrl $3, %eax -; SSE41-NEXT: pinsrd $3, %eax, %xmm1 -; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 +; SSE41-NEXT: shrb $3, %al +; SSE41-NEXT: movzbl %al, %eax +; SSE41-NEXT: pinsrb $12, %eax, %xmm1 ; SSE41-NEXT: psllq $63, %xmm0 ; SSE41-NEXT: psrad $31, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] @@ -1764,21 +1827,26 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; ; AVX1-LABEL: load_sext_4i1_to_4i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movl (%rdi), %eax -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shlq $62, %rcx -; AVX1-NEXT: sarq $63, %rcx -; AVX1-NEXT: movq %rax, %rdx -; AVX1-NEXT: shlq $63, %rdx -; AVX1-NEXT: sarq $63, %rdx +; AVX1-NEXT: movb (%rdi), %al +; AVX1-NEXT: movzbl %al, %ecx +; AVX1-NEXT: shrb %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: movl %ecx, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: negl %edx ; AVX1-NEXT: vmovd %edx, %xmm0 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shlq $61, %rcx -; AVX1-NEXT: sarq $63, %rcx -; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: shlq $60, %rax -; AVX1-NEXT: sarq $63, %rax +; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX1-NEXT: movl %ecx, %eax +; AVX1-NEXT: shrb $2, %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX1-NEXT: shrb $3, %cl +; AVX1-NEXT: movzbl %cl, %eax +; AVX1-NEXT: negl %eax ; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] @@ -1788,23 +1856,30 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; ; AVX2-LABEL: load_sext_4i1_to_4i64: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movl (%rdi), %eax -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $60, %rcx +; AVX2-NEXT: movb (%rdi), %al +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: shrb $3, %cl +; AVX2-NEXT: movzbl %cl, %ecx +; AVX2-NEXT: negq %rcx ; AVX2-NEXT: vmovq %rcx, %xmm0 -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $61, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm1 +; AVX2-NEXT: movzbl %al, %ecx +; AVX2-NEXT: shrb $2, %al +; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: negq %rax +; AVX2-NEXT: vmovq %rax, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: movq %rax, %rcx -; AVX2-NEXT: shlq $62, %rcx -; AVX2-NEXT: vmovq %rcx, %xmm1 -; AVX2-NEXT: shlq $63, %rax +; AVX2-NEXT: movl %ecx, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: negq %rax +; AVX2-NEXT: vmovq %rax, %xmm1 +; AVX2-NEXT: shrb %cl +; AVX2-NEXT: movzbl %cl, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: negq %rax ; AVX2-NEXT: vmovq %rax, %xmm2 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: load_sext_4i1_to_4i64: @@ -1817,25 +1892,29 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; X32-SSE2-LABEL: load_sext_4i1_to_4i64: ; X32-SSE2: # %bb.0: # %entry ; X32-SSE2-NEXT: 
movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movzbl (%eax), %eax +; X32-SSE2-NEXT: movb (%eax), %al ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shrl $3, %ecx -; X32-SSE2-NEXT: movd %ecx, %xmm0 +; X32-SSE2-NEXT: shrb %cl +; X32-SSE2-NEXT: andb $1, %cl +; X32-SSE2-NEXT: movzbl %cl, %ecx +; X32-SSE2-NEXT: movl %eax, %edx +; X32-SSE2-NEXT: andb $1, %dl +; X32-SSE2-NEXT: movzbl %dl, %edx +; X32-SSE2-NEXT: movd %edx, %xmm1 +; X32-SSE2-NEXT: pinsrw $2, %ecx, %xmm1 ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shrl $2, %ecx -; X32-SSE2-NEXT: movd %ecx, %xmm1 -; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X32-SSE2-NEXT: movd %eax, %xmm2 -; X32-SSE2-NEXT: shrl %eax -; X32-SSE2-NEXT: movd %eax, %xmm0 -; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm2 -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] +; X32-SSE2-NEXT: shrb $2, %cl +; X32-SSE2-NEXT: andb $1, %cl +; X32-SSE2-NEXT: movzbl %cl, %ecx +; X32-SSE2-NEXT: pinsrw $4, %ecx, %xmm1 +; X32-SSE2-NEXT: shrb $3, %al +; X32-SSE2-NEXT: movzbl %al, %eax +; X32-SSE2-NEXT: pinsrw $6, %eax, %xmm1 +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] ; X32-SSE2-NEXT: psllq $63, %xmm0 ; X32-SSE2-NEXT: psrad $31, %xmm0 ; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] +; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] ; X32-SSE2-NEXT: psllq $63, %xmm1 ; X32-SSE2-NEXT: psrad $31, %xmm1 ; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] @@ -1844,18 +1923,25 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; X32-SSE41-LABEL: load_sext_4i1_to_4i64: ; X32-SSE41: # %bb.0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE41-NEXT: movzbl (%eax), %eax +; X32-SSE41-NEXT: movb (%eax), %al ; X32-SSE41-NEXT: movl %eax, %ecx -; X32-SSE41-NEXT: shrl %ecx -; X32-SSE41-NEXT: movd %eax, %xmm1 -; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1 +; X32-SSE41-NEXT: shrb %cl +; X32-SSE41-NEXT: andb $1, %cl +; X32-SSE41-NEXT: movzbl %cl, %ecx +; X32-SSE41-NEXT: movl %eax, %edx +; X32-SSE41-NEXT: andb $1, %dl +; X32-SSE41-NEXT: movzbl %dl, %edx +; X32-SSE41-NEXT: movd %edx, %xmm1 +; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 ; X32-SSE41-NEXT: movl %eax, %ecx -; X32-SSE41-NEXT: shrl $2, %ecx +; X32-SSE41-NEXT: shrb $2, %cl +; X32-SSE41-NEXT: andb $1, %cl +; X32-SSE41-NEXT: movzbl %cl, %ecx ; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero -; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1 -; X32-SSE41-NEXT: shrl $3, %eax -; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1 -; X32-SSE41-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 +; X32-SSE41-NEXT: shrb $3, %al +; X32-SSE41-NEXT: movzbl %al, %eax +; X32-SSE41-NEXT: pinsrb $12, %eax, %xmm1 ; X32-SSE41-NEXT: psllq $63, %xmm0 ; X32-SSE41-NEXT: psrad $31, %xmm0 ; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] @@ -3376,25 +3462,27 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; SSE2-LABEL: sext_4i17_to_4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movl %eax, %ecx +; SSE2-NEXT: shll $15, %ecx +; SSE2-NEXT: sarl $15, %ecx +; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $30, %rcx -; SSE2-NEXT: sarq $47, %rcx +; SSE2-NEXT: shrq $17, %rcx +; SSE2-NEXT: shll $15, %ecx +; SSE2-NEXT: sarl $15, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 -; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $47, %rcx -; 
SSE2-NEXT: sarq $47, %rcx -; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movl 8(%rdi), %ecx ; SSE2-NEXT: shll $13, %ecx ; SSE2-NEXT: movq %rax, %rdx ; SSE2-NEXT: shrq $51, %rdx ; SSE2-NEXT: orl %ecx, %edx -; SSE2-NEXT: shlq $47, %rdx -; SSE2-NEXT: sarq $47, %rdx +; SSE2-NEXT: shll $15, %edx +; SSE2-NEXT: sarl $15, %edx ; SSE2-NEXT: movd %edx, %xmm1 -; SSE2-NEXT: shlq $13, %rax -; SSE2-NEXT: sarq $47, %rax +; SSE2-NEXT: shrq $34, %rax +; SSE2-NEXT: shll $15, %eax +; SSE2-NEXT: sarl $15, %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] @@ -3403,25 +3491,27 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; SSSE3-LABEL: sext_4i17_to_4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movq (%rdi), %rax +; SSSE3-NEXT: movl %eax, %ecx +; SSSE3-NEXT: shll $15, %ecx +; SSSE3-NEXT: sarl $15, %ecx +; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $30, %rcx -; SSSE3-NEXT: sarq $47, %rcx +; SSSE3-NEXT: shrq $17, %rcx +; SSSE3-NEXT: shll $15, %ecx +; SSSE3-NEXT: sarl $15, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 -; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $47, %rcx -; SSSE3-NEXT: sarq $47, %rcx -; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movl 8(%rdi), %ecx ; SSSE3-NEXT: shll $13, %ecx ; SSSE3-NEXT: movq %rax, %rdx ; SSSE3-NEXT: shrq $51, %rdx ; SSSE3-NEXT: orl %ecx, %edx -; SSSE3-NEXT: shlq $47, %rdx -; SSSE3-NEXT: sarq $47, %rdx +; SSSE3-NEXT: shll $15, %edx +; SSSE3-NEXT: sarl $15, %edx ; SSSE3-NEXT: movd %edx, %xmm1 -; SSSE3-NEXT: shlq $13, %rax -; SSSE3-NEXT: sarq $47, %rax +; SSSE3-NEXT: shrq $34, %rax +; SSSE3-NEXT: shll $15, %eax +; SSSE3-NEXT: sarl $15, %eax ; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] @@ -3431,23 +3521,25 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; SSE41: # %bb.0: ; SSE41-NEXT: movq (%rdi), %rax ; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shlq $30, %rcx -; SSE41-NEXT: sarq $47, %rcx -; SSE41-NEXT: movq %rax, %rdx -; SSE41-NEXT: shlq $47, %rdx -; SSE41-NEXT: sarq $47, %rdx +; SSE41-NEXT: shrq $17, %rcx +; SSE41-NEXT: shll $15, %ecx +; SSE41-NEXT: sarl $15, %ecx +; SSE41-NEXT: movl %eax, %edx +; SSE41-NEXT: shll $15, %edx +; SSE41-NEXT: sarl $15, %edx ; SSE41-NEXT: movd %edx, %xmm0 ; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 ; SSE41-NEXT: movq %rax, %rcx -; SSE41-NEXT: shlq $13, %rcx -; SSE41-NEXT: sarq $47, %rcx +; SSE41-NEXT: shrq $34, %rcx +; SSE41-NEXT: shll $15, %ecx +; SSE41-NEXT: sarl $15, %ecx ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 ; SSE41-NEXT: movl 8(%rdi), %ecx ; SSE41-NEXT: shll $13, %ecx ; SSE41-NEXT: shrq $51, %rax ; SSE41-NEXT: orl %ecx, %eax -; SSE41-NEXT: shlq $47, %rax -; SSE41-NEXT: sarq $47, %rax +; SSE41-NEXT: shll $15, %eax +; SSE41-NEXT: sarl $15, %eax ; SSE41-NEXT: pinsrd $3, %eax, %xmm0 ; SSE41-NEXT: retq ; @@ -3455,23 +3547,25 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; AVX: # %bb.0: ; AVX-NEXT: movq (%rdi), %rax ; AVX-NEXT: movq %rax, %rcx -; AVX-NEXT: shlq $30, %rcx -; AVX-NEXT: sarq $47, %rcx -; AVX-NEXT: movq %rax, %rdx -; AVX-NEXT: shlq $47, %rdx -; AVX-NEXT: sarq $47, %rdx +; AVX-NEXT: shrq $17, %rcx +; AVX-NEXT: shll $15, %ecx +; AVX-NEXT: sarl $15, %ecx +; AVX-NEXT: movl %eax, %edx +; AVX-NEXT: shll $15, %edx +; AVX-NEXT: sarl 
$15, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ; AVX-NEXT: movq %rax, %rcx -; AVX-NEXT: shlq $13, %rcx -; AVX-NEXT: sarq $47, %rcx +; AVX-NEXT: shrq $34, %rcx +; AVX-NEXT: shll $15, %ecx +; AVX-NEXT: sarl $15, %ecx ; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ; AVX-NEXT: movl 8(%rdi), %ecx ; AVX-NEXT: shll $13, %ecx ; AVX-NEXT: shrq $51, %rax ; AVX-NEXT: orl %ecx, %eax -; AVX-NEXT: shlq $47, %rax -; AVX-NEXT: sarq $47, %rax +; AVX-NEXT: shll $15, %eax +; AVX-NEXT: sarl $15, %eax ; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq ; @@ -3483,19 +3577,22 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; X32-SSE2-NEXT: movl 8(%eax), %eax ; X32-SSE2-NEXT: shldl $13, %edx, %eax ; X32-SSE2-NEXT: shll $15, %eax +; X32-SSE2-NEXT: sarl $15, %eax ; X32-SSE2-NEXT: movd %eax, %xmm0 ; X32-SSE2-NEXT: movl %edx, %eax ; X32-SSE2-NEXT: shll $13, %eax +; X32-SSE2-NEXT: sarl $15, %eax ; X32-SSE2-NEXT: movd %eax, %xmm1 ; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-SSE2-NEXT: shldl $15, %ecx, %edx ; X32-SSE2-NEXT: shll $15, %ecx +; X32-SSE2-NEXT: sarl $15, %ecx ; X32-SSE2-NEXT: movd %ecx, %xmm0 ; X32-SSE2-NEXT: shll $15, %edx +; X32-SSE2-NEXT: sarl $15, %edx ; X32-SSE2-NEXT: movd %edx, %xmm2 ; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X32-SSE2-NEXT: psrad $15, %xmm0 ; X32-SSE2-NEXT: retl ; ; X32-SSE41-LABEL: sext_4i17_to_4i32: @@ -3511,14 +3608,17 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { ; X32-SSE41-NEXT: shldl $13, %edx, %eax ; X32-SSE41-NEXT: shldl $15, %ecx, %edx ; X32-SSE41-NEXT: shll $15, %edx +; X32-SSE41-NEXT: sarl $15, %edx ; X32-SSE41-NEXT: shll $15, %ecx +; X32-SSE41-NEXT: sarl $15, %ecx ; X32-SSE41-NEXT: movd %ecx, %xmm0 ; X32-SSE41-NEXT: pinsrd $1, %edx, %xmm0 ; X32-SSE41-NEXT: shll $13, %esi +; X32-SSE41-NEXT: sarl $15, %esi ; X32-SSE41-NEXT: pinsrd $2, %esi, %xmm0 ; X32-SSE41-NEXT: shll $15, %eax +; X32-SSE41-NEXT: sarl $15, %eax ; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 -; X32-SSE41-NEXT: psrad $15, %xmm0 ; X32-SSE41-NEXT: popl %esi ; X32-SSE41-NEXT: .cfi_def_cfa_offset 4 ; X32-SSE41-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll index 9eeff51970a8c..e821709153538 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll @@ -38,14 +38,14 @@ define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16 ; SSE41-LABEL: blend_packusdw_packuswb: ; SSE41: # %bb.0: ; SSE41-NEXT: packusdw %xmm0, %xmm0 -; SSE41-NEXT: packuswb %xmm0, %xmm2 +; SSE41-NEXT: packuswb %xmm2, %xmm2 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE41-NEXT: retq ; ; AVX-LABEL: blend_packusdw_packuswb: ; AVX: # %bb.0: ; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpackuswb %xmm0, %xmm2, %xmm1 +; AVX-NEXT: vpackuswb %xmm2, %xmm2, %xmm1 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index 80e33a7aab9ee..ecfc5e610350a 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -300,12 +300,11 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX512-LABEL: trunc_add_v16i64_v16i8: ; AVX512: # 
%bb.0: -; AVX512-NEXT: vpaddq %zmm3, %zmm1, %zmm1 ; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpaddq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = add <16 x i64> %a0, %a1 @@ -731,10 +730,9 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512-LABEL: trunc_add_const_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1146,12 +1144,11 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX512-LABEL: trunc_sub_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpsubq %zmm3, %zmm1, %zmm1 ; AVX512-NEXT: vpsubq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpsubq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = sub <16 x i64> %a0, %a1 @@ -1545,10 +1542,9 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512-LABEL: trunc_sub_const_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2079,38 +2075,31 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX512F-LABEL: trunc_mul_v16i64_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovqd %zmm3, %ymm3 -; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512F-NEXT: vpmulld %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpmulld %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpmuludq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm1 +; AVX512F-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: trunc_mul_v16i64_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmovqd %zmm3, %ymm3 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpmulld %ymm3, %ymm1, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm2, %ymm2 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpmulld %ymm2, %ymm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmuludq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmuludq %zmm3, 
%zmm1, %zmm1 +; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: trunc_mul_v16i64_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullq %zmm3, %zmm1, %zmm1 ; AVX512DQ-NEXT: vpmullq %zmm2, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpmullq %zmm3, %zmm1, %zmm1 +; AVX512DQ-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512DQ-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %1 = mul <16 x i64> %a0, %a1 @@ -2587,16 +2576,35 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq ; -; AVX512-LABEL: trunc_mul_const_v16i64_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmuludq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT: vpmuludq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512F-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmuludq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmuludq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmullq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512DQ-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512DQ-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %1 = mul <16 x i64> %a0, %2 = trunc <16 x i64> %1 to <16 x i8> ret <16 x i8> %2 @@ -3024,12 +3032,11 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX512-LABEL: trunc_and_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1 ; AVX512-NEXT: vpandq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = and <16 x i64> %a0, %a1 @@ -3396,10 +3403,9 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512-LABEL: trunc_and_const_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; 
AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -3789,12 +3795,11 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX512-LABEL: trunc_xor_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpxorq %zmm3, %zmm1, %zmm1 ; AVX512-NEXT: vpxorq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpxorq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = xor <16 x i64> %a0, %a1 @@ -4161,10 +4166,9 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512-LABEL: trunc_xor_const_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -4554,12 +4558,11 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind ; ; AVX512-LABEL: trunc_or_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vporq %zmm3, %zmm1, %zmm1 ; AVX512-NEXT: vporq %zmm2, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vporq %zmm3, %zmm1, %zmm1 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = or <16 x i64> %a0, %a1 @@ -4926,10 +4929,9 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; ; AVX512-LABEL: trunc_or_const_v16i64_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index 7ab38b317dc13..8a0310bbad57b 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -1512,7 +1512,7 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 ; SSE41-NEXT: packusdw %xmm2, %xmm1 -; SSE41-NEXT: packusdw %xmm0, %xmm1 +; SSE41-NEXT: packusdw %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -1766,7 +1766,7 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 ; SSE41-NEXT: packusdw %xmm2, %xmm1 -; SSE41-NEXT: packusdw %xmm0, %xmm1 +; SSE41-NEXT: packusdw %xmm1, %xmm1 ; SSE41-NEXT: movq %xmm1, (%rdi) ; SSE41-NEXT: retq ; @@ -2870,8 +2870,8 @@ 
define void @trunc_packus_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; SSE2-NEXT: pand %xmm3, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: movw %ax, (%rdi) ; SSE2-NEXT: retq @@ -3313,8 +3313,8 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; SSE2-NEXT: pand %xmm8, %xmm2 ; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: packuswb %xmm3, %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 ; SSE2-NEXT: movd %xmm2, (%rdi) ; SSE2-NEXT: retq ; @@ -3838,7 +3838,7 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 ; SSE41-NEXT: packusdw %xmm3, %xmm5 ; SSE41-NEXT: packusdw %xmm5, %xmm1 -; SSE41-NEXT: packuswb %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -4030,7 +4030,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l ; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: packuswb %xmm4, %xmm2 ; SSE2-NEXT: packuswb %xmm2, %xmm3 -; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: packuswb %xmm3, %xmm3 ; SSE2-NEXT: movq %xmm3, (%rsi) ; SSE2-NEXT: retq ; @@ -4142,7 +4142,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l ; SSSE3-NEXT: pand %xmm1, %xmm2 ; SSSE3-NEXT: packuswb %xmm4, %xmm2 ; SSSE3-NEXT: packuswb %xmm2, %xmm3 -; SSSE3-NEXT: packuswb %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm3, %xmm3 ; SSSE3-NEXT: movq %xmm3, (%rsi) ; SSSE3-NEXT: retq ; @@ -4240,7 +4240,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-l ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 ; SSE41-NEXT: packusdw %xmm2, %xmm5 ; SSE41-NEXT: packusdw %xmm5, %xmm4 -; SSE41-NEXT: packuswb %xmm0, %xmm4 +; SSE41-NEXT: packuswb %xmm4, %xmm4 ; SSE41-NEXT: movq %xmm4, (%rsi) ; SSE41-NEXT: retq ; @@ -5033,57 +5033,16 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64>* %p0) "min-legal-vector-w ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: trunc_packus_v16i64_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm0 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminsq (%rdi), %zmm0, %zmm1 -; AVX512F-NEXT: vpminsq 64(%rdi), %zmm0, %zmm0 -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_packus_v16i64_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq 64(%rdi), %zmm0, %zmm1 -; AVX512VL-NEXT: vpmovusqb %zmm1, %xmm1 -; AVX512VL-NEXT: vpmaxsq (%rdi), %zmm0, %zmm0 -; AVX512VL-NEXT: vpmovusqb %zmm0, %xmm0 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_packus_v16i64_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm0 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminsq (%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpminsq 64(%rdi), %zmm0, %zmm0 -; 
AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_packus_v16i64_v16i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq 64(%rdi), %zmm0, %zmm1 -; AVX512BWVL-NEXT: vpmovusqb %zmm1, %xmm1 -; AVX512BWVL-NEXT: vpmaxsq (%rdi), %zmm0, %zmm0 -; AVX512BWVL-NEXT: vpmovusqb %zmm0, %xmm0 -; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_packus_v16i64_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vpmaxsq 64(%rdi), %zmm0, %zmm1 +; AVX512-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512-NEXT: vpmaxsq (%rdi), %zmm0, %zmm0 +; AVX512-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_packus_v16i64_v16i8: ; SKX: # %bb.0: @@ -5148,7 +5107,7 @@ define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width" ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pmaxsd %xmm0, %xmm1 -; SSE41-NEXT: packusdw %xmm0, %xmm1 +; SSE41-NEXT: packusdw %xmm1, %xmm1 ; SSE41-NEXT: packuswb %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq @@ -5230,8 +5189,8 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 ; SSE2-NEXT: movd %xmm1, (%rdi) ; SSE2-NEXT: retq ; @@ -5256,8 +5215,8 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pmaxsd %xmm0, %xmm1 -; SSE41-NEXT: packusdw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm0, %xmm1 +; SSE41-NEXT: packusdw %xmm1, %xmm1 +; SSE41-NEXT: packuswb %xmm1, %xmm1 ; SSE41-NEXT: movd %xmm1, (%rdi) ; SSE41-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index e045e41b71000..34ab8ace8b16a 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -1508,7 +1508,7 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 ; SSE41-NEXT: packssdw %xmm2, %xmm1 -; SSE41-NEXT: packssdw %xmm0, %xmm1 +; SSE41-NEXT: packssdw %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -1643,7 +1643,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; SSE2-NEXT: pandn %xmm0, %xmm1 ; SSE2-NEXT: por %xmm3, %xmm1 ; SSE2-NEXT: packssdw %xmm6, %xmm1 -; SSE2-NEXT: packssdw %xmm0, %xmm1 +; SSE2-NEXT: packssdw %xmm1, %xmm1 ; SSE2-NEXT: movq %xmm1, (%rdi) ; SSE2-NEXT: retq ; @@ -1706,7 +1706,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; SSSE3-NEXT: pandn %xmm0, %xmm1 ; SSSE3-NEXT: por %xmm3, %xmm1 ; SSSE3-NEXT: packssdw %xmm6, %xmm1 -; SSSE3-NEXT: packssdw %xmm0, %xmm1 +; SSSE3-NEXT: packssdw %xmm1, %xmm1 ; 
SSSE3-NEXT: movq %xmm1, (%rdi) ; SSSE3-NEXT: retq ; @@ -1756,7 +1756,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 ; SSE41-NEXT: packssdw %xmm2, %xmm1 -; SSE41-NEXT: packssdw %xmm0, %xmm1 +; SSE41-NEXT: packssdw %xmm1, %xmm1 ; SSE41-NEXT: movq %xmm1, (%rdi) ; SSE41-NEXT: retq ; @@ -2610,8 +2610,8 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; SSE2-NEXT: por %xmm3, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: movw %ax, (%rdi) ; SSE2-NEXT: retq @@ -3590,7 +3590,7 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64>* %p0) "min-legal-vector-width"= ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 ; SSE41-NEXT: packssdw %xmm3, %xmm5 ; SSE41-NEXT: packssdw %xmm5, %xmm1 -; SSE41-NEXT: packsswb %xmm0, %xmm1 +; SSE41-NEXT: packsswb %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -3791,7 +3791,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg ; SSE2-NEXT: por %xmm1, %xmm2 ; SSE2-NEXT: packssdw %xmm4, %xmm2 ; SSE2-NEXT: packssdw %xmm2, %xmm6 -; SSE2-NEXT: packsswb %xmm0, %xmm6 +; SSE2-NEXT: packsswb %xmm6, %xmm6 ; SSE2-NEXT: movq %xmm6, (%rsi) ; SSE2-NEXT: retq ; @@ -3912,7 +3912,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg ; SSSE3-NEXT: por %xmm1, %xmm2 ; SSSE3-NEXT: packssdw %xmm4, %xmm2 ; SSSE3-NEXT: packssdw %xmm2, %xmm6 -; SSSE3-NEXT: packsswb %xmm0, %xmm6 +; SSSE3-NEXT: packsswb %xmm6, %xmm6 ; SSSE3-NEXT: movq %xmm6, (%rsi) ; SSSE3-NEXT: retq ; @@ -4010,7 +4010,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) "min-leg ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 ; SSE41-NEXT: packssdw %xmm2, %xmm5 ; SSE41-NEXT: packssdw %xmm5, %xmm4 -; SSE41-NEXT: packsswb %xmm0, %xmm4 +; SSE41-NEXT: packsswb %xmm4, %xmm4 ; SSE41-NEXT: movq %xmm4, (%rsi) ; SSE41-NEXT: retq ; @@ -4835,55 +4835,15 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64>* %p0) "min-legal-vector-wid ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: trunc_ssat_v16i64_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm0 = [127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpminsq (%rdi), %zmm0, %zmm1 -; AVX512F-NEXT: vpminsq 64(%rdi), %zmm0, %zmm0 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] -; AVX512F-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_ssat_v16i64_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512VL-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512VL-NEXT: vpmovsqb %zmm1, %xmm1 -; AVX512VL-NEXT: vpmovsqb %zmm0, %xmm0 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_ssat_v16i64_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm0 = 
[127,127,127,127,127,127,127,127] -; AVX512BW-NEXT: vpminsq (%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpminsq 64(%rdi), %zmm0, %zmm0 -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] -; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_ssat_v16i64_v16i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512BWVL-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512BWVL-NEXT: vpmovsqb %zmm1, %xmm1 -; AVX512BWVL-NEXT: vpmovsqb %zmm0, %xmm0 -; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_ssat_v16i64_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 +; AVX512-NEXT: vpmovsqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovsqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v16i64_v16i8: ; SKX: # %bb.0: @@ -5024,8 +4984,8 @@ define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; SSE2-NEXT: pandn %xmm0, %xmm1 ; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 ; SSE2-NEXT: movd %xmm1, (%rdi) ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 17252867eb4f3..df691c55f14c6 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -1046,7 +1046,7 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: por %xmm5, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 ; SSE41-NEXT: packusdw %xmm6, %xmm2 -; SSE41-NEXT: packusdw %xmm0, %xmm2 +; SSE41-NEXT: packusdw %xmm2, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -1221,7 +1221,7 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; SSE41-NEXT: por %xmm5, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3 ; SSE41-NEXT: packusdw %xmm6, %xmm3 -; SSE41-NEXT: packusdw %xmm0, %xmm3 +; SSE41-NEXT: packusdw %xmm3, %xmm3 ; SSE41-NEXT: movq %xmm3, (%rdi) ; SSE41-NEXT: retq ; @@ -2127,8 +2127,8 @@ define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) { ; SSE2-NEXT: por %xmm0, %xmm2 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE2-NEXT: packuswb %xmm2, %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 ; SSE2-NEXT: movd %xmm2, %eax ; SSE2-NEXT: movw %ax, (%rdi) ; SSE2-NEXT: retq @@ -2431,8 +2431,8 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; SSE2-NEXT: pand %xmm8, %xmm0 ; SSE2-NEXT: pand %xmm8, %xmm3 ; SSE2-NEXT: packuswb %xmm0, %xmm3 -; SSE2-NEXT: packuswb %xmm0, %xmm3 -; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: packuswb %xmm3, %xmm3 +; SSE2-NEXT: packuswb %xmm3, %xmm3 ; SSE2-NEXT: movd %xmm3, (%rdi) ; SSE2-NEXT: retq ; @@ -2767,7 +2767,7 @@ define <8 x i8> 
@trunc_usat_v8i64_v8i8(<8 x i64>* %p0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2 ; SSE41-NEXT: packusdw %xmm4, %xmm2 ; SSE41-NEXT: packusdw %xmm2, %xmm1 -; SSE41-NEXT: packuswb %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; @@ -2903,7 +2903,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) { ; SSE2-NEXT: por %xmm9, %xmm0 ; SSE2-NEXT: packuswb %xmm3, %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm5 -; SSE2-NEXT: packuswb %xmm0, %xmm5 +; SSE2-NEXT: packuswb %xmm5, %xmm5 ; SSE2-NEXT: movq %xmm5, (%rsi) ; SSE2-NEXT: retq ; @@ -2970,7 +2970,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) { ; SSSE3-NEXT: por %xmm9, %xmm0 ; SSSE3-NEXT: packuswb %xmm3, %xmm0 ; SSSE3-NEXT: packuswb %xmm0, %xmm5 -; SSSE3-NEXT: packuswb %xmm0, %xmm5 +; SSSE3-NEXT: packuswb %xmm5, %xmm5 ; SSSE3-NEXT: movq %xmm5, (%rsi) ; SSSE3-NEXT: retq ; @@ -3027,7 +3027,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64>* %p0, <8 x i8> *%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 ; SSE41-NEXT: packusdw %xmm3, %xmm1 ; SSE41-NEXT: packusdw %xmm1, %xmm6 -; SSE41-NEXT: packuswb %xmm0, %xmm6 +; SSE41-NEXT: packuswb %xmm6, %xmm6 ; SSE41-NEXT: movq %xmm6, (%rsi) ; SSE41-NEXT: retq ; @@ -3543,49 +3543,15 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(<16 x i64>* %p0) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: trunc_usat_v16i64_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm0 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminuq 64(%rdi), %zmm0, %zmm1 -; AVX512F-NEXT: vpminuq (%rdi), %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: trunc_usat_v16i64_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512VL-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512VL-NEXT: vpmovusqb %zmm1, %xmm1 -; AVX512VL-NEXT: vpmovusqb %zmm0, %xmm0 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: trunc_usat_v16i64_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm0 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminuq 64(%rdi), %zmm0, %zmm1 -; AVX512BW-NEXT: vpminuq (%rdi), %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: trunc_usat_v16i64_v16i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512BWVL-NEXT: vmovdqa64 64(%rdi), %zmm1 -; AVX512BWVL-NEXT: vpmovusqb %zmm1, %xmm1 -; AVX512BWVL-NEXT: vpmovusqb %zmm0, %xmm0 -; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: trunc_usat_v16i64_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 +; AVX512-NEXT: vpmovusqb %zmm1, %xmm1 +; AVX512-NEXT: vpmovusqb %zmm0, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq ; ; SKX-LABEL: trunc_usat_v16i64_v16i8: ; SKX: # %bb.0: @@ -3698,8 +3664,8 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; SSE2-NEXT: pandn 
{{.*}}(%rip), %xmm2 ; SSE2-NEXT: por %xmm0, %xmm2 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 -; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 +; SSE2-NEXT: packuswb %xmm2, %xmm2 ; SSE2-NEXT: movd %xmm2, (%rdi) ; SSE2-NEXT: retq ; @@ -3907,7 +3873,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; SSE2-NEXT: pandn %xmm2, %xmm5 ; SSE2-NEXT: por %xmm0, %xmm5 ; SSE2-NEXT: packuswb %xmm6, %xmm5 -; SSE2-NEXT: packuswb %xmm0, %xmm5 +; SSE2-NEXT: packuswb %xmm5, %xmm5 ; SSE2-NEXT: movq %xmm5, (%rdi) ; SSE2-NEXT: retq ; @@ -3929,7 +3895,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; SSSE3-NEXT: pandn %xmm2, %xmm5 ; SSSE3-NEXT: por %xmm0, %xmm5 ; SSSE3-NEXT: packuswb %xmm6, %xmm5 -; SSSE3-NEXT: packuswb %xmm0, %xmm5 +; SSSE3-NEXT: packuswb %xmm5, %xmm5 ; SSSE3-NEXT: movq %xmm5, (%rdi) ; SSSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/xor-icmp.ll b/llvm/test/CodeGen/X86/xor-icmp.ll index 1b5acce51d553..4062e06b4c278 100644 --- a/llvm/test/CodeGen/X86/xor-icmp.ll +++ b/llvm/test/CodeGen/X86/xor-icmp.ll @@ -87,3 +87,90 @@ bb: ; preds = %entry return: ; preds = %entry ret i32 undef } + +; PR45703 +; https://bugs.llvm.org/show_bug.cgi?id=45703 + +define i1 @xor_not_bools(i1 zeroext %x, i1 zeroext %y) nounwind { +; X32-LABEL: xor_not_bools: +; X32: # %bb.0: +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: xorb {{[0-9]+}}(%esp), %al +; X32-NEXT: xorb $1, %al +; X32-NEXT: retl +; +; X64-LABEL: xor_not_bools: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: xorb $1, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %xor = xor i1 %x, %y + %not = xor i1 %xor, true + ret i1 %not +} + +; This is probably not canonical IR; just testing another possible pattern. 
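+; (A note on the pattern, for reference: for i1 values, not(xor %a, %b)
+; is simply the equality %a == %b, so the canonical form of the tests in
+; this group - assuming instcombine had already run - would be a single
+;   %not = icmp eq i1 %a, %b
+; A fold that merges the trailing 'xorb $1, %al' into the preceding setcc
+; is presumably the codegen improvement these baselines anticipate.)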
+ +define zeroext i1 @xor_not_cmps(i32 %x, i32 %y) nounwind { +; X32-LABEL: xor_not_cmps: +; X32: # %bb.0: +; X32-NEXT: cmpl $42, {{[0-9]+}}(%esp) +; X32-NEXT: setne %cl +; X32-NEXT: cmpl $235, {{[0-9]+}}(%esp) +; X32-NEXT: sete %al +; X32-NEXT: xorb %cl, %al +; X32-NEXT: xorb $1, %al +; X32-NEXT: retl +; +; X64-LABEL: xor_not_cmps: +; X64: # %bb.0: +; X64-NEXT: cmpl $42, %edi +; X64-NEXT: setne %cl +; X64-NEXT: cmpl $235, %esi +; X64-NEXT: sete %al +; X64-NEXT: xorb %cl, %al +; X64-NEXT: xorb $1, %al +; X64-NEXT: retq + %cmpx = icmp ne i32 %x, 42 + %cmpy = icmp eq i32 %y, 235 + %xor = xor i1 %cmpx, %cmpy + %not = xor i1 %xor, 1 + ret i1 %not +} + +define zeroext i1 @xor_not_cmps_extra_use(i32 %x, i32 %y, i32* %p) nounwind { +; X32-LABEL: xor_not_cmps_extra_use: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: cmpl $42, {{[0-9]+}}(%esp) +; X32-NEXT: setne %dl +; X32-NEXT: cmpl $235, {{[0-9]+}}(%esp) +; X32-NEXT: sete %al +; X32-NEXT: xorb %dl, %al +; X32-NEXT: movzbl %al, %edx +; X32-NEXT: movl %edx, (%ecx) +; X32-NEXT: xorb $1, %al +; X32-NEXT: retl +; +; X64-LABEL: xor_not_cmps_extra_use: +; X64: # %bb.0: +; X64-NEXT: cmpl $42, %edi +; X64-NEXT: setne %al +; X64-NEXT: cmpl $235, %esi +; X64-NEXT: sete %cl +; X64-NEXT: xorb %al, %cl +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: xorb $1, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %cmpx = icmp ne i32 %x, 42 + %cmpy = icmp eq i32 %y, 235 + %xor = xor i1 %cmpx, %cmpy + %z = zext i1 %xor to i32 + store i32 %z, i32* %p + %not = xor i1 %xor, 1 + ret i1 %not +} diff --git a/llvm/test/DebugInfo/COFF/global_visibility.ll b/llvm/test/DebugInfo/COFF/global_visibility.ll index bdf603580b166..8a3e8307785cb 100644 --- a/llvm/test/DebugInfo/COFF/global_visibility.ll +++ b/llvm/test/DebugInfo/COFF/global_visibility.ll @@ -51,7 +51,7 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: local_int +; CHECK: DisplayName: foo::local_int ; CHECK: LinkageName: ?local_int@?1??foo@@YAXXZ@4HA ; CHECK: } ; CHECK: BlockSym { @@ -59,7 +59,7 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: nested_int +; CHECK: DisplayName: foo::nested_int ; CHECK: LinkageName: ?nested_int@?1??foo@@YAXXZ@4HA ; CHECK: } ; CHECK: ScopeEndSym { @@ -78,7 +78,7 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: local_int +; CHECK: DisplayName: bar::local_int ; CHECK: LinkageName: ?local_int@?1??bar@@YAXXZ@4HA ; CHECK: } ; CHECK: BlockSym { @@ -86,7 +86,7 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: nested_int +; CHECK: DisplayName: bar::nested_int ; CHECK: LinkageName: ?nested_int@?1??bar@@YAXXZ@4HA ; CHECK: } ; CHECK: ScopeEndSym { diff --git a/llvm/test/DebugInfo/COFF/globals.ll b/llvm/test/DebugInfo/COFF/globals.ll index a9a437db73944..3312e81618a91 100644 --- a/llvm/test/DebugInfo/COFF/globals.ll +++ b/llvm/test/DebugInfo/COFF/globals.ll @@ -4,111 +4,146 @@ ; RUN: llc < %s -filetype=obj | obj2yaml | FileCheck %s --check-prefix=YAML ; C++ source to regenerate: -; $ cat t.cpp +; $ cat a.cpp ; int first; ; template struct A { static const int comdat = 3; }; -; const int *middle = &A::comdat; +; thread_local const int *middle = &A::comdat; +; namespace foo { +; thread_local int globalTLS = 4; +; static thread_local int staticTLS = 5; +; int justGlobal = 6; +; static int globalStatic = 7; +; } ; int last; -; $ clang t.cpp -S -emit-llvm -g -gcodeview -o 
t.ll +; int bar() { +; return foo::globalStatic + foo::globalTLS + foo::staticTLS; +; } +; $ clang-cl a.cpp /c /Z7 /GS- /clang:-S /clang:-emit-llvm ; ASM: .section .debug$S,"dr" -; ASM: .p2align 2 -; ASM: .long 4 # Debug section magic +; ASM-NEXT: .p2align 2 +; ASM-NEXT: .long 4 # Debug section magic -; ASM: .short {{.*-.*}} # Record length -; ASM: .short 4364 # Record kind: S_LDATA32 -; ASM: .long 116 # Type -; ASM: .secrel32 "?first@@3HA" # DataOffset -; ASM: .secidx "?first@@3HA" # Segment -; ASM: .asciz "first" # Name +; ASM: .short 4365 # Record kind: S_GDATA32 +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?first@@3HA" # DataOffset +; ASM-NEXT: .secidx "?first@@3HA" # Segment +; ASM-NEXT: .asciz "first" # Name +; ASM-NEXT: .p2align 2 + +; ASM: .short 4371 # Record kind: S_GTHREAD32 +; ASM-NEXT: .long 4100 # Type +; ASM-NEXT: .secrel32 "?middle@@3PEBHEB" # DataOffset +; ASM-NEXT: .secidx "?middle@@3PEBHEB" # Segment +; ASM-NEXT: .asciz "middle" # Name +; ASM-NEXT: .p2align 2 -; ASM: .short {{.*-.*}} # Record length ; ASM: .short 4371 # Record kind: S_GTHREAD32 -; ASM: .long 4097 # Type -; ASM: .secrel32 "?middle@@3PEBHEB" # DataOffset -; ASM: .secidx "?middle@@3PEBHEB" # Segment -; ASM: .asciz "middle" # Name +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?globalTLS@foo@@3HA" # DataOffset +; ASM-NEXT: .secidx "?globalTLS@foo@@3HA" # Segment +; ASM-NEXT: .asciz "foo::globalTLS" # Name +; ASM-NEXT: .p2align 2 + +; ASM: .short 4365 # Record kind: S_GDATA32 +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?justGlobal@foo@@3HA" # DataOffset +; ASM-NEXT: .secidx "?justGlobal@foo@@3HA" # Segment +; ASM-NEXT: .asciz "foo::justGlobal" # Name +; ASM-NEXT: .p2align 2 -; ASM: .short {{.*-.*}} # Record length ; ASM: .short 4365 # Record kind: S_GDATA32 -; ASM: .long 116 # Type -; ASM: .secrel32 "?last@@3HA" # DataOffset -; ASM: .secidx "?last@@3HA" # Segment -; ASM: .asciz "last" # Name +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?last@@3HA" # DataOffset +; ASM-NEXT: .secidx "?last@@3HA" # Segment +; ASM-NEXT: .asciz "last" # Name +; ASM-NEXT: .p2align 2 + +; ASM: .short 4364 # Record kind: S_LDATA32 +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?globalStatic@foo@@3HA" # DataOffset +; ASM-NEXT: .secidx "?globalStatic@foo@@3HA" # Segment +; ASM-NEXT: .asciz "foo::globalStatic" # Name +; ASM-NEXT: .p2align 2 + +; ASM: .short 4370 # Record kind: S_LTHREAD32 +; ASM-NEXT: .long 116 # Type +; ASM-NEXT: .secrel32 "?staticTLS@foo@@3HA" # DataOffset +; ASM-NEXT: .secidx "?staticTLS@foo@@3HA" # Segment +; ASM-NEXT: .asciz "foo::staticTLS" # Name +; ASM-NEXT: .p2align 2 ; ASM: .section .debug$S,"dr",associative,"?comdat@?$A@X@@2HB" -; ASM: .p2align 2 -; ASM: .long 4 # Debug section magic +; ASM-NEXT: .p2align 2 +; ASM-NEXT: .long 4 # Debug section magic -; ASM: .short {{.*-.*}} # Record length ; ASM: .short 4365 # Record kind: S_GDATA32 -; ASM: .long 4096 # Type -; ASM: .secrel32 "?comdat@?$A@X@@2HB" # DataOffset -; ASM: .secidx "?comdat@?$A@X@@2HB" # Segment -; ASM: .asciz "comdat" # Name - -; OBJ: CodeViewTypes [ -; OBJ: Section: .debug$T -; OBJ: Magic: 0x4 -; OBJ: Modifier (0x1000) { -; OBJ: TypeLeafKind: LF_MODIFIER (0x1001) -; OBJ: ModifiedType: int (0x74) -; OBJ: Modifiers [ (0x1) -; OBJ: Const (0x1) -; OBJ: ] -; OBJ: } -; OBJ: Pointer (0x1001) { -; OBJ: TypeLeafKind: LF_POINTER (0x1002) -; OBJ: PointeeType: const int (0x1000) -; OBJ: PtrType: Near64 (0xC) -; OBJ: PtrMode: Pointer (0x0) -; OBJ: IsFlat: 0 -; OBJ: IsConst: 0 -; OBJ: IsVolatile: 0 -; OBJ: IsUnaligned: 
0 -; OBJ: } -; OBJ: ] +; ASM-NEXT: .long 4099 # Type +; ASM-NEXT: .secrel32 "?comdat@?$A@X@@2HB" # DataOffset +; ASM-NEXT: .secidx "?comdat@?$A@X@@2HB" # Segment +; ASM-NEXT: .asciz "comdat" # Name ; OBJ: CodeViewDebugInfo [ ; OBJ: Section: .debug$S ; OBJ: Magic: 0x4 ; OBJ: Subsection [ -; OBJ: SubSectionType: Symbols (0xF1) -; OBJ: DataSym { -; OBJ: Kind: S_LDATA32 (0x110C) -; OBJ: DataOffset: ?first@@3HA+0x0 -; OBJ: Type: int (0x74) -; OBJ: DisplayName: first -; OBJ: LinkageName: ?first@@3HA -; OBJ: } -; OBJ: GlobalTLS { -; OBJ: DataOffset: ?middle@@3PEBHEB+0x0 -; OBJ: Type: const int* (0x1001) -; OBJ: DisplayName: middle -; OBJ: LinkageName: ?middle@@3PEBHEB -; OBJ: } -; OBJ: GlobalData { -; OBJ: Kind: S_GDATA32 (0x110D) -; OBJ: DataOffset: ?last@@3HA+0x0 -; OBJ: Type: int (0x74) -; OBJ: DisplayName: last -; OBJ: LinkageName: ?last@@3HA -; OBJ: } -; OBJ: ] -; OBJ: ] -; OBJ: CodeViewDebugInfo [ -; OBJ: Section: .debug$S -; OBJ: Magic: 0x4 -; OBJ: Subsection [ -; OBJ: SubSectionType: Symbols (0xF1) -; OBJ: GlobalData { -; OBJ: DataOffset: ?comdat@?$A@X@@2HB+0x0 -; OBJ: Type: const int (0x1000) -; OBJ: DisplayName: comdat -; OBJ: LinkageName: ?comdat@?$A@X@@2HB -; OBJ: } -; OBJ: ] -; OBJ: ] + +; OBJ-LABEL: GlobalData { +; OBJ-NEXT: Kind: S_GDATA32 (0x110D) +; OBJ-NEXT: DataOffset: ?first@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: first +; OBJ-NEXT: LinkageName: ?first@@3HA +; OBJ-NEXT: } +; OBJ-NEXT: GlobalTLS { +; OBJ-NEXT: Kind: S_GTHREAD32 (0x1113) +; OBJ-NEXT: DataOffset: ?middle@@3PEBHEB+0x0 +; OBJ-NEXT: Type: const int* (0x1004) +; OBJ-NEXT: DisplayName: middle +; OBJ-NEXT: LinkageName: ?middle@@3PEBHEB +; OBJ-NEXT: } +; OBJ-NEXT: GlobalTLS { +; OBJ-NEXT: Kind: S_GTHREAD32 (0x1113) +; OBJ-NEXT: DataOffset: ?globalTLS@foo@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: foo::globalTLS +; OBJ-NEXT: LinkageName: ?globalTLS@foo@@3HA +; OBJ-NEXT: } +; OBJ-NEXT: GlobalData { +; OBJ-NEXT: Kind: S_GDATA32 (0x110D) +; OBJ-NEXT: DataOffset: ?justGlobal@foo@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: foo::justGlobal +; OBJ-NEXT: LinkageName: ?justGlobal@foo@@3HA +; OBJ-NEXT: } +; OBJ-NEXT: GlobalData { +; OBJ-NEXT: Kind: S_GDATA32 (0x110D) +; OBJ-NEXT: DataOffset: ?last@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: last +; OBJ-NEXT: LinkageName: ?last@@3HA +; OBJ-NEXT: } +; OBJ-NEXT: DataSym { +; OBJ-NEXT: Kind: S_LDATA32 (0x110C) +; OBJ-NEXT: DataOffset: ?globalStatic@foo@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: foo::globalStatic +; OBJ-NEXT: LinkageName: ?globalStatic@foo@@3HA +; OBJ-NEXT: } +; OBJ-NEXT: ThreadLocalDataSym { +; OBJ-NEXT: Kind: S_LTHREAD32 (0x1112) +; OBJ-NEXT: DataOffset: ?staticTLS@foo@@3HA+0x0 +; OBJ-NEXT: Type: int (0x74) +; OBJ-NEXT: DisplayName: foo::staticTLS +; OBJ-NEXT: LinkageName: ?staticTLS@foo@@3HA +; OBJ-NEXT: } + +; OBJ: GlobalData { +; OBJ-NEXT: Kind: S_GDATA32 (0x110D) +; OBJ-LABEL: DataOffset: ?comdat@?$A@X@@2HB+0x0 +; OBJ-NEXT: Type: const int (0x1003) +; OBJ-NEXT: DisplayName: comdat +; OBJ-NEXT: LinkageName: ?comdat@?$A@X@@2HB ; YAML-LABEL: - Name: '.debug$S' ; YAML: Subsections: @@ -116,75 +151,115 @@ ; YAML: Records: ; YAML: - Kind: S_COMPILE3 ; YAML: Compile3Sym: + ; YAML: - !Symbols -; YAML: Records: -; YAML: - Kind: S_LDATA32 -; YAML: DataSym: +; YAML-NEXT: Records: +; YAML-LABEL: - Kind: S_GDATA32 +; YAML-NEXT: DataSym: ; YAML-NOT: Segment -; YAML: Type: 116 +; YAML-NEXT: Type: 116 ; YAML-NOT: Segment -; YAML: DisplayName: first +; YAML-NEXT: 
DisplayName: first ; YAML-NOT: Segment -; YAML: - Kind: S_GTHREAD32 -; YAML: ThreadLocalDataSym: -; YAML: Type: 4097 -; YAML: DisplayName: middle -; YAML: - Kind: S_GDATA32 -; YAML: DataSym: +; YAML-NEXT: - Kind: S_GTHREAD32 +; YAML-NEXT: ThreadLocalDataSym: +; YAML-NEXT: Type: 4100 +; YAML-NEXT: DisplayName: middle +; YAML-NEXT: - Kind: S_GTHREAD32 +; YAML-NEXT: ThreadLocalDataSym: +; YAML-NEXT: Type: 116 +; YAML-NEXT: DisplayName: 'foo::globalTLS' +; YAML-NEXT: - Kind: S_GDATA32 +; YAML-NEXT: DataSym: ; YAML-NOT: Segment -; YAML: Type: 116 -; YAML-NOT: Offset +; YAML-NEXT: Type: 116 ; YAML-NOT: Segment -; YAML: DisplayName: last +; YAML-NEXT: DisplayName: 'foo::justGlobal' ; YAML-NOT: Segment +; YAML-NEXT: - Kind: S_GDATA32 +; YAML-NEXT: DataSym: +; YAML-NEXT: Type: 116 +; YAML-NEXT: DisplayName: last +; YAML-NEXT: - Kind: S_LDATA32 +; YAML-NEXT: DataSym: +; YAML-NEXT: Type: 116 +; YAML-NEXT: DisplayName: 'foo::globalStatic' +; YAML-NEXT: - Kind: S_LTHREAD32 +; YAML-NEXT: ThreadLocalDataSym: +; YAML-NEXT: Type: 116 +; YAML-NEXT: DisplayName: 'foo::staticTLS' -; The missing offsets are represented as relocations against this section. -; YAML: Relocations: -; YAML: - VirtualAddress: 92 -; YAML: SymbolName: '?first@@3HA' -; YAML: Type: IMAGE_REL_AMD64_SECREL -; YAML: - VirtualAddress: 96 -; YAML: SymbolName: '?first@@3HA' -; YAML: Type: IMAGE_REL_AMD64_SECTION +; ModuleID = 'a.cpp' +source_filename = "a.cpp" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.25.28614" -; ModuleID = 't.cpp' -source_filename = "t.cpp" -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc19.0.23918" +$"?comdat@?$A@X@@2HB" = comdat any -$"\01?comdat@?$A@X@@2HB" = comdat any +@"?first@@3HA" = dso_local global i32 0, align 4, !dbg !0 +@"?comdat@?$A@X@@2HB" = linkonce_odr dso_local constant i32 3, comdat, align 4, !dbg !6 +@"?middle@@3PEBHEB" = dso_local thread_local global i32* @"?comdat@?$A@X@@2HB", align 8, !dbg !15 +@"?globalTLS@foo@@3HA" = dso_local thread_local global i32 4, align 4, !dbg !18 +@"?justGlobal@foo@@3HA" = dso_local global i32 6, align 4, !dbg !21 +@"?last@@3HA" = dso_local global i32 0, align 4, !dbg !23 +@"?globalStatic@foo@@3HA" = internal global i32 7, align 4, !dbg !25 +@"?staticTLS@foo@@3HA" = internal thread_local global i32 5, align 4, !dbg !27 -@"\01?first@@3HA" = internal global i32 0, align 4, !dbg !0 -@"\01?comdat@?$A@X@@2HB" = linkonce_odr constant i32 3, comdat, align 4, !dbg !6 -@"\01?middle@@3PEBHEB" = thread_local global i32* @"\01?comdat@?$A@X@@2HB", align 8, !dbg !15 -@"\01?last@@3HA" = global i32 0, align 4, !dbg !18 +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @"?bar@@YAHXZ"() #0 !dbg !36 { +entry: + %0 = load i32, i32* @"?globalStatic@foo@@3HA", align 4, !dbg !39 + %1 = load i32, i32* @"?globalTLS@foo@@3HA", align 4, !dbg !39 + %add = add nsw i32 %0, %1, !dbg !39 + %2 = load i32, i32* @"?staticTLS@foo@@3HA", align 4, !dbg !39 + %add1 = add nsw i32 %add, %2, !dbg !39 + ret i32 %add1, !dbg !39 +} + +attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!20, !21, !22} -!llvm.ident = !{!23} +!llvm.linker.options = !{!29, !30} +!llvm.module.flags = !{!31, !32, !33, !34} +!llvm.ident = !{!35} -!0 = distinct !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = !DIGlobalVariable(name: "first", linkageName: "\01?first@@3HA", scope: !2, file: !3, line: 1, type: !9, isLocal: true, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.9.0 (trunk 271937)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) -!3 = !DIFile(filename: "t.cpp", directory: "D:\5Csrc\5Cllvm\5Cbuild") +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "first", linkageName: "?first@@3HA", scope: !2, file: !3, line: 1, type: !9, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git f5b1301ce8575f6d82e87031a1a5485c33637a93)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None) +!3 = !DIFile(filename: "a.cpp", directory: "F:\\llvm-project\\__test", checksumkind: CSK_MD5, checksum: "65c2a7701cffb7a2e8d4caf1cc24caa7") !4 = !{} -!5 = !{!0, !6, !15, !18} -!6 = distinct !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) -!7 = !DIGlobalVariable(name: "comdat", linkageName: "\01?comdat@?$A@X@@2HB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true, declaration: !10) +!5 = !{!0, !6, !15, !18, !21, !23, !25, !27} +!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) +!7 = distinct !DIGlobalVariable(name: "comdat", linkageName: "?comdat@?$A@X@@2HB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true, declaration: !10) !8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9) -!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !10 = !DIDerivedType(tag: DW_TAG_member, name: "comdat", scope: !11, file: !3, line: 2, baseType: !8, flags: DIFlagStaticMember, extraData: i32 3) -!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", file: !3, line: 2, size: 8, align: 8, elements: !12, templateParams: !13) +!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", file: !3, line: 2, size: 8, flags: DIFlagTypePassByValue, elements: !12, templateParams: !13, identifier: ".?AU?$A@X@@") !12 = !{!10} !13 = !{!14} !14 = !DITemplateTypeParameter(name: "T", type: null) -!15 = distinct !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) -!16 = !DIGlobalVariable(name: "middle", linkageName: "\01?middle@@3PEBHEB", scope: !2, file: !3, line: 3, type: !17, isLocal: false, isDefinition: true) -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 64) -!18 = distinct !DIGlobalVariableExpression(var: !19, expr: !DIExpression()) -!19 = !DIGlobalVariable(name: "last", linkageName: "\01?last@@3HA", scope: !2, file: !3, line: 4, type: !9, isLocal: false, isDefinition: true) -!20 = !{i32 2, !"CodeView", i32 1} -!21 = !{i32 2, !"Debug Info Version", i32 3} -!22 = !{i32 1, !"PIC Level", i32 2} -!23 = !{!"clang version 3.9.0 (trunk 271937)"} - +!15 = !DIGlobalVariableExpression(var: !16, expr: 
!DIExpression()) +!16 = distinct !DIGlobalVariable(name: "middle", linkageName: "?middle@@3PEBHEB", scope: !2, file: !3, line: 3, type: !17, isLocal: false, isDefinition: true) +!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) +!18 = !DIGlobalVariableExpression(var: !19, expr: !DIExpression()) +!19 = distinct !DIGlobalVariable(name: "globalTLS", linkageName: "?globalTLS@foo@@3HA", scope: !20, file: !3, line: 5, type: !9, isLocal: false, isDefinition: true) +!20 = !DINamespace(name: "foo", scope: null) +!21 = !DIGlobalVariableExpression(var: !22, expr: !DIExpression()) +!22 = distinct !DIGlobalVariable(name: "justGlobal", linkageName: "?justGlobal@foo@@3HA", scope: !20, file: !3, line: 7, type: !9, isLocal: false, isDefinition: true) +!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression()) +!24 = distinct !DIGlobalVariable(name: "last", linkageName: "?last@@3HA", scope: !2, file: !3, line: 10, type: !9, isLocal: false, isDefinition: true) +!25 = !DIGlobalVariableExpression(var: !26, expr: !DIExpression()) +!26 = distinct !DIGlobalVariable(name: "globalStatic", linkageName: "?globalStatic@foo@@3HA", scope: !20, file: !3, line: 8, type: !9, isLocal: true, isDefinition: true) +!27 = !DIGlobalVariableExpression(var: !28, expr: !DIExpression()) +!28 = distinct !DIGlobalVariable(name: "staticTLS", linkageName: "?staticTLS@foo@@3HA", scope: !20, file: !3, line: 6, type: !9, isLocal: true, isDefinition: true) +!29 = !{!"/DEFAULTLIB:libcmt.lib"} +!30 = !{!"/DEFAULTLIB:oldnames.lib"} +!31 = !{i32 2, !"CodeView", i32 1} +!32 = !{i32 2, !"Debug Info Version", i32 3} +!33 = !{i32 1, !"wchar_size", i32 2} +!34 = !{i32 7, !"PIC Level", i32 2} +!35 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git f5b1301ce8575f6d82e87031a1a5485c33637a93)"} +!36 = distinct !DISubprogram(name: "bar", linkageName: "?bar@@YAHXZ", scope: !3, file: !3, line: 11, type: !37, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!37 = !DISubroutineType(types: !38) +!38 = !{!9} +!39 = !DILocation(line: 12, scope: !36) diff --git a/llvm/test/DebugInfo/Generic/global-sra-struct-zero-length.ll b/llvm/test/DebugInfo/Generic/global-sra-struct-zero-length.ll new file mode 100644 index 0000000000000..35b9953ffd513 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/global-sra-struct-zero-length.ll @@ -0,0 +1,69 @@ +; RUN: opt -S -globalopt < %s | FileCheck %s +; Generated at -O2 -g from: +; typedef struct { +; } a; +; static struct { +; long b; +; a c; +; } d; +; e() { +; long f = d.b + 1; +; d.b = f; +; } +; (with some simplification by hand) + +; Check that the global variable "d" is not +; emitted as a fragment after the member "c" +; is removed. +; d.b is referenced, but d.c has zero length. +; So a fragment covering d.b would be the same +; size as d itself. 
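+; For illustration only (a hypothetical, rejected form, not part of this
+; test): without the zero-length check, SRA would attach a fragment
+; expression along the lines of
+;   !DIGlobalVariableExpression(var: !1,
+;                               expr: !DIExpression(DW_OP_LLVM_fragment, 0, 64))
+; and a fragment at bit offset 0 with width 64 spans all of the 64-bit "d",
+; which the IR verifier rejects as a fragment covering the entire variable.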
+ +source_filename = "pr45335.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { i64, %struct.a } +%struct.a = type {} + +; CHECK: @d.0 = internal unnamed_addr global i64 0, align 8, !dbg ![[GVE:.*]] +@d = internal global %struct.anon zeroinitializer, align 8, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @e() #0 !dbg !18 { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i32 0, i32 0), align 8 + %add = add nsw i64 %0, 1 + call void @llvm.dbg.value(metadata i64 %add, metadata !24, metadata !DIExpression()), !dbg !25 + store i64 %add, i64* getelementptr inbounds (%struct.anon, %struct.anon* @d, i32 0, i32 0), align 8 + ret i32 undef +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15} + +; CHECK: ![[GVE]] = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 6, type: !7, isLocal: true, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !{}, globals: !{!0}, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "pr45335.c", directory: "/") +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 3, size: 64, elements: !8) +!8 = !{!9, !11} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !7, file: !3, line: 4, baseType: !10, size: 64) +!10 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !7, file: !3, line: 5, baseType: !12, offset: 64) +!12 = !DIDerivedType(tag: DW_TAG_typedef, name: "a", file: !3, line: 2, baseType: !13) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 1, elements: !{}) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!18 = distinct !DISubprogram(name: "e", scope: !3, file: !3, line: 7, type: !19, scopeLine: 7, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !{}) +!19 = !DISubroutineType(types: !{!21}) +!21 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!24 = !DILocalVariable(name: "f", scope: !18, file: !3, line: 8, type: !10) +!25 = !DILocation(line: 0, scope: !18) diff --git a/llvm/test/DebugInfo/X86/dwarf-callsite-related-attrs.ll b/llvm/test/DebugInfo/X86/dwarf-callsite-related-attrs.ll index 3a34ff38809f9..53d21764ad04f 100644 --- a/llvm/test/DebugInfo/X86/dwarf-callsite-related-attrs.ll +++ b/llvm/test/DebugInfo/X86/dwarf-callsite-related-attrs.ll @@ -20,7 +20,7 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis -o /dev/null ; VERIFY: No errors. 
-; STATS: "call site DIEs":6 +; STATS: "#call site DIEs":6 @sink = global i32 0, align 4, !dbg !0 diff --git a/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll new file mode 100644 index 0000000000000..a070cedca37d9 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -asan -S | FileCheck %s +; Test that for call instructions, the by-value arguments are instrumented. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.bar = type { %struct.foo } +%struct.foo = type { i8*, i8*, i8* } +define dso_local void @func2(%struct.foo* %foo) sanitize_address { +; CHECK-LABEL: @func2 + tail call void @func1(%struct.foo* byval(%struct.foo) align 8 %foo) #2 +; CHECK: call void @__asan_report_load + ret void +; CHECK: ret void +} +declare dso_local void @func1(%struct.foo* byval(%struct.foo) align 8) + +!0 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll b/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll index 6796e94e90539..65bee698e23dd 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test -sanitizer-coverage-inline-bool-flag=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-bool-flag=1 -S | FileCheck %s ; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-bool-flag=1 -S | FileCheck %s @@ -5,9 +6,18 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i1, i1* getelementptr inbounds ([1 x i1], [1 x i1]* @__sancov_gen_, i64 0, i64 0), align 1, !nosanitize !1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i1 [[TMP0]], false +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: store i1 true, i1* getelementptr inbounds ([1 x i1], [1 x i1]* @__sancov_gen_, i64 0, i64 0), align 1, !nosanitize !1 +; CHECK-NEXT: br label [[TMP3]] +; CHECK: 3: +; CHECK-NEXT: ret void +; entry: -; CHECK: @__sancov_gen_ = private global [1 x i1] zeroinitializer, section "__sancov_bools", comdat($foo), align 1, !associated !0 -; CHECK: store i1 true, i1* getelementptr inbounds ([1 x i1], [1 x i1]* @__sancov_gen_, i64 0, i64 0), align 1, !nosanitize !1 ret void } ; CHECK: call void @__sanitizer_cov_bool_flag_init(i1* bitcast (i1** @__start___sancov_bools to i1*), i1* bitcast (i1** @__stop___sancov_bools to i1*)) diff --git a/llvm/test/LTO/X86/type-mapping-bug3.ll b/llvm/test/LTO/X86/type-mapping-bug3.ll index 2f845a56a9292..6d9e81d6cdcdd 100644 --- a/llvm/test/LTO/X86/type-mapping-bug3.ll +++ b/llvm/test/LTO/X86/type-mapping-bug3.ll @@ -25,7 +25,7 @@ define void @b() { entry: %f.addr = alloca %"T3"*load %"T3"*, %"T3"** %f.addr - ; The call with the getCalledValue() vs getCalledFunction() mismatch. + ; The call with the getCalledOperand() vs getCalledFunction() mismatch. 
call void @d(%"T3"* %0) unreachable } diff --git a/llvm/test/MC/AArch64/armv8.4a-ras.s b/llvm/test/MC/AArch64/armv8.4a-ras.s index d89baf7bd7ebb..5e96d2711a0b7 100644 --- a/llvm/test/MC/AArch64/armv8.4a-ras.s +++ b/llvm/test/MC/AArch64/armv8.4a-ras.s @@ -23,14 +23,6 @@ //CHECK: msr ERXPFGCDN_EL1, x0 // encoding: [0xc0,0x54,0x18,0xd5] //CHECK: mrs x0, ERXPFGCDN_EL1 // encoding: [0xc0,0x54,0x38,0xd5] //CHECK-ERROR: error: expected writable system register or pstate -//CHECK-ERROR: error: expected readable system register - - msr ERXTS_EL1, x0 - mrs x0,ERXTS_EL1 - -//CHECK: msr ERXTS_EL1, x0 // encoding: [0xe0,0x55,0x18,0xd5] -//CHECK: mrs x0, ERXTS_EL1 // encoding: [0xe0,0x55,0x38,0xd5] -//CHECK-ERROR: error: expected writable system register or pstate //CHECK-ERROR: error: expected readable system register msr ERXMISC2_EL1, x0 diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.4a-ras.txt b/llvm/test/MC/Disassembler/AArch64/armv8.4a-ras.txt index ef38b71fb9e01..afae16dc14380 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv8.4a-ras.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv8.4a-ras.txt @@ -4,8 +4,6 @@ 0xa0,0x54,0x38,0xd5 0xc0,0x54,0x18,0xd5 0xc0,0x54,0x38,0xd5 -0xe0,0x55,0x18,0xd5 -0xe0,0x55,0x38,0xd5 0x80,0x54,0x38,0xd5 0x40,0x55,0x18,0xd5 @@ -17,8 +15,6 @@ #CHECK: mrs x0, ERXPFGCTL_EL1 #CHECK: msr ERXPFGCDN_EL1, x0 #CHECK: mrs x0, ERXPFGCDN_EL1 -#CHECK: msr ERXTS_EL1, x0 -#CHECK: mrs x0, ERXTS_EL1 #CHECK: mrs x0, ERXPFGF_EL1 #CHECK: msr ERXMISC2_EL1, x0 diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index 08cc95434c16d..bb50b646ab549 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -34,19 +34,19 @@ 0xFC 0x07 # CHECK: v128.const 50462976, 117835012, 185207048, 252579084 -0xFD 0x02 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F +0xFD 0x0C 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 -0xFD 0x03 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F +0xFD 0x0D 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # Check LEB128 encoding of SIMD instructions -# CHECK: i64x2.all_true -0xFD 0x86 0x01 +# CHECK: i16x8.all_true +0xFD 0x83 0x01 # Including non-canonical LEB128 encodings -# CHECK: i64x2.any_true -# CHECK-NOT: i64.div_u -0xFD 0x85 0x81 0x80 0x80 0x80 0x80 0x00 +# CHECK: i16x8.any_true +# CHECK-NOT: i16x8.neg +0xFD 0x82 0x81 0x80 0x80 0x80 0x80 0x00 # Check br_table, which has its own operand type. # CHECK: br_table {0, 1, 2} diff --git a/llvm/test/MC/PowerPC/ppc-machine.s b/llvm/test/MC/PowerPC/ppc-machine.s index 6b3bf3146cc7c..119fde6f8df65 100644 --- a/llvm/test/MC/PowerPC/ppc-machine.s +++ b/llvm/test/MC/PowerPC/ppc-machine.s @@ -1,6 +1,7 @@ # RUN: llvm-mc -triple powerpc-unknown-unknown %s # RUN: llvm-mc -triple powerpc64-unknown-unknown %s # RUN: llvm-mc -triple powerpc64le-unknown-unknown %s +# RUN: llvm-mc -triple powerpc64le-unknown-unknown -filetype=null %s # For now, the only thing we check is that the .machine directive # is accepted without syntax error. 
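A note on the LEB128 checks in the WebAssembly disassembler test above: SIMD opcodes after the 0xFD prefix are unsigned-LEB128 encoded, so the canonical byte pair 0x83 0x01 decodes to opcode 131 (i16x8.all_true), while the padded sequence 0x82 0x81 0x80 0x80 0x80 0x80 0x00 is a non-canonical spelling of opcode 130 (i16x8.any_true). The following is a minimal decoder sketch, for illustration only; the helper LLVM itself uses is llvm::decodeULEB128 from llvm/Support/LEB128.h.

    #include <cstdint>
    #include <cstdio>

    // Decode one unsigned LEB128 value: each byte contributes its low
    // 7 bits, least-significant group first; a set high bit means
    // "more bytes follow".
    static uint64_t DecodeULEB128(const uint8_t *P, unsigned &Len) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      Len = 0;
      uint8_t Byte;
      do {
        Byte = P[Len++];
        Value |= uint64_t(Byte & 0x7f) << Shift;
        Shift += 7;
      } while (Byte & 0x80);
      return Value;
    }

    int main() {
      unsigned Len;
      const uint8_t Canonical[] = {0x83, 0x01};
      const uint8_t Padded[] = {0x82, 0x81, 0x80, 0x80, 0x80, 0x80, 0x00};
      // Prints "131 130": the padded form still decodes to opcode 130.
      std::printf("%llu %llu\n",
                  (unsigned long long)DecodeULEB128(Canonical, Len),
                  (unsigned long long)DecodeULEB128(Padded, Len));
      return 0;
    }

The CHECK-NOT line in that test guards against a decoder that stops reading the group early and misinterprets the trailing padding bytes as another opcode.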
diff --git a/llvm/test/MC/PowerPC/ppc64-abiversion.s b/llvm/test/MC/PowerPC/ppc64-abiversion.s index d2970f8c90596..49d122772b8c3 100644 --- a/llvm/test/MC/PowerPC/ppc64-abiversion.s +++ b/llvm/test/MC/PowerPC/ppc64-abiversion.s @@ -3,6 +3,7 @@ # RUN: llvm-readobj -h | FileCheck %s # RUN: llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj %s | \ # RUN: llvm-readobj -h | FileCheck %s +# RUN: llvm-mc -triple powerpc64le-unknown-unknown -filetype=null %s .abiversion 2 # CHECK: Flags [ (0x2) diff --git a/llvm/test/MC/RISCV/machine-csr-names.s b/llvm/test/MC/RISCV/machine-csr-names.s index 3932ebce48367..6467b04930649 100644 --- a/llvm/test/MC/RISCV/machine-csr-names.s +++ b/llvm/test/MC/RISCV/machine-csr-names.s @@ -397,20 +397,48 @@ csrrs t1, dpc, zero # uimm12 csrrs t2, 0x7B1, zero +# dscratch0 +# name +# CHECK-INST: csrrs t1, dscratch0, zero +# CHECK-ENC: encoding: [0x73,0x23,0x20,0x7b] +# CHECK-INST-ALIAS: csrr t1, dscratch0 +# uimm12 +# CHECK-INST: csrrs t2, dscratch0, zero +# CHECK-ENC: encoding: [0xf3,0x23,0x20,0x7b] +# CHECK-INST-ALIAS: csrr t2, dscratch0 +# name +csrrs t1, dscratch0, zero +# uimm12 +csrrs t2, 0x7B2, zero + # dscratch # name -# CHECK-INST: csrrs t1, dscratch, zero +# CHECK-INST: csrrs t1, dscratch0, zero # CHECK-ENC: encoding: [0x73,0x23,0x20,0x7b] -# CHECK-INST-ALIAS: csrr t1, dscratch +# CHECK-INST-ALIAS: csrr t1, dscratch0 # uimm12 -# CHECK-INST: csrrs t2, dscratch, zero +# CHECK-INST: csrrs t2, dscratch0, zero # CHECK-ENC: encoding: [0xf3,0x23,0x20,0x7b] -# CHECK-INST-ALIAS: csrr t2, dscratch +# CHECK-INST-ALIAS: csrr t2, dscratch0 # name csrrs t1, dscratch, zero # uimm12 csrrs t2, 0x7B2, zero +# dscratch1 +# name +# CHECK-INST: csrrs t1, dscratch1, zero +# CHECK-ENC: encoding: [0x73,0x23,0x30,0x7b] +# CHECK-INST-ALIAS: csrr t1, dscratch1 +# uimm12 +# CHECK-INST: csrrs t2, dscratch1, zero +# CHECK-ENC: encoding: [0xf3,0x23,0x30,0x7b] +# CHECK-INST-ALIAS: csrr t2, dscratch1 +# name +csrrs t1, dscratch1, zero +# uimm12 +csrrs t2, 0x7B3, zero + # mhpmcounter3 # name # CHECK-INST: csrrs t1, mhpmcounter3, zero diff --git a/llvm/test/MC/SystemZ/regs-bad.s b/llvm/test/MC/SystemZ/regs-bad.s index c4d16ef4cc3d6..db56af96638ba 100644 --- a/llvm/test/MC/SystemZ/regs-bad.s +++ b/llvm/test/MC/SystemZ/regs-bad.s @@ -15,9 +15,9 @@ #CHECK: lr %r0,%a1 #CHECK: error: invalid operand for instruction #CHECK: lr %r0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: lr %r0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: lr %r0,16 +#CHECK: error: unexpected token in argument list #CHECK: lr %r0,0(%r1) lr %f0,%r1 @@ -26,7 +26,7 @@ lr %r0,%f1 lr %r0,%a1 lr %r0,%c1 - lr %r0,0 + lr %r0,16 lr %r0,0(%r1) # Test GR64 operands @@ -43,9 +43,9 @@ #CHECK: lgr %r0,%a1 #CHECK: error: invalid operand for instruction #CHECK: lgr %r0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: lgr %r0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: lgr %r0,16 +#CHECK: error: unexpected token in argument list #CHECK: lgr %r0,0(%r1) lgr %f0,%r1 @@ -54,7 +54,7 @@ lgr %r0,%f1 lgr %r0,%a1 lgr %r0,%c1 - lgr %r0,0 + lgr %r0,16 lgr %r0,0(%r1) # Test GR128 operands @@ -87,9 +87,9 @@ #CHECK: dlr %r0,%a1 #CHECK: error: invalid operand for instruction #CHECK: dlr %r0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: dlr %r0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: dlr %r0,16 +#CHECK: error: unexpected token in argument list #CHECK: dlr %r0,0(%r1) dlr 
%r1,%r0 @@ -106,7 +106,7 @@ dlr %r0,%f1 dlr %r0,%a1 dlr %r0,%c1 - dlr %r0,0 + dlr %r0,16 dlr %r0,0(%r1) # Test FP32 operands @@ -123,9 +123,9 @@ #CHECK: ler %f0,%a1 #CHECK: error: invalid operand for instruction #CHECK: ler %f0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: ler %f0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: ler %f0,16 +#CHECK: error: unexpected token in argument list #CHECK: ler %f0,0(%r1) ler %r0,%f1 @@ -134,7 +134,7 @@ ler %f0,%r1 ler %f0,%a1 ler %f0,%c1 - ler %f0,0 + ler %f0,16 ler %f0,0(%r1) # Test FP64 operands @@ -151,9 +151,9 @@ #CHECK: ldr %f0,%a1 #CHECK: error: invalid operand for instruction #CHECK: ldr %f0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: ldr %f0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: ldr %f0,16 +#CHECK: error: unexpected token in argument list #CHECK: ldr %f0,0(%r1) ldr %r0,%f1 @@ -162,7 +162,7 @@ ldr %f0,%r1 ldr %f0,%a1 ldr %f0,%c1 - ldr %f0,0 + ldr %f0,16 ldr %f0,0(%r1) # Test FP128 operands @@ -195,9 +195,9 @@ #CHECK: lxr %f0,%a1 #CHECK: error: invalid operand for instruction #CHECK: lxr %f0,%c1 -#CHECK: error: invalid operand for instruction -#CHECK: lxr %f0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: lxr %f0,16 +#CHECK: error: unexpected token in argument list #CHECK: lxr %f0,0(%r1) lxr %f2,%f0 @@ -214,7 +214,7 @@ lxr %f0,%r1 lxr %f0,%a1 lxr %f0,%c1 - lxr %f0,0 + lxr %f0,16 lxr %f0,0(%r1) # Test access register operands @@ -225,15 +225,15 @@ #CHECK: ear %r0,%f0 #CHECK: error: invalid operand for instruction #CHECK: ear %r0,%c0 -#CHECK: error: invalid operand for instruction -#CHECK: ear %r0,0 -#CHECK: error: invalid operand for instruction +#CHECK: error: invalid register +#CHECK: ear %r0,16 +#CHECK: error: unexpected token in argument list #CHECK: ear %r0,0(%r1) ear %r0,%r0 ear %r0,%f0 ear %r0,%c0 - ear %r0,0 + ear %r0,16 ear %r0,0(%r1) # Test control register operands @@ -242,13 +242,13 @@ #CHECK: lctl %c0,%r0,0 #CHECK: lctl %c0,%f0,0 #CHECK: lctl %c0,%a0,0 -#CHECK: lctl %c0,0,0 +#CHECK: lctl %c0,16,0 #CHECK: lctl %c0,0(%r1),0 lctl %c0,%r0,0 lctl %c0,%f0,0 lctl %c0,%a0,0 - lctl %c0,0,0 + lctl %c0,16,0 lctl %c0,0(%r1),0 .cfi_startproc diff --git a/llvm/test/MC/SystemZ/regs-good.s b/llvm/test/MC/SystemZ/regs-good.s index c20301133d870..c6157b5e2c855 100644 --- a/llvm/test/MC/SystemZ/regs-good.s +++ b/llvm/test/MC/SystemZ/regs-good.s @@ -8,6 +8,7 @@ #CHECK: lr %r10, %r11 # encoding: [0x18,0xab] #CHECK: lr %r12, %r13 # encoding: [0x18,0xcd] #CHECK: lr %r14, %r15 # encoding: [0x18,0xef] +#CHECK: lr %r0, %r15 # encoding: [0x18,0x0f] lr %r0,%r1 lr %r2,%r3 @@ -17,6 +18,7 @@ lr %r10,%r11 lr %r12,%r13 lr %r14,%r15 + lr 0,15 #CHECK: lgr %r0, %r1 # encoding: [0xb9,0x04,0x00,0x01] #CHECK: lgr %r2, %r3 # encoding: [0xb9,0x04,0x00,0x23] @@ -26,6 +28,7 @@ #CHECK: lgr %r10, %r11 # encoding: [0xb9,0x04,0x00,0xab] #CHECK: lgr %r12, %r13 # encoding: [0xb9,0x04,0x00,0xcd] #CHECK: lgr %r14, %r15 # encoding: [0xb9,0x04,0x00,0xef] +#CHECK: lgr %r0, %r15 # encoding: [0xb9,0x04,0x00,0x0f] lgr %r0,%r1 lgr %r2,%r3 @@ -35,6 +38,7 @@ lgr %r10,%r11 lgr %r12,%r13 lgr %r14,%r15 + lgr 0,15 #CHECK: dlr %r0, %r0 # encoding: [0xb9,0x97,0x00,0x00] #CHECK: dlr %r2, %r0 # encoding: [0xb9,0x97,0x00,0x20] @@ -43,6 +47,7 @@ #CHECK: dlr %r8, %r0 # encoding: [0xb9,0x97,0x00,0x80] #CHECK: dlr %r10, %r0 # encoding: [0xb9,0x97,0x00,0xa0] #CHECK: dlr %r12, %r0 # encoding: [0xb9,0x97,0x00,0xc0] +#CHECK: dlr %r14, 
%r0 # encoding: [0xb9,0x97,0x00,0xe0]
 #CHECK: dlr %r14, %r0 # encoding: [0xb9,0x97,0x00,0xe0]

 dlr %r0,%r0
@@ -53,6 +58,7 @@
 dlr %r10,%r0
 dlr %r12,%r0
 dlr %r14,%r0
+ dlr 14,0

 #CHECK: ler %f0, %f1 # encoding: [0x38,0x01]
 #CHECK: ler %f2, %f3 # encoding: [0x38,0x23]
@@ -62,6 +68,7 @@
 #CHECK: ler %f10, %f11 # encoding: [0x38,0xab]
 #CHECK: ler %f12, %f13 # encoding: [0x38,0xcd]
 #CHECK: ler %f14, %f15 # encoding: [0x38,0xef]
+#CHECK: ler %f0, %f15 # encoding: [0x38,0x0f]

 ler %f0,%f1
 ler %f2,%f3
@@ -71,6 +78,7 @@
 ler %f10,%f11
 ler %f12,%f13
 ler %f14,%f15
+ ler 0,15

 #CHECK: ldr %f0, %f1 # encoding: [0x28,0x01]
 #CHECK: ldr %f2, %f3 # encoding: [0x28,0x23]
@@ -80,6 +88,7 @@
 #CHECK: ldr %f10, %f11 # encoding: [0x28,0xab]
 #CHECK: ldr %f12, %f13 # encoding: [0x28,0xcd]
 #CHECK: ldr %f14, %f15 # encoding: [0x28,0xef]
+#CHECK: ldr %f0, %f15 # encoding: [0x28,0x0f]

 ldr %f0,%f1
 ldr %f2,%f3
@@ -89,16 +98,19 @@
 ldr %f10,%f11
 ldr %f12,%f13
 ldr %f14,%f15
+ ldr 0,15

 #CHECK: lxr %f0, %f1 # encoding: [0xb3,0x65,0x00,0x01]
 #CHECK: lxr %f4, %f5 # encoding: [0xb3,0x65,0x00,0x45]
 #CHECK: lxr %f8, %f9 # encoding: [0xb3,0x65,0x00,0x89]
 #CHECK: lxr %f12, %f13 # encoding: [0xb3,0x65,0x00,0xcd]
+#CHECK: lxr %f0, %f13 # encoding: [0xb3,0x65,0x00,0x0d]

 lxr %f0,%f1
 lxr %f4,%f5
 lxr %f8,%f9
 lxr %f12,%f13
+ lxr 0,13

 #CHECK: cpya %a0, %a1 # encoding: [0xb2,0x4d,0x00,0x01]
 #CHECK: cpya %a2, %a3 # encoding: [0xb2,0x4d,0x00,0x23]
@@ -108,6 +120,7 @@
 #CHECK: cpya %a10, %a11 # encoding: [0xb2,0x4d,0x00,0xab]
 #CHECK: cpya %a12, %a13 # encoding: [0xb2,0x4d,0x00,0xcd]
 #CHECK: cpya %a14, %a15 # encoding: [0xb2,0x4d,0x00,0xef]
+#CHECK: cpya %a0, %a15 # encoding: [0xb2,0x4d,0x00,0x0f]

 cpya %a0,%a1
 cpya %a2,%a3
@@ -117,6 +130,7 @@
 cpya %a10,%a11
 cpya %a12,%a13
 cpya %a14,%a15
+ cpya 0,15

 #CHECK: lctl %c0, %c1, 0 # encoding: [0xb7,0x01,0x00,0x00]
 #CHECK: lctl %c2, %c3, 0 # encoding: [0xb7,0x23,0x00,0x00]
@@ -126,6 +140,7 @@
 #CHECK: lctl %c10, %c11, 0 # encoding: [0xb7,0xab,0x00,0x00]
 #CHECK: lctl %c12, %c13, 0 # encoding: [0xb7,0xcd,0x00,0x00]
 #CHECK: lctl %c14, %c15, 0 # encoding: [0xb7,0xef,0x00,0x00]
+#CHECK: lctl %c0, %c15, 0 # encoding: [0xb7,0x0f,0x00,0x00]

 lctl %c0,%c1,0
 lctl %c2,%c3,0
@@ -135,6 +150,7 @@
 lctl %c10,%c11,0
 lctl %c12,%c13,0
 lctl %c14,%c15,0
+ lctl 0,15,0

 #CHECK: .cfi_offset %r0, 0
diff --git a/llvm/test/MC/WebAssembly/globals.s b/llvm/test/MC/WebAssembly/globals.s
new file mode 100644
index 0000000000000..10d696b7090a7
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/globals.s
@@ -0,0 +1,79 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown < %s | FileCheck %s
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck -check-prefix=BIN %s
+
+# Tests creating and accessing actual wasm globals
+
+.globl read_global
+.globl write_global
+.globaltype foo_global, i32
+.globaltype global2, i64
+.globaltype global3, f32
+.globaltype global4, f64
+
+read_global:
+ .functype read_global () -> (i32)
+ global.get foo_global
+ end_function
+
+write_global:
+ .functype write_global (i32) -> ()
+ local.get 0
+ global.set foo_global
+ global.set global2
+ global.set global3
+ global.set global4
+ end_function
+
+foo_global:
+global2:
+global3:
+global4:
+
+# CHECK: .globl read_global
+# CHECK-NEXT: .globl write_global
+# CHECK: .globaltype foo_global, i32
+# CHECK: foo_global:
+
+# BIN: - Type: GLOBAL
+# BIN-NEXT: Globals:
+# BIN-NEXT: - Index: 0
+# BIN-NEXT: Type: I32
+# BIN-NEXT: Mutable: true
+# BIN-NEXT: InitExpr:
+# BIN-NEXT: Opcode: I32_CONST
+# BIN-NEXT: Value: 0

+# BIN: - Type: CUSTOM
+# BIN-NEXT: Name:
linking +# BIN-NEXT: Version: 2 +# BIN-NEXT: SymbolTable: +# BIN-NEXT: - Index: 0 +# BIN-NEXT: Kind: FUNCTION +# BIN-NEXT: Name: read_global +# BIN-NEXT: Flags: [ ] +# BIN-NEXT: Function: 0 +# BIN-NEXT: - Index: 1 +# BIN-NEXT: Kind: FUNCTION +# BIN-NEXT: Name: write_global +# BIN-NEXT: Flags: [ ] +# BIN-NEXT: Function: 1 +# BIN-NEXT: - Index: 2 +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: Name: foo_global +# BIN-NEXT: Flags: [ BINDING_LOCAL ] +# BIN-NEXT: Global: 0 +# BIN-NEXT: - Index: 3 +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: Name: global2 +# BIN-NEXT: Flags: [ BINDING_LOCAL ] +# BIN-NEXT: Global: 1 +# BIN-NEXT: - Index: 4 +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: Name: global3 +# BIN-NEXT: Flags: [ BINDING_LOCAL ] +# BIN-NEXT: Global: 2 +# BIN-NEXT: - Index: 5 +# BIN-NEXT: Kind: GLOBAL +# BIN-NEXT: Name: global4 +# BIN-NEXT: Flags: [ BINDING_LOCAL ] +# BIN-NEXT: Global: 3 diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 023660ea4dca1..14b15cf4fb4a6 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -6,17 +6,47 @@ main: # CHECK: v128.load 48 # encoding: [0xfd,0x00,0x04,0x30] v128.load 48 - # CHECK: v128.store 48 # encoding: [0xfd,0x01,0x04,0x30] + # CHECK: i16x8.load8x8_s 32 # encoding: [0xfd,0x01,0x03,0x20] + i16x8.load8x8_s 32 + + # CHECK: i16x8.load8x8_u 32 # encoding: [0xfd,0x02,0x03,0x20] + i16x8.load8x8_u 32 + + # CHECK: i32x4.load16x4_s 32 # encoding: [0xfd,0x03,0x03,0x20] + i32x4.load16x4_s 32 + + # CHECK: i32x4.load16x4_u 32 # encoding: [0xfd,0x04,0x03,0x20] + i32x4.load16x4_u 32 + + # CHECK: i64x2.load32x2_s 32 # encoding: [0xfd,0x05,0x03,0x20] + i64x2.load32x2_s 32 + + # CHECK: i64x2.load32x2_u 32 # encoding: [0xfd,0x06,0x03,0x20] + i64x2.load32x2_u 32 + + # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0x07,0x00,0x30] + v8x16.load_splat 48 + + # CHECK: v16x8.load_splat 48 # encoding: [0xfd,0x08,0x01,0x30] + v16x8.load_splat 48 + + # CHECK: v32x4.load_splat 48 # encoding: [0xfd,0x09,0x02,0x30] + v32x4.load_splat 48 + + # CHECK: v64x2.load_splat 48 # encoding: [0xfd,0x0a,0x03,0x30] + v64x2.load_splat 48 + + # CHECK: v128.store 48 # encoding: [0xfd,0x0b,0x04,0x30] v128.store 48 # CHECK: v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - # CHECK-SAME: # encoding: [0xfd,0x02, + # CHECK-SAME: # encoding: [0xfd,0x0c, # CHECK-SAME: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 # CHECK: v128.const 256, 770, 1284, 1798, 2312, 2826, 3340, 3854 - # CHECK-SAME: # encoding: [0xfd,0x02, + # CHECK-SAME: # encoding: [0xfd,0x0c, # CHECK-SAME: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 256, 770, 1284, 1798, 2312, 2826, 3340, 3854 @@ -25,577 +55,535 @@ main: # CHECK: v128.const 0x1.0402p-121, 0x1.0c0a08p-113, # CHECK-SAME: 0x1.14121p-105, 0x1.1c1a18p-97 - # CHECK-SAME: # encoding: [0xfd,0x02, + # CHECK-SAME: # encoding: [0xfd,0x0c, # CHECK-SAME: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 0x1.0402p-121, 0x1.0c0a08p-113, 0x1.14121p-105, 0x1.1c1a18p-97 # CHECK: v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783 - # CHECK-SAME: # encoding: [0xfd,0x02, + # CHECK-SAME: # encoding: [0xfd,0x0c, # CHECK-SAME: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, # CHECK-SAME: 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f] v128.const 0x1.60504030201p-911, 
0x1.e0d0c0b0a0908p-783 # CHECK: v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, # CHECK-SAME: 8, 25, 10, 27, 12, 29, 14, 31 - # CHECK-SAME: # encoding: [0xfd,0x03, + # CHECK-SAME: # encoding: [0xfd,0x0d, # CHECK-SAME: 0x00,0x11,0x02,0x13,0x04,0x15,0x06,0x17, # CHECK-SAME: 0x08,0x19,0x0a,0x1b,0x0c,0x1d,0x0e,0x1f] v8x16.shuffle 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 - # CHECK: i8x16.splat # encoding: [0xfd,0x04] + # CHECK: v8x16.swizzle # encoding: [0xfd,0x0e] + v8x16.swizzle + + # CHECK: i8x16.splat # encoding: [0xfd,0x0f] i8x16.splat - # CHECK: i8x16.extract_lane_s 15 # encoding: [0xfd,0x05,0x0f] + # CHECK: i16x8.splat # encoding: [0xfd,0x10] + i16x8.splat + + # CHECK: i32x4.splat # encoding: [0xfd,0x11] + i32x4.splat + + # CHECK: i64x2.splat # encoding: [0xfd,0x12] + i64x2.splat + + # CHECK: f32x4.splat # encoding: [0xfd,0x13] + f32x4.splat + + # CHECK: f64x2.splat # encoding: [0xfd,0x14] + f64x2.splat + + # CHECK: i8x16.extract_lane_s 15 # encoding: [0xfd,0x15,0x0f] i8x16.extract_lane_s 15 - # CHECK: i8x16.extract_lane_u 15 # encoding: [0xfd,0x06,0x0f] + # CHECK: i8x16.extract_lane_u 15 # encoding: [0xfd,0x16,0x0f] i8x16.extract_lane_u 15 - # CHECK: i8x16.replace_lane 15 # encoding: [0xfd,0x07,0x0f] + # CHECK: i8x16.replace_lane 15 # encoding: [0xfd,0x17,0x0f] i8x16.replace_lane 15 - # CHECK: i16x8.splat # encoding: [0xfd,0x08] - i16x8.splat - - # CHECK: i16x8.extract_lane_s 7 # encoding: [0xfd,0x09,0x07] + # CHECK: i16x8.extract_lane_s 7 # encoding: [0xfd,0x18,0x07] i16x8.extract_lane_s 7 - # CHECK: i16x8.extract_lane_u 7 # encoding: [0xfd,0x0a,0x07] + # CHECK: i16x8.extract_lane_u 7 # encoding: [0xfd,0x19,0x07] i16x8.extract_lane_u 7 - # CHECK: i16x8.replace_lane 7 # encoding: [0xfd,0x0b,0x07] + # CHECK: i16x8.replace_lane 7 # encoding: [0xfd,0x1a,0x07] i16x8.replace_lane 7 - # CHECK: i32x4.splat # encoding: [0xfd,0x0c] - i32x4.splat - - # CHECK: i32x4.extract_lane 3 # encoding: [0xfd,0x0d,0x03] + # CHECK: i32x4.extract_lane 3 # encoding: [0xfd,0x1b,0x03] i32x4.extract_lane 3 - # CHECK: i32x4.replace_lane 3 # encoding: [0xfd,0x0e,0x03] + # CHECK: i32x4.replace_lane 3 # encoding: [0xfd,0x1c,0x03] i32x4.replace_lane 3 - # CHECK: i64x2.splat # encoding: [0xfd,0x0f] - i64x2.splat - - # CHECK: i64x2.extract_lane 1 # encoding: [0xfd,0x10,0x01] + # CHECK: i64x2.extract_lane 1 # encoding: [0xfd,0x1d,0x01] i64x2.extract_lane 1 - # CHECK: i64x2.replace_lane 1 # encoding: [0xfd,0x11,0x01] + # CHECK: i64x2.replace_lane 1 # encoding: [0xfd,0x1e,0x01] i64x2.replace_lane 1 - # CHECK: f32x4.splat # encoding: [0xfd,0x12] - f32x4.splat - - # CHECK: f32x4.extract_lane 3 # encoding: [0xfd,0x13,0x03] + # CHECK: f32x4.extract_lane 3 # encoding: [0xfd,0x1f,0x03] f32x4.extract_lane 3 - # CHECK: f32x4.replace_lane 3 # encoding: [0xfd,0x14,0x03] + # CHECK: f32x4.replace_lane 3 # encoding: [0xfd,0x20,0x03] f32x4.replace_lane 3 - # CHECK: f64x2.splat # encoding: [0xfd,0x15] - f64x2.splat - - # CHECK: f64x2.extract_lane 1 # encoding: [0xfd,0x16,0x01] + # CHECK: f64x2.extract_lane 1 # encoding: [0xfd,0x21,0x01] f64x2.extract_lane 1 - # CHECK: f64x2.replace_lane 1 # encoding: [0xfd,0x17,0x01] + # CHECK: f64x2.replace_lane 1 # encoding: [0xfd,0x22,0x01] f64x2.replace_lane 1 - # CHECK: i8x16.eq # encoding: [0xfd,0x18] + # CHECK: i8x16.eq # encoding: [0xfd,0x23] i8x16.eq - # CHECK: i8x16.ne # encoding: [0xfd,0x19] + # CHECK: i8x16.ne # encoding: [0xfd,0x24] i8x16.ne - # CHECK: i8x16.lt_s # encoding: [0xfd,0x1a] + # CHECK: i8x16.lt_s # encoding: [0xfd,0x25] i8x16.lt_s - # CHECK: i8x16.lt_u # encoding: 
[0xfd,0x1b] + # CHECK: i8x16.lt_u # encoding: [0xfd,0x26] i8x16.lt_u - # CHECK: i8x16.gt_s # encoding: [0xfd,0x1c] + # CHECK: i8x16.gt_s # encoding: [0xfd,0x27] i8x16.gt_s - # CHECK: i8x16.gt_u # encoding: [0xfd,0x1d] + # CHECK: i8x16.gt_u # encoding: [0xfd,0x28] i8x16.gt_u - # CHECK: i8x16.le_s # encoding: [0xfd,0x1e] + # CHECK: i8x16.le_s # encoding: [0xfd,0x29] i8x16.le_s - # CHECK: i8x16.le_u # encoding: [0xfd,0x1f] + # CHECK: i8x16.le_u # encoding: [0xfd,0x2a] i8x16.le_u - # CHECK: i8x16.ge_s # encoding: [0xfd,0x20] + # CHECK: i8x16.ge_s # encoding: [0xfd,0x2b] i8x16.ge_s - # CHECK: i8x16.ge_u # encoding: [0xfd,0x21] + # CHECK: i8x16.ge_u # encoding: [0xfd,0x2c] i8x16.ge_u - # CHECK: i16x8.eq # encoding: [0xfd,0x22] + # CHECK: i16x8.eq # encoding: [0xfd,0x2d] i16x8.eq - # CHECK: i16x8.ne # encoding: [0xfd,0x23] + # CHECK: i16x8.ne # encoding: [0xfd,0x2e] i16x8.ne - # CHECK: i16x8.lt_s # encoding: [0xfd,0x24] + # CHECK: i16x8.lt_s # encoding: [0xfd,0x2f] i16x8.lt_s - # CHECK: i16x8.lt_u # encoding: [0xfd,0x25] + # CHECK: i16x8.lt_u # encoding: [0xfd,0x30] i16x8.lt_u - # CHECK: i16x8.gt_s # encoding: [0xfd,0x26] + # CHECK: i16x8.gt_s # encoding: [0xfd,0x31] i16x8.gt_s - # CHECK: i16x8.gt_u # encoding: [0xfd,0x27] + # CHECK: i16x8.gt_u # encoding: [0xfd,0x32] i16x8.gt_u - # CHECK: i16x8.le_s # encoding: [0xfd,0x28] + # CHECK: i16x8.le_s # encoding: [0xfd,0x33] i16x8.le_s - # CHECK: i16x8.le_u # encoding: [0xfd,0x29] + # CHECK: i16x8.le_u # encoding: [0xfd,0x34] i16x8.le_u - # CHECK: i16x8.ge_s # encoding: [0xfd,0x2a] + # CHECK: i16x8.ge_s # encoding: [0xfd,0x35] i16x8.ge_s - # CHECK: i16x8.ge_u # encoding: [0xfd,0x2b] + # CHECK: i16x8.ge_u # encoding: [0xfd,0x36] i16x8.ge_u - # CHECK: i32x4.eq # encoding: [0xfd,0x2c] + # CHECK: i32x4.eq # encoding: [0xfd,0x37] i32x4.eq - # CHECK: i32x4.ne # encoding: [0xfd,0x2d] + # CHECK: i32x4.ne # encoding: [0xfd,0x38] i32x4.ne - # CHECK: i32x4.lt_s # encoding: [0xfd,0x2e] + # CHECK: i32x4.lt_s # encoding: [0xfd,0x39] i32x4.lt_s - # CHECK: i32x4.lt_u # encoding: [0xfd,0x2f] + # CHECK: i32x4.lt_u # encoding: [0xfd,0x3a] i32x4.lt_u - # CHECK: i32x4.gt_s # encoding: [0xfd,0x30] + # CHECK: i32x4.gt_s # encoding: [0xfd,0x3b] i32x4.gt_s - # CHECK: i32x4.gt_u # encoding: [0xfd,0x31] + # CHECK: i32x4.gt_u # encoding: [0xfd,0x3c] i32x4.gt_u - # CHECK: i32x4.le_s # encoding: [0xfd,0x32] + # CHECK: i32x4.le_s # encoding: [0xfd,0x3d] i32x4.le_s - # CHECK: i32x4.le_u # encoding: [0xfd,0x33] + # CHECK: i32x4.le_u # encoding: [0xfd,0x3e] i32x4.le_u - # CHECK: i32x4.ge_s # encoding: [0xfd,0x34] + # CHECK: i32x4.ge_s # encoding: [0xfd,0x3f] i32x4.ge_s - # CHECK: i32x4.ge_u # encoding: [0xfd,0x35] + # CHECK: i32x4.ge_u # encoding: [0xfd,0x40] i32x4.ge_u - # CHECK: f32x4.eq # encoding: [0xfd,0x40] + # CHECK: f32x4.eq # encoding: [0xfd,0x41] f32x4.eq - # CHECK: f32x4.ne # encoding: [0xfd,0x41] + # CHECK: f32x4.ne # encoding: [0xfd,0x42] f32x4.ne - # CHECK: f32x4.lt # encoding: [0xfd,0x42] + # CHECK: f32x4.lt # encoding: [0xfd,0x43] f32x4.lt - # CHECK: f32x4.gt # encoding: [0xfd,0x43] + # CHECK: f32x4.gt # encoding: [0xfd,0x44] f32x4.gt - # CHECK: f32x4.le # encoding: [0xfd,0x44] + # CHECK: f32x4.le # encoding: [0xfd,0x45] f32x4.le - # CHECK: f32x4.ge # encoding: [0xfd,0x45] + # CHECK: f32x4.ge # encoding: [0xfd,0x46] f32x4.ge - # CHECK: f64x2.eq # encoding: [0xfd,0x46] + # CHECK: f64x2.eq # encoding: [0xfd,0x47] f64x2.eq - # CHECK: f64x2.ne # encoding: [0xfd,0x47] + # CHECK: f64x2.ne # encoding: [0xfd,0x48] f64x2.ne - # CHECK: f64x2.lt # encoding: [0xfd,0x48] + # CHECK: 
f64x2.lt # encoding: [0xfd,0x49] f64x2.lt - # CHECK: f64x2.gt # encoding: [0xfd,0x49] + # CHECK: f64x2.gt # encoding: [0xfd,0x4a] f64x2.gt - # CHECK: f64x2.le # encoding: [0xfd,0x4a] + # CHECK: f64x2.le # encoding: [0xfd,0x4b] f64x2.le - # CHECK: f64x2.ge # encoding: [0xfd,0x4b] + # CHECK: f64x2.ge # encoding: [0xfd,0x4c] f64x2.ge - # CHECK: v128.not # encoding: [0xfd,0x4c] + # CHECK: v128.not # encoding: [0xfd,0x4d] v128.not - # CHECK: v128.and # encoding: [0xfd,0x4d] + # CHECK: v128.and # encoding: [0xfd,0x4e] v128.and - # CHECK: v128.or # encoding: [0xfd,0x4e] + # CHECK: v128.andnot # encoding: [0xfd,0x4f] + v128.andnot + + # CHECK: v128.or # encoding: [0xfd,0x50] v128.or - # CHECK: v128.xor # encoding: [0xfd,0x4f] + # CHECK: v128.xor # encoding: [0xfd,0x51] v128.xor - # CHECK: v128.bitselect # encoding: [0xfd,0x50] + # CHECK: v128.bitselect # encoding: [0xfd,0x52] v128.bitselect - # CHECK: i8x16.neg # encoding: [0xfd,0x51] + # CHECK: i8x16.abs # encoding: [0xfd,0x60] + i8x16.abs + + # CHECK: i8x16.neg # encoding: [0xfd,0x61] i8x16.neg - # CHECK: i8x16.any_true # encoding: [0xfd,0x52] + # CHECK: i8x16.any_true # encoding: [0xfd,0x62] i8x16.any_true - # CHECK: i8x16.all_true # encoding: [0xfd,0x53] + # CHECK: i8x16.all_true # encoding: [0xfd,0x63] i8x16.all_true - # CHECK: i8x16.shl # encoding: [0xfd,0x54] + # CHECK: i8x16.bitmask # encoding: [0xfd,0x64] + i8x16.bitmask + + # CHECK: i8x16.narrow_i16x8_s # encoding: [0xfd,0x65] + i8x16.narrow_i16x8_s + + # CHECK: i8x16.narrow_i16x8_u # encoding: [0xfd,0x66] + i8x16.narrow_i16x8_u + + # CHECK: i8x16.shl # encoding: [0xfd,0x6b] i8x16.shl - # CHECK: i8x16.shr_s # encoding: [0xfd,0x55] + # CHECK: i8x16.shr_s # encoding: [0xfd,0x6c] i8x16.shr_s - # CHECK: i8x16.shr_u # encoding: [0xfd,0x56] + # CHECK: i8x16.shr_u # encoding: [0xfd,0x6d] i8x16.shr_u - # CHECK: i8x16.add # encoding: [0xfd,0x57] + # CHECK: i8x16.add # encoding: [0xfd,0x6e] i8x16.add - # CHECK: i8x16.add_saturate_s # encoding: [0xfd,0x58] + # CHECK: i8x16.add_saturate_s # encoding: [0xfd,0x6f] i8x16.add_saturate_s - # CHECK: i8x16.add_saturate_u # encoding: [0xfd,0x59] + # CHECK: i8x16.add_saturate_u # encoding: [0xfd,0x70] i8x16.add_saturate_u - # CHECK: i8x16.sub # encoding: [0xfd,0x5a] + # CHECK: i8x16.sub # encoding: [0xfd,0x71] i8x16.sub - # CHECK: i8x16.sub_saturate_s # encoding: [0xfd,0x5b] + # CHECK: i8x16.sub_saturate_s # encoding: [0xfd,0x72] i8x16.sub_saturate_s - # CHECK: i8x16.sub_saturate_u # encoding: [0xfd,0x5c] + # CHECK: i8x16.sub_saturate_u # encoding: [0xfd,0x73] i8x16.sub_saturate_u - # CHECK: i8x16.mul # encoding: [0xfd,0x5d] + # CHECK: i8x16.mul # encoding: [0xfd,0x75] i8x16.mul - # CHECK: i8x16.min_s # encoding: [0xfd,0x5e] + # CHECK: i8x16.min_s # encoding: [0xfd,0x76] i8x16.min_s - # CHECK: i8x16.min_u # encoding: [0xfd,0x5f] + # CHECK: i8x16.min_u # encoding: [0xfd,0x77] i8x16.min_u - # CHECK: i8x16.max_s # encoding: [0xfd,0x60] + # CHECK: i8x16.max_s # encoding: [0xfd,0x78] i8x16.max_s - # CHECK: i8x16.max_u # encoding: [0xfd,0x61] + # CHECK: i8x16.max_u # encoding: [0xfd,0x79] i8x16.max_u - # CHECK: i16x8.neg # encoding: [0xfd,0x62] + # CHECK: i8x16.avgr_u # encoding: [0xfd,0x7b] + i8x16.avgr_u + + # CHECK: i16x8.abs # encoding: [0xfd,0x80,0x01] + i16x8.abs + + # CHECK: i16x8.neg # encoding: [0xfd,0x81,0x01] i16x8.neg - # CHECK: i16x8.any_true # encoding: [0xfd,0x63] + # CHECK: i16x8.any_true # encoding: [0xfd,0x82,0x01] i16x8.any_true - # CHECK: i16x8.all_true # encoding: [0xfd,0x64] + # CHECK: i16x8.all_true # encoding: [0xfd,0x83,0x01] 
i16x8.all_true - # CHECK: i16x8.shl # encoding: [0xfd,0x65] + # CHECK: i16x8.bitmask # encoding: [0xfd,0x84,0x01] + i16x8.bitmask + + # CHECK: i16x8.narrow_i32x4_s # encoding: [0xfd,0x85,0x01] + i16x8.narrow_i32x4_s + + # CHECK: i16x8.narrow_i32x4_u # encoding: [0xfd,0x86,0x01] + i16x8.narrow_i32x4_u + + # CHECK: i16x8.widen_low_i8x16_s # encoding: [0xfd,0x87,0x01] + i16x8.widen_low_i8x16_s + + # CHECK: i16x8.widen_high_i8x16_s # encoding: [0xfd,0x88,0x01] + i16x8.widen_high_i8x16_s + + # CHECK: i16x8.widen_low_i8x16_u # encoding: [0xfd,0x89,0x01] + i16x8.widen_low_i8x16_u + + # CHECK: i16x8.widen_high_i8x16_u # encoding: [0xfd,0x8a,0x01] + i16x8.widen_high_i8x16_u + + # CHECK: i16x8.shl # encoding: [0xfd,0x8b,0x01] i16x8.shl - # CHECK: i16x8.shr_s # encoding: [0xfd,0x66] + # CHECK: i16x8.shr_s # encoding: [0xfd,0x8c,0x01] i16x8.shr_s - # CHECK: i16x8.shr_u # encoding: [0xfd,0x67] + # CHECK: i16x8.shr_u # encoding: [0xfd,0x8d,0x01] i16x8.shr_u - # CHECK: i16x8.add # encoding: [0xfd,0x68] + # CHECK: i16x8.add # encoding: [0xfd,0x8e,0x01] i16x8.add - # CHECK: i16x8.add_saturate_s # encoding: [0xfd,0x69] + # CHECK: i16x8.add_saturate_s # encoding: [0xfd,0x8f,0x01] i16x8.add_saturate_s - # CHECK: i16x8.add_saturate_u # encoding: [0xfd,0x6a] + # CHECK: i16x8.add_saturate_u # encoding: [0xfd,0x90,0x01] i16x8.add_saturate_u - # CHECK: i16x8.sub # encoding: [0xfd,0x6b] + # CHECK: i16x8.sub # encoding: [0xfd,0x91,0x01] i16x8.sub - # CHECK: i16x8.sub_saturate_s # encoding: [0xfd,0x6c] + # CHECK: i16x8.sub_saturate_s # encoding: [0xfd,0x92,0x01] i16x8.sub_saturate_s - # CHECK: i16x8.sub_saturate_u # encoding: [0xfd,0x6d] + # CHECK: i16x8.sub_saturate_u # encoding: [0xfd,0x93,0x01] i16x8.sub_saturate_u - # CHECK: i16x8.mul # encoding: [0xfd,0x6e] + # CHECK: i16x8.mul # encoding: [0xfd,0x95,0x01] i16x8.mul - # CHECK: i16x8.min_s # encoding: [0xfd,0x6f] + # CHECK: i16x8.min_s # encoding: [0xfd,0x96,0x01] i16x8.min_s - # CHECK: i16x8.min_u # encoding: [0xfd,0x70] + # CHECK: i16x8.min_u # encoding: [0xfd,0x97,0x01] i16x8.min_u - # CHECK: i16x8.max_s # encoding: [0xfd,0x71] + # CHECK: i16x8.max_s # encoding: [0xfd,0x98,0x01] i16x8.max_s - # CHECK: i16x8.max_u # encoding: [0xfd,0x72] + # CHECK: i16x8.max_u # encoding: [0xfd,0x99,0x01] i16x8.max_u - # CHECK: i32x4.neg # encoding: [0xfd,0x73] + # CHECK: i16x8.avgr_u # encoding: [0xfd,0x9b,0x01] + i16x8.avgr_u + + # CHECK: i32x4.abs # encoding: [0xfd,0xa0,0x01] + i32x4.abs + + # CHECK: i32x4.neg # encoding: [0xfd,0xa1,0x01] i32x4.neg - # CHECK: i32x4.any_true # encoding: [0xfd,0x74] + # CHECK: i32x4.any_true # encoding: [0xfd,0xa2,0x01] i32x4.any_true - # CHECK: i32x4.all_true # encoding: [0xfd,0x75] + # CHECK: i32x4.all_true # encoding: [0xfd,0xa3,0x01] i32x4.all_true - # CHECK: i32x4.shl # encoding: [0xfd,0x76] + # CHECK: i32x4.bitmask # encoding: [0xfd,0xa4,0x01] + i32x4.bitmask + + # CHECK: i32x4.widen_low_i16x8_s # encoding: [0xfd,0xa7,0x01] + i32x4.widen_low_i16x8_s + + # CHECK: i32x4.widen_high_i16x8_s # encoding: [0xfd,0xa8,0x01] + i32x4.widen_high_i16x8_s + + # CHECK: i32x4.widen_low_i16x8_u # encoding: [0xfd,0xa9,0x01] + i32x4.widen_low_i16x8_u + + # CHECK: i32x4.widen_high_i16x8_u # encoding: [0xfd,0xaa,0x01] + i32x4.widen_high_i16x8_u + + # CHECK: i32x4.shl # encoding: [0xfd,0xab,0x01] i32x4.shl - # CHECK: i32x4.shr_s # encoding: [0xfd,0x77] + # CHECK: i32x4.shr_s # encoding: [0xfd,0xac,0x01] i32x4.shr_s - # CHECK: i32x4.shr_u # encoding: [0xfd,0x78] + # CHECK: i32x4.shr_u # encoding: [0xfd,0xad,0x01] i32x4.shr_u - # CHECK: i32x4.add # encoding: 
[0xfd,0x79] + # CHECK: i32x4.add # encoding: [0xfd,0xae,0x01] i32x4.add - # CHECK: i32x4.sub # encoding: [0xfd,0x7c] + # CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01] i32x4.sub - # CHECK: i32x4.mul # encoding: [0xfd,0x7f] + # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xb4,0x01] + i32x4.dot_i16x8_s + + # CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01] i32x4.mul - # CHECK: i32x4.min_s # encoding: [0xfd,0x80,0x01] + # CHECK: i32x4.min_s # encoding: [0xfd,0xb6,0x01] i32x4.min_s - # CHECK: i32x4.min_u # encoding: [0xfd,0x81,0x01] + # CHECK: i32x4.min_u # encoding: [0xfd,0xb7,0x01] i32x4.min_u - # CHECK: i32x4.max_s # encoding: [0xfd,0x82,0x01] + # CHECK: i32x4.max_s # encoding: [0xfd,0xb8,0x01] i32x4.max_s - # CHECK: i32x4.max_u # encoding: [0xfd,0x83,0x01] + # CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01] i32x4.max_u - # CHECK: i64x2.neg # encoding: [0xfd,0x84,0x01] + # CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01] i64x2.neg - # CHECK: i64x2.any_true # encoding: [0xfd,0x85,0x01] + # CHECK: i64x2.any_true # encoding: [0xfd,0xc2,0x01] i64x2.any_true - # CHECK: i64x2.all_true # encoding: [0xfd,0x86,0x01] + # CHECK: i64x2.all_true # encoding: [0xfd,0xc3,0x01] i64x2.all_true - # CHECK: i64x2.shl # encoding: [0xfd,0x87,0x01] + # CHECK: i64x2.shl # encoding: [0xfd,0xcb,0x01] i64x2.shl - # CHECK: i64x2.shr_s # encoding: [0xfd,0x88,0x01] + # CHECK: i64x2.shr_s # encoding: [0xfd,0xcc,0x01] i64x2.shr_s - # CHECK: i64x2.shr_u # encoding: [0xfd,0x89,0x01] + # CHECK: i64x2.shr_u # encoding: [0xfd,0xcd,0x01] i64x2.shr_u - # CHECK: i64x2.add # encoding: [0xfd,0x8a,0x01] + # CHECK: i64x2.add # encoding: [0xfd,0xce,0x01] i64x2.add - # CHECK: i64x2.sub # encoding: [0xfd,0x8d,0x01] + # CHECK: i64x2.sub # encoding: [0xfd,0xd1,0x01] i64x2.sub - # CHECK: f32x4.abs # encoding: [0xfd,0x95,0x01] + # CHECK: f32x4.abs # encoding: [0xfd,0xe0,0x01] f32x4.abs - # CHECK: f32x4.neg # encoding: [0xfd,0x96,0x01] + # CHECK: f32x4.neg # encoding: [0xfd,0xe1,0x01] f32x4.neg - # CHECK: f32x4.sqrt # encoding: [0xfd,0x97,0x01] + # CHECK: f32x4.sqrt # encoding: [0xfd,0xe3,0x01] f32x4.sqrt - # CHECK: f32x4.qfma # encoding: [0xfd,0x98,0x01] - f32x4.qfma - - # CHECK: f32x4.qfms # encoding: [0xfd,0x99,0x01] - f32x4.qfms - - # CHECK: f32x4.add # encoding: [0xfd,0x9a,0x01] + # CHECK: f32x4.add # encoding: [0xfd,0xe4,0x01] f32x4.add - # CHECK: f32x4.sub # encoding: [0xfd,0x9b,0x01] + # CHECK: f32x4.sub # encoding: [0xfd,0xe5,0x01] f32x4.sub - # CHECK: f32x4.mul # encoding: [0xfd,0x9c,0x01] + # CHECK: f32x4.mul # encoding: [0xfd,0xe6,0x01] f32x4.mul - # CHECK: f32x4.div # encoding: [0xfd,0x9d,0x01] + # CHECK: f32x4.div # encoding: [0xfd,0xe7,0x01] f32x4.div - # CHECK: f32x4.min # encoding: [0xfd,0x9e,0x01] + # CHECK: f32x4.min # encoding: [0xfd,0xe8,0x01] f32x4.min - # CHECK: f32x4.max # encoding: [0xfd,0x9f,0x01] + # CHECK: f32x4.max # encoding: [0xfd,0xe9,0x01] f32x4.max - # CHECK: f64x2.abs # encoding: [0xfd,0xa0,0x01] + # CHECK: f64x2.abs # encoding: [0xfd,0xec,0x01] f64x2.abs - # CHECK: f64x2.neg # encoding: [0xfd,0xa1,0x01] + # CHECK: f64x2.neg # encoding: [0xfd,0xed,0x01] f64x2.neg - # CHECK: f64x2.sqrt # encoding: [0xfd,0xa2,0x01] + # CHECK: f64x2.sqrt # encoding: [0xfd,0xef,0x01] f64x2.sqrt - # CHECK: f64x2.qfma # encoding: [0xfd,0xa3,0x01] - f64x2.qfma - - # CHECK: f64x2.qfms # encoding: [0xfd,0xa4,0x01] - f64x2.qfms - - # CHECK: f64x2.add # encoding: [0xfd,0xa5,0x01] + # CHECK: f64x2.add # encoding: [0xfd,0xf0,0x01] f64x2.add - # CHECK: f64x2.sub # encoding: [0xfd,0xa6,0x01] + # CHECK: f64x2.sub # encoding: [0xfd,0xf1,0x01] 
f64x2.sub - # CHECK: f64x2.mul # encoding: [0xfd,0xa7,0x01] + # CHECK: f64x2.mul # encoding: [0xfd,0xf2,0x01] f64x2.mul - # CHECK: f64x2.div # encoding: [0xfd,0xa8,0x01] + # CHECK: f64x2.div # encoding: [0xfd,0xf3,0x01] f64x2.div - # CHECK: f64x2.min # encoding: [0xfd,0xa9,0x01] + # CHECK: f64x2.min # encoding: [0xfd,0xf4,0x01] f64x2.min - # CHECK: f64x2.max # encoding: [0xfd,0xaa,0x01] + # CHECK: f64x2.max # encoding: [0xfd,0xf5,0x01] f64x2.max - # CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xab,0x01] + # CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xf8,0x01] i32x4.trunc_sat_f32x4_s - # CHECK: i32x4.trunc_sat_f32x4_u # encoding: [0xfd,0xac,0x01] + # CHECK: i32x4.trunc_sat_f32x4_u # encoding: [0xfd,0xf9,0x01] i32x4.trunc_sat_f32x4_u - # CHECK: i64x2.trunc_sat_f64x2_s # encoding: [0xfd,0xad,0x01] - i64x2.trunc_sat_f64x2_s - - # CHECK: i64x2.trunc_sat_f64x2_u # encoding: [0xfd,0xae,0x01] - i64x2.trunc_sat_f64x2_u - - # CHECK: f32x4.convert_i32x4_s # encoding: [0xfd,0xaf,0x01] + # CHECK: f32x4.convert_i32x4_s # encoding: [0xfd,0xfa,0x01] f32x4.convert_i32x4_s - # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xb0,0x01] + # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u - # CHECK: f64x2.convert_i64x2_s # encoding: [0xfd,0xb1,0x01] - f64x2.convert_i64x2_s - - # CHECK: f64x2.convert_i64x2_u # encoding: [0xfd,0xb2,0x01] - f64x2.convert_i64x2_u - - # CHECK: v8x16.swizzle # encoding: [0xfd,0xc0,0x01] - v8x16.swizzle - - # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0xc2,0x01,0x00,0x30] - v8x16.load_splat 48 - - # CHECK: v16x8.load_splat 48 # encoding: [0xfd,0xc3,0x01,0x01,0x30] - v16x8.load_splat 48 - - # CHECK: v32x4.load_splat 48 # encoding: [0xfd,0xc4,0x01,0x02,0x30] - v32x4.load_splat 48 - - # CHECK: v64x2.load_splat 48 # encoding: [0xfd,0xc5,0x01,0x03,0x30] - v64x2.load_splat 48 - - # CHECK: i8x16.narrow_i16x8_s # encoding: [0xfd,0xc6,0x01] - i8x16.narrow_i16x8_s - - # CHECK: i8x16.narrow_i16x8_u # encoding: [0xfd,0xc7,0x01] - i8x16.narrow_i16x8_u - - # CHECK: i16x8.narrow_i32x4_s # encoding: [0xfd,0xc8,0x01] - i16x8.narrow_i32x4_s - - # CHECK: i16x8.narrow_i32x4_u # encoding: [0xfd,0xc9,0x01] - i16x8.narrow_i32x4_u - - # CHECK: i16x8.widen_low_i8x16_s # encoding: [0xfd,0xca,0x01] - i16x8.widen_low_i8x16_s - - # CHECK: i16x8.widen_high_i8x16_s # encoding: [0xfd,0xcb,0x01] - i16x8.widen_high_i8x16_s - - # CHECK: i16x8.widen_low_i8x16_u # encoding: [0xfd,0xcc,0x01] - i16x8.widen_low_i8x16_u - - # CHECK: i16x8.widen_high_i8x16_u # encoding: [0xfd,0xcd,0x01] - i16x8.widen_high_i8x16_u - - # CHECK: i32x4.widen_low_i16x8_s # encoding: [0xfd,0xce,0x01] - i32x4.widen_low_i16x8_s - - # CHECK: i32x4.widen_high_i16x8_s # encoding: [0xfd,0xcf,0x01] - i32x4.widen_high_i16x8_s - - # CHECK: i32x4.widen_low_i16x8_u # encoding: [0xfd,0xd0,0x01] - i32x4.widen_low_i16x8_u - - # CHECK: i32x4.widen_high_i16x8_u # encoding: [0xfd,0xd1,0x01] - i32x4.widen_high_i16x8_u - - # CHECK: i16x8.load8x8_s 32 # encoding: [0xfd,0xd2,0x01,0x03,0x20] - i16x8.load8x8_s 32 - - # CHECK: i16x8.load8x8_u 32 # encoding: [0xfd,0xd3,0x01,0x03,0x20] - i16x8.load8x8_u 32 - - # CHECK: i32x4.load16x4_s 32 # encoding: [0xfd,0xd4,0x01,0x03,0x20] - i32x4.load16x4_s 32 - - # CHECK: i32x4.load16x4_u 32 # encoding: [0xfd,0xd5,0x01,0x03,0x20] - i32x4.load16x4_u 32 - - # CHECK: i64x2.load32x2_s 32 # encoding: [0xfd,0xd6,0x01,0x03,0x20] - i64x2.load32x2_s 32 - - # CHECK: i64x2.load32x2_u 32 # encoding: [0xfd,0xd7,0x01,0x03,0x20] - i64x2.load32x2_u 32 - - # CHECK: v128.andnot # encoding: [0xfd,0xd8,0x01] 
- v128.andnot - - # CHECK: i8x16.avgr_u # encoding: [0xfd,0xd9,0x01] - i8x16.avgr_u - - # CHECK: i16x8.avgr_u # encoding: [0xfd,0xda,0x01] - i16x8.avgr_u - - # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xdb,0x01] - i32x4.dot_i16x8_s - - # CHECK: i8x16.abs # encoding: [0xfd,0xe1,0x01] - i8x16.abs - - # CHECK: i16x8.abs # encoding: [0xfd,0xe2,0x01] - i16x8.abs - - # CHECK: i32x4.abs # encoding: [0xfd,0xe3,0x01] - i32x4.abs + # CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] + f32x4.qfma - # CHECK: i8x16.bitmask # encoding: [0xfd,0xe4,0x01] - i8x16.bitmask + # CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] + f32x4.qfms - # CHECK: i16x8.bitmask # encoding: [0xfd,0xe5,0x01] - i16x8.bitmask + # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01] + f64x2.qfma - # CHECK: i32x4.bitmask # encoding: [0xfd,0xe6,0x01] - i32x4.bitmask + # CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01] + f64x2.qfms end_function diff --git a/llvm/test/MC/X86/tlsdesc-64.s b/llvm/test/MC/X86/tlsdesc-64.s index 5c4acd234244e..eaec13785b3a5 100644 --- a/llvm/test/MC/X86/tlsdesc-64.s +++ b/llvm/test/MC/X86/tlsdesc-64.s @@ -9,7 +9,7 @@ # SYM: TLS GLOBAL DEFAULT UND a -# CHECK: 0: leaq (%rip), %rax +# CHECK: 0: leaq (%rip), %rax # 7 <{{.*}}> # CHECK-NEXT: 0000000000000003: R_X86_64_GOTPC32_TLSDESC a-0x4 # CHECK-NEXT: 7: callq *(%rax) # CHECK-NEXT: 0000000000000007: R_X86_64_TLSDESC_CALL a diff --git a/llvm/test/Other/cfg_deopt_unreach.ll b/llvm/test/Other/cfg_deopt_unreach.ll index 2b4bcd23c4004..826e4abb42bb4 100644 --- a/llvm/test/Other/cfg_deopt_unreach.ll +++ b/llvm/test/Other/cfg_deopt_unreach.ll @@ -15,24 +15,20 @@ declare i8 @llvm.experimental.deoptimize.i8(...) define i8 @callee(i1* %c) alwaysinline { -; NO-FLAGS: [shape=record,label="{%0:\l %c0 = load volatile i1, i1* %c\l br i1 %c0, label %lleft, label %lright\l|{T|F}}"]; -; DEOPT: [shape=record,label="{%0:\l %c0 = load volatile i1, i1* %c\l br i1 %c0, label %lleft, label %lright\l|{T|F}}"]; -; UNREACH: [shape=record,label="{%0:\l %c0 = load volatile i1, i1* %c\l br i1 %c0, label %lleft, label %lright\l|{T|F}}"]; -; BOTH-FLAGS-NOT: [shape=record,label="{%0:\l %c0 = load volatile i1, i1* %c\l br i1 %c0, label %lleft, label %lright\l|{T|F}}"]; %c0 = load volatile i1, i1* %c br i1 %c0, label %lleft, label %lright -; NO-FLAGS: [shape=record,label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}"]; -; DEOPT-NOT: [shape=record,label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}"]; -; UNREACH: [shape=record,label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}"]; -; BOTH-FLAGS-NOT: [shape=record,label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}"]; +; NO-FLAGS: label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}" +; DEOPT-NOT: label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}" +; UNREACH: label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}" +; BOTH-FLAGS-NOT: label="{lleft: \l %v0 = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ \"deopt\"(i32 1)\l... ]\l ret i8 %v0\l}" lleft: %v0 = call i8(...) 
@llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ] ret i8 %v0 -; NO-FLAGS: [shape=record,label="{lright: \l unreachable\l}"]; -; DEOPT: [shape=record,label="{lright: \l unreachable\l}"]; -; UNREACH-NOT: [shape=record,label="{lright: \l unreachable\l}"]; -; BOTH-FLAGS-NOT: [shape=record,label="{lright: \l unreachable\l}"]; +; NO-FLAGS: label="{lright: \l unreachable\l}" +; DEOPT: label="{lright: \l unreachable\l}" +; UNREACH-NOT: label="{lright: \l unreachable\l}" +; BOTH-FLAGS-NOT: label="{lright: \l unreachable\l}" lright: unreachable } diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 31be4111e00a7..03af86f18304d 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -61,8 +61,9 @@ ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. -; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo +; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -163,7 +164,6 @@ ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 2abba17f950f7..baf6f300ae7c4 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -70,8 +70,9 @@ ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. 
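; (FileCheck background, sketched as an aside: directives with a -DAG
; suffix let a contiguous block of lines match in any order, whereas
; -NEXT pins its pattern to the line immediately after the previous
; match. So a block such as
;   ; X-DAG: Running analysis: LoopAnalysis on foo
;   ; X-DAG: Running analysis: BranchProbabilityAnalysis on foo
; accepts whichever of the two analyses the pass manager happens to run
; first; the prefix X here is illustrative, not part of this test.)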
-; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo +; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -171,7 +172,6 @@ ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 72dde5239b9b0..38eb73111aa5c 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -81,6 +81,7 @@ ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. ; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-DAG: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; CHECK-O-NEXT: Invalidating all non-preserved analyses for: @@ -137,8 +138,9 @@ ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. -; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo +; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-DAG: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass @@ -209,7 +211,6 @@ ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index e7908e1cd31fc..b51eb79c1de66 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -70,8 +70,9 @@ ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; These next two can appear in any order since they are accessed as parameters ; on the same call to BlockFrequencyInfo::calculate. 
-; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo +; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA @@ -170,7 +171,6 @@ ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass on foo diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index ffb3727dd2492..6a331bbcd9553 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -37,6 +37,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager @@ -83,6 +84,7 @@ ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Conditionally eliminate dead library calls ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis @@ -187,6 +189,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Dead Global Elimination @@ -211,6 +214,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Loop Distribution +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Scalar Evolution Analysis @@ -280,6 +284,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Canonicalize natural loops @@ -307,6 +312,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: @@ -314,6 +320,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index d74e381f886c9..2710e54c0065a 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -40,6 +40,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability 
Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager @@ -88,6 +89,7 @@ ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Conditionally eliminate dead library calls ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis @@ -192,6 +194,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Dead Global Elimination @@ -216,6 +219,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Loop Distribution +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Scalar Evolution Analysis @@ -285,6 +289,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Canonicalize natural loops @@ -312,6 +317,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: @@ -319,6 +325,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index 64e22add64cb6..1015286f43344 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -37,6 +37,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: FunctionPass Manager @@ -174,6 +175,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Dead Global Elimination @@ -198,6 +200,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Loop Distribution +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Scalar Evolution Analysis @@ -267,6 +270,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Canonicalize natural loops @@ -294,6 +298,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; 
CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Pass Arguments: @@ -301,6 +306,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Post-Dominator Tree Construction ; CHECK-NEXT: Branch Probability Analysis ; CHECK-NEXT: Block Frequency Analysis diff --git a/llvm/test/ThinLTO/X86/empty-module.ll b/llvm/test/ThinLTO/X86/empty-module.ll index 550be4df66573..3a63a65259da7 100644 --- a/llvm/test/ThinLTO/X86/empty-module.ll +++ b/llvm/test/ThinLTO/X86/empty-module.ll @@ -1,11 +1,11 @@ ; RUN: opt -module-summary -o %t.bc %s -; RUN: rm -f %t2.0 -; RUN: llvm-lto2 run %t.bc -r %t.bc,foo,pl -o %t2 -thinlto-distributed-indexes -; RUN: llvm-readobj -h %t2.0 | FileCheck %s -; RUN: llvm-nm %t2.0 2>&1 | count 0 - -; CHECK: Format: elf64-x86-64 +; RUN: rm -f %t2.* +; RUN: llvm-lto2 run %t.bc -r %t.bc,foo,pl -o %t2 -thinlto-distributed-indexes -save-temps +; Ensure lto does not emit empty combined module. +; RUN: test ! -e %t2.0 +; Ensure empty combined module has only 2 temp files. +; RUN: ls %t2.0.*.bc | count 2 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll index 5e84460b9c0c2..39108874b7f98 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -257,12 +257,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] ; CHECK: ret i8 [[LOADED]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst @@ -307,12 +308,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] ; CHECK: ret i16 [[LOADED]] %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic @@ -328,9 +330,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: 
[[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] @@ -340,16 +346,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] +; CHECK: ret i32 [[LOADED_EXIT]] %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 @@ -371,9 +380,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 @@ -387,16 +400,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] +; CHECK: ret i64 [[LOADED_EXIT]] %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 @@ -411,9 +427,13 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize { ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: 
[[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]] @@ -423,16 +443,19 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize { ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll index 8397182e7e8ff..deecf01f24365 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll @@ -91,9 +91,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 @@ -104,16 +108,19 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i8 [[OLDVAL]] +; CHECK: ret i8 [[LOADED_EXIT]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst %old = extractvalue { i8, i1 } %pairold, 0 @@ -129,9 +136,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: 
[[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 @@ -142,16 +153,20 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; The PHI is not required. +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i16 [[OLDVAL]] +; CHECK: ret i16 [[LOADED_EXIT]] %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic %old = extractvalue { i16, i1 } %pairold, 0 @@ -166,9 +181,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] @@ -178,16 +197,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[OLDVAL]] +; CHECK: ret i32 [[LOADED_EXIT]] %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire %old = extractvalue { i32, i1 } %pairold, 0 @@ -209,9 +231,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]] ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ] ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32 ; CHECK: 
[[NEWHI_TMP:%.*]] = lshr i64 %newval, 32 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32 @@ -225,16 +251,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] ; CHECK: [[FAILURE_BB]]: +; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ] ; CHECK-NOT: fence_cst ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i64 [[OLDVAL]] +; CHECK: ret i64 [[LOADED_EXIT]] %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic %old = extractvalue { i64, i1 } %pairold, 0 diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll index 375b41a26dbd4..828c1c4cb3b7e 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll @@ -15,6 +15,7 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -24,16 +25,19 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 @@ -87,9 +91,13 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK-NEXT: br label %[[TRY_STORE]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -99,16 +107,19 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { ; CHECK: br label %[[END:.*]] ; 
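
A note on the pattern running through these AtomicExpand checks: the loaded value ([[OLDVAL]]/[[LOADED]]) is produced by the ldrex inside the loop block, so once the expansion introduces the separate [[FENCED_STORE]] and [[NO_STORE_BB]] blocks it can no longer be used directly from the exit block; the updated checks instead follow a chain of single-entry phis ([[LOADED_TRYSTORE]], [[LOADED_NOSTORE]], [[LOADED_FAILURE]]) merging into [[LOADED_EXIT]]. A sketch of how such an exit phi is built with the real IRBuilder/PHINode APIs (the helper and variable names are hypothetical):

#include "llvm/IR/IRBuilder.h"

// Merge the two copies of the loaded value at the exit block so the
// cmpxchg's result no longer refers to a value defined inside the loop.
static llvm::Value *makeExitPHI(llvm::IRBuilder<> &Builder, llvm::Type *Ty,
                                llvm::Value *LoadedTryStore,
                                llvm::Value *LoadedFailure,
                                llvm::BasicBlock *SuccessBB,
                                llvm::BasicBlock *FailureBB) {
  llvm::PHINode *Loaded =
      Builder.CreatePHI(Ty, /*NumReservedValues=*/2, "loaded.exit");
  Loaded->addIncoming(LoadedTryStore, SuccessBB);
  Loaded->addIncoming(LoadedFailure, FailureBB);
  return Loaded; // replaces the former direct use of the ldrex result
}

This is why the return checks in these tests change from ret iN [[OLDVAL]] (or [[LOADED]]) to ret iN [[LOADED_EXIT]].
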
CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK-NOT: dmb ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic %oldval = extractvalue { i32, i1 } %pair, 0 @@ -129,6 +140,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min ; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: +; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ] ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] @@ -138,16 +150,19 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min ; CHECK: br label %[[END:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ] ; CHECK: call void @llvm.arm.clrex() ; CHECK: br label %[[FAILURE_BB]] ; CHECK: [[FAILURE_BB]]: +; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ] ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[END]] ; CHECK: [[END]]: +; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ] ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i32 [[LOADED]] +; CHECK: ret i32 [[LOADED_EXIT]] %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %oldval = extractvalue { i32, i1 } %pair, 0 diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll index 74c05615d0b9a..999fa1541f565 100644 --- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -39,12 +39,12 @@ target triple = "sparcv9-unknown-unknown" ; CHECK: %17 = icmp ne i32 %10, %16 ; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end ; CHECK:partword.cmpxchg.end: -; CHECK: %18 = lshr i32 %14, %ShiftAmt -; CHECK: %19 = trunc i32 %18 to i8 -; CHECK: %20 = insertvalue { i8, i1 } undef, i8 %19, 0 -; CHECK: %21 = insertvalue { i8, i1 } %20, i1 %15, 1 +; CHECK: %shifted = lshr i32 %14, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i8 +; CHECK: %18 = insertvalue { i8, i1 } undef, i8 %extracted, 0 +; CHECK: %19 = insertvalue { i8, i1 } %18, i1 %15, 1 ; CHECK: fence seq_cst -; CHECK: %ret = extractvalue { i8, i1 } %21, 0 +; CHECK: %ret = extractvalue { i8, i1 } %19, 0 ; CHECK: ret i8 %ret define i8 @test_cmpxchg_i8(i8* %arg, i8 %old, i8 %new) { entry: @@ -84,12 +84,12 @@ entry: ; CHECK: %17 = icmp ne i32 %10, %16 ; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end ; CHECK:partword.cmpxchg.end: -; CHECK: %18 = lshr i32 %14, %ShiftAmt -; CHECK: %19 = trunc i32 %18 to i16 -; CHECK: %20 = insertvalue { i16, i1 } undef, i16 %19, 0 -; CHECK: %21 = 
insertvalue { i16, i1 } %20, i1 %15, 1 +; CHECK: %shifted = lshr i32 %14, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 +; CHECK: %18 = insertvalue { i16, i1 } undef, i16 %extracted, 0 +; CHECK: %19 = insertvalue { i16, i1 } %18, i1 %15, 1 ; CHECK: fence seq_cst -; CHECK: %ret = extractvalue { i16, i1 } %21, 0 +; CHECK: %ret = extractvalue { i16, i1 } %19, 0 ; CHECK: ret i16 %ret define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) { entry: @@ -125,10 +125,10 @@ entry: ; CHECK: %newloaded = extractvalue { i32, i1 } %9, 0 ; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start ; CHECK:atomicrmw.end: -; CHECK: %10 = lshr i32 %newloaded, %ShiftAmt -; CHECK: %11 = trunc i32 %10 to i16 +; CHECK: %shifted = lshr i32 %newloaded, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 ; CHECK: fence seq_cst -; CHECK: ret i16 %11 +; CHECK: ret i16 %extracted define i16 @test_add_i16(i16* %arg, i16 %val) { entry: %ret = atomicrmw add i16* %arg, i16 %val seq_cst @@ -174,15 +174,15 @@ entry: ; CHECK-LABEL: @test_min_i16( ; CHECK:atomicrmw.start: -; CHECK: %6 = lshr i32 %loaded, %ShiftAmt -; CHECK: %7 = trunc i32 %6 to i16 -; CHECK: %8 = icmp sle i16 %7, %val -; CHECK: %new = select i1 %8, i16 %7, i16 %val -; CHECK: %9 = zext i16 %new to i32 -; CHECK: %10 = shl i32 %9, %ShiftAmt -; CHECK: %11 = and i32 %loaded, %Inv_Mask -; CHECK: %12 = or i32 %11, %10 -; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %12 monotonic monotonic +; CHECK: %shifted = lshr i32 %loaded, %ShiftAmt +; CHECK: %extracted = trunc i32 %shifted to i16 +; CHECK: %6 = icmp sle i16 %extracted, %val +; CHECK: %new = select i1 %6, i16 %extracted, i16 %val +; CHECK: %extended = zext i16 %new to i32 +; CHECK: %shifted1 = shl nuw i32 %extended, %ShiftAmt +; CHECK: %unmasked = and i32 %loaded, %Inv_Mask +; CHECK: %inserted = or i32 %unmasked, %shifted1 +; CHECK: %7 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %inserted monotonic monotonic ; CHECK:atomicrmw.end: define i16 @test_min_i16(i16* %arg, i16 %val) { entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index 77e75084059a8..1d7979c2305a5 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -31,13 +31,13 @@ entry: } define i32 @f(i32 %x) { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: (i32 [[X:%.*]]) -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[X_ADDR:%.*]] = alloca i32 -; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) -; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@f +; NOT_TUNIT_NPM-SAME: (i32 [[X:%.*]]) +; NOT_TUNIT_NPM-NEXT: entry: +; NOT_TUNIT_NPM-NEXT: [[X_ADDR:%.*]] = alloca i32 +; NOT_TUNIT_NPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +; NOT_TUNIT_NPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) +; NOT_TUNIT_NPM-NEXT: ret i32 [[TMP1]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f ; IS__TUNIT_NPM-SAME: (i32 [[X:%.*]]) @@ -48,14 +48,6 @@ define i32 @f(i32 %x) { ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[TMP1]] ; -; IS__CGSCC____-LABEL: define 
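
For the partword.ll hunks just above: the expansion of sub-word atomics now gives its intermediate values stable names rather than numbered temporaries, which is what lets the checks refer to %shifted, %extracted, %extended, %unmasked and %inserted, and the re-insertion shift is now emitted with the nuw flag. A sketch of the two halves using the real IRBuilder calls (function and parameter names here are hypothetical):

#include "llvm/IR/IRBuilder.h"

// Recover the narrow value from the 32-bit word used by the partword atomic.
static llvm::Value *extractMaskedSketch(llvm::IRBuilder<> &Builder,
                                        llvm::Value *WideLoaded,
                                        llvm::Value *ShiftAmt,
                                        llvm::Type *NarrowTy) {
  llvm::Value *Shifted = Builder.CreateLShr(WideLoaded, ShiftAmt, "shifted");
  return Builder.CreateTrunc(Shifted, NarrowTy, "extracted");
}

// Splice the narrow value back into the wide word; nuw on the shl is sound
// here because the zero-extended value cannot shift set bits out of the word.
static llvm::Value *insertMaskedSketch(llvm::IRBuilder<> &Builder,
                                       llvm::Value *WideLoaded,
                                       llvm::Value *Narrow,
                                       llvm::Value *ShiftAmt,
                                       llvm::Value *InvMask,
                                       llvm::Type *WideTy) {
  llvm::Value *Extended = Builder.CreateZExt(Narrow, WideTy, "extended");
  llvm::Value *Shifted = Builder.CreateShl(Extended, ShiftAmt, "shifted1",
                                           /*HasNUW=*/true, /*HasNSW=*/false);
  llvm::Value *Unmasked = Builder.CreateAnd(WideLoaded, InvMask, "unmasked");
  return Builder.CreateOr(Unmasked, Shifted, "inserted");
}

Named values keep the FileCheck lines readable and stable against unrelated renumbering, which is presumably why the test no longer pins down %18/%19-style temporaries.
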
{{[^@]+}}@f -; IS__CGSCC____-SAME: (i32 [[X:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[X_ADDR:%.*]] = alloca i32 -; IS__CGSCC____-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) -; IS__CGSCC____-NEXT: ret i32 [[TMP1]] -; entry: %x_addr = alloca i32 store i32 %x, i32* %x_addr, align 4 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll index 5fc1b94fcd986..e357272536b9d 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; PR2498 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 0a3ac67b31ff5..7322a984d2c2b 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -36,7 +36,7 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* 
nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 @@ -48,7 +48,7 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 @@ -60,8 +60,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -72,8 +72,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; 
IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -119,7 +119,7 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 @@ -131,7 +131,7 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 @@ -144,8 +144,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -156,8 +156,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* 
noalias nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index f6c993f5ee98c..f9390f6cc07d4 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -41,7 +41,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -53,7 +53,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -66,8 +66,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; 
IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -78,8 +78,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -128,7 +128,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -140,7 +140,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void 
@llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -153,8 +153,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -165,8 +165,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -215,7 +215,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast 
<8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -227,7 +227,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -240,8 +240,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -252,8 +252,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -302,7 +302,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -314,7 +314,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -327,8 +327,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; 
IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -339,8 +339,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -387,7 +387,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -399,7 +399,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 
false) ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -411,8 +411,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -423,8 +423,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -471,7 +471,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; 
IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -483,7 +483,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -495,8 +495,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -507,8 +507,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x 
i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -557,7 +557,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -569,7 +569,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -582,8 +582,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call 
fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -594,8 +594,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -644,7 +644,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -656,7 +656,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x 
i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -669,8 +669,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -681,8 +681,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll index b715e428d563a..4dc4a232ffc8c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll @@ -50,7 +50,7 @@ define void @exportedfun(%struct.a* %a) { ; IS__TUNIT____-SAME: (%struct.a* nocapture nofree readnone [[A:%.*]]) ; IS__TUNIT____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() ; IS__TUNIT____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 -; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nofree readnone undef, <{ 
[[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) ; IS__TUNIT____-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) ; IS__TUNIT____-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll index cc1df4abcaa1f..fb8ad684943ab 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -5,11 +5,11 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM define void @f() { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f() -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[A:%.*]] = alloca i32, align 1 -; IS__TUNIT_OPM-NEXT: call void @g(i32* noalias nocapture nonnull readonly dereferenceable(4) [[A]]) -; IS__TUNIT_OPM-NEXT: ret void +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@f() +; NOT_TUNIT_NPM-NEXT: entry: +; NOT_TUNIT_NPM-NEXT: [[A:%.*]] = alloca i32, align 1 +; NOT_TUNIT_NPM-NEXT: call void @g(i32* noalias nocapture nonnull readonly dereferenceable(4) [[A]]) +; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f() ; IS__TUNIT_NPM-NEXT: entry: @@ -18,12 +18,6 @@ define void @f() { ; IS__TUNIT_NPM-NEXT: call void @g(i32 [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret void ; -; IS__CGSCC____-LABEL: define {{[^@]+}}@f() -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 1 -; IS__CGSCC____-NEXT: call void @g(i32* noalias nonnull readonly dereferenceable(4) [[A]]) -; IS__CGSCC____-NEXT: ret void -; entry: %a = alloca i32, align 1 call void @g(i32* %a) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll index deab6567c7f43..3f440b79c0bdf 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -41,12 +41,12 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; IS__TUNIT_NPM-NEXT: ret i32 [[A]] ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 ; IS__CGSCC_OPM-NEXT: [[L:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = add i32 [[L]], [[TMP2]] @@ -56,16 +56,16 @@ define internal 
i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* -; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4 ; IS__CGSCC_NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: [[L:%.*]] = load i32, i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = add i32 [[L]], [[TMP2]] @@ -122,7 +122,7 @@ define i32 @test(i32* %X) { ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nofree nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]]) ; IS__CGSCC_OPM-NEXT: ret i32 [[C]] ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll index 25febca6cd292..88f28e41071e1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -60,7 +60,7 @@ define internal i32 @caller(i32* %B) { ; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32 ; IS__CGSCC____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[C]] ; %A = alloca i32 @@ -70,11 +70,11 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callercaller() -; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32 -; IS__TUNIT_OPM-NEXT: store i32 2, i32* [[B]], 
align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__TUNIT_OPM-NEXT: ret i32 [[X]] +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller() +; NOT_TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32 +; NOT_TUNIT_NPM-NEXT: store i32 2, i32* [[B]], align 4 +; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; NOT_TUNIT_NPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callercaller() ; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32 @@ -82,12 +82,6 @@ define i32 @callercaller() { ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 1 ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[X]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller() -; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32 -; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: ret i32 [[X]] ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll index cf10658bb7ef3..1fe0fe6044edd 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -8,12 +8,12 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 ; IS__CGSCC_OPM-NEXT: ret void ; @@ -21,16 +21,16 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* -; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4 ; IS__CGSCC_NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], 
%struct.ss* [[B_PRIV]], i32 0, i32 0 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index 5ac448e22f704..b36744677d215 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -9,14 +9,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 %struct.ss = type { i32, i64 } define internal i32 @f(%struct.ss* byval %b) nounwind { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__TUNIT_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] +; IS________OPM-LABEL: define {{[^@]+}}@f +; IS________OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; IS________OPM-NEXT: entry: +; IS________OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; IS________OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 +; IS________OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IS________OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 +; IS________OPM-NEXT: ret i32 [[TMP1]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f ; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) @@ -32,27 +32,18 @@ define internal i32 @f(%struct.ss* byval %b) nounwind { ; IS__TUNIT_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__TUNIT_NPM-NEXT: ret i32 [[TMP1]] ; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]]) -; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] -; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] ; IS__CGSCC_NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* -; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4 ; IS__CGSCC_NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, 
i32 0 -; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 ; IS__CGSCC_NPM-NEXT: ret i32 [[TMP1]] ; entry: @@ -65,37 +56,42 @@ entry: define internal i32 @g(%struct.ss* byval align 32 %b) nounwind { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__TUNIT_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 -; IS__TUNIT_OPM-NEXT: ret i32 [[TMP2]] +; IS________OPM-LABEL: define {{[^@]+}}@g +; IS________OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; IS________OPM-NEXT: entry: +; IS________OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; IS________OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 +; IS________OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IS________OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 +; IS________OPM-NEXT: ret i32 [[TMP2]] ; -; IS________NPM-LABEL: define {{[^@]+}}@g -; IS________NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) -; IS________NPM-NEXT: entry: -; IS________NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] -; IS________NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* -; IS________NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] -; IS________NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 -; IS________NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] -; IS________NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 -; IS________NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 -; IS________NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS________NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 -; IS________NPM-NEXT: ret i32 [[TMP2]] +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@g +; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; IS__TUNIT_NPM-NEXT: entry: +; IS__TUNIT_NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; IS__TUNIT_NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; IS__TUNIT_NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IS__TUNIT_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 +; IS__TUNIT_NPM-NEXT: ret i32 [[TMP2]] ; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@g -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(4) [[B:%.*]]) -; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 -; IS__CGSCC_OPM-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; IS__CGSCC_OPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 -; IS__CGSCC_OPM-NEXT: ret i32 [[TMP2]] +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@g +; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; IS__CGSCC_NPM-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4 +; IS__CGSCC_NPM-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; IS__CGSCC_NPM-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IS__CGSCC_NPM-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]] ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -146,8 +142,8 @@ define i32 @main() nounwind { ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 32 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__CGSCC_OPM-NEXT: ret i32 [[A]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll index e8fd28fa817dd..9c0e0c26a4e9d 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs 
-attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll index 2e6d68463771e..416169dd71451 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -28,12 +28,12 @@ define internal i32 @callee(i1 %C, i32* %P) { ; IS__TUNIT_NPM-NEXT: ret i32 [[X]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee -; IS__CGSCC____-SAME: (i32* nocapture nofree readonly [[P:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; IS__CGSCC____-NEXT: br label [[F:%.*]] ; IS__CGSCC____: T: ; IS__CGSCC____-NEXT: unreachable ; IS__CGSCC____: F: -; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4 ; IS__CGSCC____-NEXT: ret i32 [[X]] ; br i1 %C, label %T, label %F @@ -63,7 +63,7 @@ define i32 @foo() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo() ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32 ; IS__CGSCC____-NEXT: store i32 17, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll index 37cb25bce5da2..885102f1d314a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -77,9 +77,9 @@ define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]] ; IS__CGSCC____-NEXT: [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32* -; IS__CGSCC____-NEXT: store i32 [[TMP0]], i32* [[A_PRIV_CAST]] +; IS__CGSCC____-NEXT: store i32 [[TMP0]], i32* [[A_PRIV_CAST]], align 4 ; IS__CGSCC____-NEXT: [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1 -; IS__CGSCC____-NEXT: store i64 [[TMP1]], i64* [[A_PRIV_0_1]] +; IS__CGSCC____-NEXT: store i64 [[TMP1]], i64* [[A_PRIV_0_1]], align 8 ; IS__CGSCC____-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* ; IS__CGSCC____-NEXT: br label [[LOOP:%.*]] ; IS__CGSCC____: loop: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index beefa33a436af..efaf6e36a2f1b 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -40,25 +40,15 @@ entry: } define i32 @main() { -; IS__TUNIT____-LABEL: define 
{{[^@]+}}@main() -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] -; IS__TUNIT____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; IS__TUNIT____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT____-NEXT: store i32 1, i32* [[F0]], align 4 -; IS__TUNIT____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__TUNIT____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) -; IS__TUNIT____-NEXT: ret i32 [[R]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@main() -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] -; IS__CGSCC____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; IS__CGSCC____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__CGSCC____-NEXT: store i32 1, i32* [[F0]], align 4 -; IS__CGSCC____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__CGSCC____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nofree nonnull align 4 dereferenceable(8) [[S]]) -; IS__CGSCC____-NEXT: ret i32 [[R]] +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i32 1, i32* [[F0]], align 4 +; CHECK-NEXT: store i32 2, i32* [[F1]], align 4 +; CHECK-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; CHECK-NEXT: ret i32 [[R]] ; entry: %S = alloca inalloca %struct.ss diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index bb7af74ed57ed..48259aa42533f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -20,14 +20,23 @@ define internal i32 @test(i32* %X, i32* %Y) { ; IS__TUNIT____: dead: ; IS__TUNIT____-NEXT: unreachable ; -; IS__CGSCC____-LABEL: define {{[^@]+}}@test -; IS__CGSCC____-SAME: (i32* nocapture nofree writeonly [[X:%.*]]) -; IS__CGSCC____-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] -; IS__CGSCC____: live: -; IS__CGSCC____-NEXT: store i32 0, i32* [[X]] -; IS__CGSCC____-NEXT: ret i32 undef -; IS__CGSCC____: dead: -; IS__CGSCC____-NEXT: unreachable +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__CGSCC_OPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] +; IS__CGSCC_OPM: live: +; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 +; IS__CGSCC_OPM-NEXT: ret i32 undef +; IS__CGSCC_OPM: dead: +; IS__CGSCC_OPM-NEXT: unreachable +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] +; IS__CGSCC_NPM: live: +; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X]], align 4 +; IS__CGSCC_NPM-NEXT: ret i32 undef +; IS__CGSCC_NPM: dead: +; IS__CGSCC_NPM-NEXT: unreachable ; br i1 true, label %live, label %dead live: @@ -48,17 +57,17 @@ define internal i32 @caller(i32* %B) { ; 
IS__TUNIT____-NEXT: ret i32 undef ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32 ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree writeonly [[B]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree writeonly [[B:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree writeonly [[B]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 @@ -68,17 +77,11 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; IS__TUNIT____-LABEL: define {{[^@]+}}@callercaller() -; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32 -; IS__TUNIT____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) -; IS__TUNIT____-NEXT: ret i32 0 -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller() -; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32 -; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: ret i32 0 +; CHECK-LABEL: define {{[^@]+}}@callercaller() +; CHECK-NEXT: [[B:%.*]] = alloca i32 +; CHECK-NEXT: store i32 2, i32* [[B]], align 4 +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: ret i32 0 ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll index a5e0140977fe4..16038889314a7 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll @@ -73,7 +73,7 @@ define i32 @caller2(%T* %g) { ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller2 -; IS__CGSCC____-SAME: (%T* nocapture nofree readonly [[G:%.*]]) +; IS__CGSCC____-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) ; IS__CGSCC____-NEXT: [[V:%.*]] = call i32 @test2(%T* nocapture nofree readonly [[G]], i32 0) ; IS__CGSCC____-NEXT: ret i32 [[V]] ; @@ -124,7 +124,7 @@ define i32 @caller2b(%T* %g) { ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller2b -; IS__CGSCC____-SAME: (%T* nocapture nofree readonly [[G:%.*]]) +; IS__CGSCC____-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) ; IS__CGSCC____-NEXT: [[V:%.*]] = call i32 @test2b(%T* nocapture nofree readonly [[G]], i32 0) ; IS__CGSCC____-NEXT: ret i32 [[V]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr32917.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr32917.ll index c03473bcf1160..91d6355b5abc5 100644 --- 
a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr32917.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr32917.ll @@ -13,14 +13,14 @@ define i32 @fn2() local_unnamed_addr { ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; IS__TUNIT____-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* -; IS__TUNIT____-NEXT: call fastcc void @fn1(i32* nofree readonly align 4 [[TMP3]]) +; IS__TUNIT____-NEXT: call fastcc void @fn1(i32* nocapture nofree readonly align 4 [[TMP3]]) ; IS__TUNIT____-NEXT: ret i32 undef ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; IS__CGSCC____-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* -; IS__CGSCC____-NEXT: call fastcc void @fn1(i32* nofree nonnull readonly align 4 [[TMP3]]) +; IS__CGSCC____-NEXT: call fastcc void @fn1(i32* nocapture nofree nonnull readonly align 4 [[TMP3]]) ; IS__CGSCC____-NEXT: ret i32 undef ; %1 = load i32, i32* @b, align 4 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll index e3842c1aa213e..b4f3e00b53786 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -8,11 +8,11 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 ; Checks if !prof metadata is correct in deadargelim. define void @caller() #0 { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller() -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = alloca i32 -; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[X]], align 4 -; IS__TUNIT_OPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 -; IS__TUNIT_OPM-NEXT: ret void +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@caller() +; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32 +; NOT_TUNIT_NPM-NEXT: store i32 42, i32* [[X]], align 4 +; NOT_TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 +; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() ; IS__TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32 @@ -20,12 +20,6 @@ define void @caller() #0 { ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 1 ; IS__TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0 ; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@caller() -; IS__CGSCC____-NEXT: [[X:%.*]] = alloca i32 -; IS__CGSCC____-NEXT: store i32 42, i32* [[X]], align 4 -; IS__CGSCC____-NEXT: call void @promote_i32_ptr(i32* noalias nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0 -; IS__CGSCC____-NEXT: ret void ; %x = alloca i32 store i32 42, i32* %x diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll index ac61432c1843a..a646b258a0fcc 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll @@ -48,7 +48,7 @@ define i32 @main() { ; IS__TUNIT____-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !5 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !5 ; IS__TUNIT____-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !0 -; IS__TUNIT____-NEXT: call fastcc void @fn(i32*
nofree nonnull readonly align 4 dereferenceable(4) @g) +; IS__TUNIT____-NEXT: call fastcc void @fn(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @g) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@main() diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll index 1affb64add139..cad432d27e663 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -49,29 +49,17 @@ define internal void @add({i32, i32}* %this, i32* sret %r) { } define void @f() { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f() -; IS__TUNIT_OPM-NEXT: [[R:%.*]] = alloca i32 -; IS__TUNIT_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f() -; IS__TUNIT_NPM-NEXT: [[R:%.*]] = alloca i32 -; IS__TUNIT_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f() -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = alloca i32 -; IS__CGSCC_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) -; IS__CGSCC_OPM-NEXT: ret void +; IS________OPM-LABEL: define {{[^@]+}}@f() +; IS________OPM-NEXT: [[R:%.*]] = alloca i32 +; IS________OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; IS________OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS________OPM-NEXT: ret void ; -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f() -; IS__CGSCC_NPM-NEXT: [[R:%.*]] = alloca i32 -; IS__CGSCC_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM-LABEL: define {{[^@]+}}@f() +; IS________NPM-NEXT: [[R:%.*]] = alloca i32 +; IS________NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; IS________NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) +; IS________NPM-NEXT: ret void ; %r = alloca i32 %pair = alloca {i32, i32} diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll index c1388072f5a5d..2797389a442b1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll @@ -17,15 +17,25 @@ define internal void @bar(%pair* byval %Data) { ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]]) ; IS________OPM-NEXT: ret void ; -; IS________NPM-LABEL: define {{[^@]+}}@bar -; IS________NPM-SAME: 
(i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) -; IS________NPM-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] -; IS________NPM-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* -; IS________NPM-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]] -; IS________NPM-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 -; IS________NPM-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]] -; IS________NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]]) -; IS________NPM-NEXT: ret void +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar +; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; IS__TUNIT_NPM-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] +; IS__TUNIT_NPM-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* +; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]] +; IS__TUNIT_NPM-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]] +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]]) +; IS__TUNIT_NPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar +; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; IS__CGSCC_NPM-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] +; IS__CGSCC_NPM-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* +; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4 +; IS__CGSCC_NPM-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 +; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]], align 4 +; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull align 4 dereferenceable(8) [[DATA_PRIV]]) +; IS__CGSCC_NPM-NEXT: ret void ; tail call i8* @foo(%pair* %Data) ret void diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll index cfac2978e05c2..0a743608de624 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -27,7 +27,7 @@ define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind { ; IS__CGSCC_NPM-NEXT: [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8* ; IS__CGSCC_NPM-NEXT: store i8 [[TMP0]], i8* [[U_PRIV_CAST]] ; IS__CGSCC_NPM-NEXT: [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1 ; IS__CGSCC_NPM-NEXT: store i32 99, i32* [[TMP2]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0 @@ -98,7 +98,7 @@ entry: define i32 @unions() nounwind { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions() ; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) +; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions() @@ -173,7 +173,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly ; 
IS__CGSCC_NPM-NEXT: [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8* ; IS__CGSCC_NPM-NEXT: store i8 [[TMP0]], i8* [[U_PRIV_CAST]] ; IS__CGSCC_NPM-NEXT: [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]] +; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4 ; IS__CGSCC_NPM-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1 ; IS__CGSCC_NPM-NEXT: store i32 99, i32* [[Z]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1 @@ -199,7 +199,7 @@ entry: define i32 @unions_v2() nounwind { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions_v2() ; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) +; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr) ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions_v2() @@ -213,7 +213,7 @@ define i32 @unions_v2() nounwind { ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@unions_v2() ; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nofree nonnull readnone byval align 8 dereferenceable(8) @mystr) +; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(8) @mystr) ; IS__CGSCC_OPM-NEXT: ret i32 [[RESULT]] ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@unions_v2() diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll index 0c1884ff0a89c..57912e7490465 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -8,15 +8,15 @@ target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define i64 @fn2() { -; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@fn2() -; NOT_TUNIT_NPM-NEXT: entry: -; NOT_TUNIT_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) -; NOT_TUNIT_NPM-NEXT: ret i64 [[CALL2]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2() +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) #0, !range !0 +; IS__TUNIT____-NEXT: ret i64 [[CALL2]] ; -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2() -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) #1, !range !0 -; IS__TUNIT_NPM-NEXT: ret i64 [[CALL2]] +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2() +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) +; IS__CGSCC____-NEXT: ret i64 [[CALL2]] ; entry: %conv = sext i32 undef to i64 @@ -26,21 +26,21 @@ entry: } define i64 @fn2b(i32 %arg) { -; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@fn2b -; NOT_TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) -; NOT_TUNIT_NPM-NEXT: entry: -; NOT_TUNIT_NPM-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 -; NOT_TUNIT_NPM-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] -; NOT_TUNIT_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) -; NOT_TUNIT_NPM-NEXT: ret i64 [[CALL2]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2b +; IS__TUNIT____-SAME: (i32 [[ARG:%.*]]) +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; IS__TUNIT____-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #0, !range !0 +; IS__TUNIT____-NEXT: ret i64 [[CALL2]] ; -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2b -; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 -; IS__TUNIT_NPM-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] -; IS__TUNIT_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #1, !range !0 -; IS__TUNIT_NPM-NEXT: ret i64 [[CALL2]] +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2b +; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; IS__CGSCC____-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) +; IS__CGSCC____-NEXT: ret i64 [[CALL2]] ; entry: %conv = sext i32 %arg to i64 @@ -50,15 +50,10 @@ entry: } define i64 @fn2c() { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2c() -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 42) -; IS__TUNIT_OPM-NEXT: ret i64 [[CALL2]] -; -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2c() -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 42) #1, !range !0 -; IS__TUNIT_NPM-NEXT: ret i64 [[CALL2]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2c() +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 42) #0, !range !0 +; IS__TUNIT____-NEXT: ret i64 [[CALL2]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2c() ; IS__CGSCC____-NEXT: entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll index 778ef542fe3d7..9ddb65ff28153 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; @@ -83,66 +83,66 @@ entry: } define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) { -; IS________OPM-LABEL: define {{[^@]+}}@.omp_outlined. 
-; IS________OPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 -; IS________OPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: store i64 4617315517961601024, i64* [[Q_ADDR]], align 8 -; IS________OPM-NEXT: [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double* -; IS________OPM-NEXT: [[TMP:%.*]] = load i32, i32* [[N]], align 4 -; IS________OPM-NEXT: [[SUB3:%.*]] = add nsw i32 [[TMP]], -3 -; IS________OPM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2 -; IS________OPM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -; IS________OPM: omp.precond.then: -; IS________OPM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -; IS________OPM-NEXT: store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4 -; IS________OPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -; IS________OPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -; IS________OPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS________OPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) -; IS________OPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS________OPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]] -; IS________OPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; IS________OPM: cond.true: -; IS________OPM-NEXT: br label [[COND_END:%.*]] -; IS________OPM: cond.false: -; IS________OPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS________OPM-NEXT: br label [[COND_END]] -; IS________OPM: cond.end: -; IS________OPM-NEXT: [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] -; IS________OPM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -; IS________OPM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -; IS________OPM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -; IS________OPM: omp.inner.for.cond: -; IS________OPM-NEXT: [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ] -; IS________OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS________OPM-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]] -; IS________OPM-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]] -; IS________OPM: omp.inner.for.cond.cleanup: -; IS________OPM-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -; IS________OPM: omp.inner.for.body: -; IS________OPM-NEXT: [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2 -; IS________OPM-NEXT: [[TMP10:%.*]] = load float, float* [[P]], align 4 -; IS________OPM-NEXT: [[TMP11:%.*]] = load double, double* [[CONV]], align 8 -; IS________OPM-NEXT: call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]]) -; 
IS________OPM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -; IS________OPM: omp.body.continue: -; IS________OPM-NEXT: br label [[OMP_INNER_FOR_INC]] -; IS________OPM: omp.inner.for.inc: -; IS________OPM-NEXT: [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1 -; IS________OPM-NEXT: br label [[OMP_INNER_FOR_COND]] -; IS________OPM: omp.inner.for.end: -; IS________OPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -; IS________OPM: omp.loop.exit: -; IS________OPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS________OPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]]) -; IS________OPM-NEXT: br label [[OMP_PRECOND_END]] -; IS________OPM: omp.precond.end: -; IS________OPM-NEXT: ret void +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined. +; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) +; NOT_TUNIT_NPM-NEXT: entry: +; NOT_TUNIT_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 +; NOT_TUNIT_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +; NOT_TUNIT_NPM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +; NOT_TUNIT_NPM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +; NOT_TUNIT_NPM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +; NOT_TUNIT_NPM-NEXT: store i64 4617315517961601024, i64* [[Q_ADDR]], align 8 +; NOT_TUNIT_NPM-NEXT: [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double* +; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load i32, i32* [[N]], align 4 +; NOT_TUNIT_NPM-NEXT: [[SUB3:%.*]] = add nsw i32 [[TMP]], -3 +; NOT_TUNIT_NPM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2 +; NOT_TUNIT_NPM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +; NOT_TUNIT_NPM: omp.precond.then: +; NOT_TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +; NOT_TUNIT_NPM-NEXT: store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4 +; NOT_TUNIT_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +; NOT_TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +; NOT_TUNIT_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) +; NOT_TUNIT_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +; NOT_TUNIT_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]] +; NOT_TUNIT_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; NOT_TUNIT_NPM: cond.true: +; NOT_TUNIT_NPM-NEXT: br label [[COND_END:%.*]] +; NOT_TUNIT_NPM: cond.false: +; NOT_TUNIT_NPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +; NOT_TUNIT_NPM-NEXT: br label [[COND_END]] +; NOT_TUNIT_NPM: cond.end: +; NOT_TUNIT_NPM-NEXT: [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +; NOT_TUNIT_NPM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +; NOT_TUNIT_NPM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +; NOT_TUNIT_NPM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +; NOT_TUNIT_NPM: omp.inner.for.cond: +; NOT_TUNIT_NPM-NEXT: 
[[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ] +; NOT_TUNIT_NPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +; NOT_TUNIT_NPM-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]] +; NOT_TUNIT_NPM-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]] +; NOT_TUNIT_NPM: omp.inner.for.cond.cleanup: +; NOT_TUNIT_NPM-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +; NOT_TUNIT_NPM: omp.inner.for.body: +; NOT_TUNIT_NPM-NEXT: [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2 +; NOT_TUNIT_NPM-NEXT: [[TMP10:%.*]] = load float, float* [[P]], align 4 +; NOT_TUNIT_NPM-NEXT: [[TMP11:%.*]] = load double, double* [[CONV]], align 8 +; NOT_TUNIT_NPM-NEXT: call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]]) +; NOT_TUNIT_NPM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +; NOT_TUNIT_NPM: omp.body.continue: +; NOT_TUNIT_NPM-NEXT: br label [[OMP_INNER_FOR_INC]] +; NOT_TUNIT_NPM: omp.inner.for.inc: +; NOT_TUNIT_NPM-NEXT: [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1 +; NOT_TUNIT_NPM-NEXT: br label [[OMP_INNER_FOR_COND]] +; NOT_TUNIT_NPM: omp.inner.for.end: +; NOT_TUNIT_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +; NOT_TUNIT_NPM: omp.loop.exit: +; NOT_TUNIT_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]]) +; NOT_TUNIT_NPM-NEXT: br label [[OMP_PRECOND_END]] +; NOT_TUNIT_NPM: omp.precond.end: +; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined. ; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) @@ -205,67 +205,6 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. ; IS__TUNIT_NPM: omp.precond.end: ; IS__TUNIT_NPM-NEXT: ret void ; -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@.omp_outlined. 
-; IS__CGSCC_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 -; IS__CGSCC_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: store i64 4617315517961601024, i64* [[Q_ADDR]], align 8 -; IS__CGSCC_NPM-NEXT: [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double* -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load i32, i32* [[N]], align 4 -; IS__CGSCC_NPM-NEXT: [[SUB3:%.*]] = add nsw i32 [[TMP]], -3 -; IS__CGSCC_NPM-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2 -; IS__CGSCC_NPM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -; IS__CGSCC_NPM: omp.precond.then: -; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -; IS__CGSCC_NPM-NEXT: store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4 -; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -; IS__CGSCC_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS__CGSCC_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1) -; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS__CGSCC_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]] -; IS__CGSCC_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; IS__CGSCC_NPM: cond.true: -; IS__CGSCC_NPM-NEXT: br label [[COND_END:%.*]] -; IS__CGSCC_NPM: cond.false: -; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS__CGSCC_NPM-NEXT: br label [[COND_END]] -; IS__CGSCC_NPM: cond.end: -; IS__CGSCC_NPM-NEXT: [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] -; IS__CGSCC_NPM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -; IS__CGSCC_NPM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -; IS__CGSCC_NPM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -; IS__CGSCC_NPM: omp.inner.for.cond: -; IS__CGSCC_NPM-NEXT: [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ] -; IS__CGSCC_NPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -; IS__CGSCC_NPM-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]] -; IS__CGSCC_NPM-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]] -; IS__CGSCC_NPM: omp.inner.for.cond.cleanup: -; IS__CGSCC_NPM-NEXT: br label [[OMP_INNER_FOR_END:%.*]] -; IS__CGSCC_NPM: omp.inner.for.body: -; IS__CGSCC_NPM-NEXT: [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2 -; IS__CGSCC_NPM-NEXT: [[TMP10:%.*]] = load float, float* [[P]], align 4 -; IS__CGSCC_NPM-NEXT: [[TMP11:%.*]] = load double, double* [[CONV]], align 8 -; IS__CGSCC_NPM-NEXT: call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]]) -; 
IS__CGSCC_NPM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -; IS__CGSCC_NPM: omp.body.continue: -; IS__CGSCC_NPM-NEXT: br label [[OMP_INNER_FOR_INC]] -; IS__CGSCC_NPM: omp.inner.for.inc: -; IS__CGSCC_NPM-NEXT: [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1 -; IS__CGSCC_NPM-NEXT: br label [[OMP_INNER_FOR_COND]] -; IS__CGSCC_NPM: omp.inner.for.end: -; IS__CGSCC_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -; IS__CGSCC_NPM: omp.loop.exit: -; IS__CGSCC_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; IS__CGSCC_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]]) -; IS__CGSCC_NPM-NEXT: br label [[OMP_PRECOND_END]] -; IS__CGSCC_NPM: omp.precond.end: -; IS__CGSCC_NPM-NEXT: ret void -; entry: %q.addr = alloca i64, align 8 %.omp.lb = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 78272c19cc993..54e99802556f0 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -36,10 +36,10 @@ define dso_local i32 @main() { ; IS__TUNIT____-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; IS__TUNIT____-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; IS__TUNIT____-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nofree readnone align 536870912 undef) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) undef) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) -; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 undef) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 
dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]]) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@main() @@ -47,10 +47,10 @@ define dso_local i32 @main() { ; IS__CGSCC_OPM-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; IS__CGSCC_OPM-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; IS__CGSCC_OPM-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias align 536870912 null) -; IS__CGSCC_OPM-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @bar, i8* nonnull align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) -; IS__CGSCC_OPM-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @baz, i8* nocapture nonnull align 8 dereferenceable(1) [[ALLOC1]]) -; IS__CGSCC_OPM-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @buz, i8* nonnull align 8 dereferenceable(1) [[ALLOC2]]) +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture align 536870912 null) +; IS__CGSCC_OPM-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* nonnull align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) +; IS__CGSCC_OPM-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* nocapture nonnull align 8 dereferenceable(1) [[ALLOC1]]) +; IS__CGSCC_OPM-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* nonnull align 8 dereferenceable(1) [[ALLOC2]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@main() @@ -58,10 +58,10 @@ define dso_local i32 @main() { ; IS__CGSCC_NPM-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; IS__CGSCC_NPM-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; IS__CGSCC_NPM-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nofree readnone align 536870912 null) -; IS__CGSCC_NPM-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) -; IS__CGSCC_NPM-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) -; IS__CGSCC_NPM-NEXT: 
[[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 null) +; IS__CGSCC_NPM-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) +; IS__CGSCC_NPM-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; IS__CGSCC_NPM-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) ; IS__CGSCC_NPM-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll index 036a2e81e2a61..12dd77a61b986 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -42,12 +42,12 @@ entry: define dso_local void @caller() { ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller() ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: call void @broker(i32* nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nofree nonnull readonly align 4 dereferenceable(4) undef) +; IS__TUNIT____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) undef) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: call void @broker(i32* nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nofree nonnull readonly align 4 dereferenceable(4) @gsh) +; IS__CGSCC____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gsh) ; IS__CGSCC____-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index e6245c6033f7d..059234d12c897 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -184,7 +184,7 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 16 
dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -206,7 +206,7 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; Better than IR information define align 4 i8* @test7() #0 { ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7() -; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; IS__TUNIT____-NEXT: ret i8* [[C]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7() @@ -300,7 +300,7 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -319,10 +319,15 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { } define align 4 i32* @test7b(i32* align 32 %p) #0 { -; CHECK-LABEL: define {{[^@]+}}@test7b -; CHECK-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) -; CHECK-NEXT: ret i32* [[P]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test7b +; IS__TUNIT____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; IS__TUNIT____-NEXT: ret i32* [[P]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test7b +; IS__CGSCC____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__CGSCC____-NEXT: ret i32* [[P]] ; tail call i8* @f1b(i8* align 8 dereferenceable(1) @a1) ret i32* %p @@ -334,9 +339,9 @@ define void @test8_helper() { ; CHECK-NEXT: [[PTR0:%.*]] = tail call i32* @unknown() ; CHECK-NEXT: [[PTR1:%.*]] = tail call align 4 i32* @unknown() ; CHECK-NEXT: [[PTR2:%.*]] = tail call align 8 i32* @unknown() -; CHECK-NEXT: tail call void @test8(i32* noalias readnone align 4 [[PTR1]], i32* noalias readnone align 4 [[PTR1]], i32* noalias readnone [[PTR0]]) -; CHECK-NEXT: tail call void @test8(i32* noalias readnone align 8 [[PTR2]], i32* noalias readnone align 4 [[PTR1]], i32* noalias readnone align 4 [[PTR1]]) -; CHECK-NEXT: tail call void @test8(i32* noalias readnone align 8 [[PTR2]], i32* noalias readnone align 4 [[PTR1]], i32* noalias readnone align 4 [[PTR1]]) +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone [[PTR0]]) +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) +; 
CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) ; CHECK-NEXT: ret void ; %ptr0 = tail call i32* @unknown() diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 7d375a3ce8f87..89f16a5acab63 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -25,7 +25,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -37,11 +37,11 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller -; IS__CGSCC_OPM-SAME: (i32* [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* align 256 [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[B:%.*]] = alloca i32, align 32 ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -49,7 +49,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -61,7 +61,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -133,7 +133,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -145,11 +145,11 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller -; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[B:%.*]] = alloca i32, align 32 ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -157,7 +157,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -169,7 +169,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -193,7 +193,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS________OPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS________OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_callback_callee @@ -202,7 +202,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__TUNIT_NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_callback_callee @@ -211,7 +211,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__CGSCC_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__CGSCC_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 8 -; IS__CGSCC_NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__CGSCC_NPM-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -240,7 +240,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -252,11 +252,11 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller -; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[B:%.*]] = alloca i32, align 32 ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -264,7 +264,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -276,7 +276,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -302,7 +302,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS________OPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS________OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_callback_callee @@ -311,7 +311,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__TUNIT_NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_callback_callee @@ -320,7 +320,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__CGSCC_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__CGSCC_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 8 -; IS__CGSCC_NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__CGSCC_NPM-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -349,8 +349,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -362,12 +362,12 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller -; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[B:%.*]] = alloca i32, align 32 ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -375,8 +375,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -388,8 +388,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -416,7 +416,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS________OPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS________OPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS________OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_callback_callee @@ -425,7 +425,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__TUNIT_NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_callback_callee @@ -434,7 +434,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; IS__CGSCC_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__CGSCC_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 8 -; IS__CGSCC_NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; IS__CGSCC_NPM-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* nocapture [[TMP0]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index addb7de8d70ab..973e76a92eec8 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -43,19 +43,12 @@ define void @nofree_arg_only(i8* %p1, i8* %p2) { ; TEST 1 - negative, pointer freed in another function. 
define void @test1() { -; IS________OPM-LABEL: define {{[^@]+}}@test1() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias [[TMP1]]) -; IS________OPM-NEXT: tail call void (...) @func_throws() -; IS________OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS________OPM-NEXT: ret void -; -; IS________NPM-LABEL: define {{[^@]+}}@test1() -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________NPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture [[TMP1]]) -; IS________NPM-NEXT: tail call void (...) @func_throws() -; IS________NPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS________NPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test1() +; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture [[TMP1]]) +; CHECK-NEXT: tail call void (...) @func_throws() +; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nocapture_func_frees_pointer(i8* %1) @@ -70,7 +63,7 @@ define void @test2() { ; CHECK-LABEL: define {{[^@]+}}@test2() ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) ; CHECK-NEXT: tail call void @sync_func(i8* [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -84,8 +77,8 @@ define void @test2() { define void @test3() { ; IS________OPM-LABEL: define {{[^@]+}}@test3() ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3() @@ -100,25 +93,18 @@ define void @test3() { } define void @test3a(i8* %p) { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3a -; IS__TUNIT_OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS__TUNIT_OPM-NEXT: ret void +; IS________OPM-LABEL: define {{[^@]+}}@test3a +; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3a ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4 ; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void -; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3a -; IS__CGSCC_OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nofree 
[[TMP1]], i8* nocapture [[P]]) -; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nofree_arg_only(i8* %1, i8* %p) @@ -129,25 +115,18 @@ define void @test3a(i8* %p) { declare noalias i8* @aligned_alloc(i64, i64) define void @test3b(i8* %p) { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3b -; IS__TUNIT_OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 32, i64 128) -; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS__TUNIT_OPM-NEXT: ret void +; IS________OPM-LABEL: define {{[^@]+}}@test3b +; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 32, i64 128) +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3b ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 128, align 32 ; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void -; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3b -; IS__CGSCC_OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 32, i64 128) -; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nofree [[TMP1]], i8* nocapture [[P]]) -; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) -; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128) tail call void @nofree_arg_only(i8* %1, i8* %p) @@ -160,7 +139,7 @@ define void @test3c(i64 %alignment) { ; CHECK-LABEL: define {{[^@]+}}@test3c ; CHECK-SAME: (i64 [[ALIGNMENT:%.*]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) -; CHECK-NEXT: tail call void @free(i8* noalias [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128) @@ -173,8 +152,8 @@ declare noalias i8* @calloc(i64, i64) define void @test0() { ; IS________OPM-LABEL: define {{[^@]+}}@test0() ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 2, i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test0() @@ -194,7 +173,7 @@ define void @test0() { define void @test4() { ; IS________OPM-LABEL: define {{[^@]+}}@test4() ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nofree [[TMP1]]) +; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test4() @@ -211,20 +190,20 @@ define void @test4() { ; are in nofree functions and are not captured define void 
@test5(i32, i8* %p) { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test5 -; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS__TUNIT_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; IS__TUNIT_OPM: 4: -; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nofree [[TMP2]]) -; IS__TUNIT_OPM-NEXT: br label [[TMP6:%.*]] -; IS__TUNIT_OPM: 5: -; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP2]], i8* nocapture [[P]]) -; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias [[TMP2]]) -; IS__TUNIT_OPM-NEXT: br label [[TMP6]] -; IS__TUNIT_OPM: 6: -; IS__TUNIT_OPM-NEXT: ret void +; IS________OPM-LABEL: define {{[^@]+}}@test5 +; IS________OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) +; IS________OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS________OPM: 4: +; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________OPM-NEXT: br label [[TMP6:%.*]] +; IS________OPM: 5: +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP2]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) +; IS________OPM-NEXT: br label [[TMP6]] +; IS________OPM: 6: +; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test5 ; IS________NPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) @@ -239,21 +218,6 @@ define void @test5(i32, i8* %p) { ; IS________NPM-NEXT: br label [[TMP6]] ; IS________NPM: 6: ; IS________NPM-NEXT: ret void -; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test5 -; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS__CGSCC_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; IS__CGSCC_OPM: 4: -; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nofree [[TMP2]]) -; IS__CGSCC_OPM-NEXT: br label [[TMP6:%.*]] -; IS__CGSCC_OPM: 5: -; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nofree [[TMP2]], i8* nocapture [[P]]) -; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias [[TMP2]]) -; IS__CGSCC_OPM-NEXT: br label [[TMP6]] -; IS__CGSCC_OPM: 6: -; IS__CGSCC_OPM-NEXT: ret void ; %2 = tail call noalias i8* @malloc(i64 4) %3 = icmp eq i32 %0, 0 @@ -281,11 +245,11 @@ define void @test6(i32) { ; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS________OPM: 4: -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nofree [[TMP2]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias [[TMP2]]) +; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) ; IS________OPM-NEXT: br label [[TMP6:%.*]] ; IS________OPM: 5: -; IS________OPM-NEXT: tail call void @free(i8* noalias [[TMP2]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) ; IS________OPM-NEXT: br label [[TMP6]] ; IS________OPM: 6: ; IS________OPM-NEXT: ret void @@ -348,7 +312,7 @@ define void @test8() { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* 
[[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 ; CHECK-NEXT: tail call void @foo(i32* align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -369,7 +333,7 @@ define void @test9() { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 ; CHECK-NEXT: tail call void @foo_nounw(i32* nofree align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -387,11 +351,11 @@ define void @test9() { define i32 @test10() { ; IS________OPM-LABEL: define {{[^@]+}}@test10() ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) ; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________OPM-NEXT: ret i32 [[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test10() @@ -414,18 +378,18 @@ define i32 @test10() { define i32 @test_lifetime() { ; IS________OPM-LABEL: define {{[^@]+}}@test_lifetime() ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) -; IS________OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________OPM-NEXT: ret i32 [[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test_lifetime() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4 ; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________NPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 @@ -447,7 +411,7 @@ define void @test11() { ; IS________OPM-LABEL: define {{[^@]+}}@test11() ; IS________OPM-NEXT: 
[[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) ; IS________OPM-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test11() @@ -490,7 +454,7 @@ define i32 @irreducible_cfg(i32 %0) { ; IS________OPM-NEXT: br label [[TMP8]] ; IS________OPM: 15: ; IS________OPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8* -; IS________OPM-NEXT: call void @free(i8* [[TMP16]]) +; IS________OPM-NEXT: call void @free(i8* nocapture [[TMP16]]) ; IS________OPM-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4 ; IS________OPM-NEXT: ret i32 [[TMP17]] ; @@ -629,11 +593,11 @@ define i32 @malloc_in_loop(i32 %0) { define i32 @test13() { ; CHECK-LABEL: define {{[^@]+}}@test13() ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 256) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) +; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 256) @@ -648,11 +612,11 @@ define i32 @test13() { define i32 @test_sle() { ; CHECK-LABEL: define {{[^@]+}}@test_sle() ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 -1) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) +; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 -1) @@ -667,11 +631,11 @@ define i32 @test_sle() { define i32 @test_overflow() { ; CHECK-LABEL: define {{[^@]+}}@test_overflow() ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 65537, i64 65537) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) +; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nonnull align 4 dereferenceable(4) [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @calloc(i64 65537, i64 65537) @@ -686,8 +650,8 @@ define i32 @test_overflow() { define void @test14() { ; CHECK-LABEL: define {{[^@]+}}@test14() ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 64, i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias [[TMP1]]) +; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; CHECK-NEXT: 
tail call void @free(i8* noalias nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @calloc(i64 64, i64 4) @@ -700,8 +664,8 @@ define void @test15(i64 %S) { ; CHECK-LABEL: define {{[^@]+}}@test15 ; CHECK-SAME: (i64 [[S:%.*]]) ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]]) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias [[TMP1]]) +; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 %S) @@ -715,8 +679,8 @@ define void @test16a(i8 %v, i8** %P) { ; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) ; IS________OPM-NEXT: store i8 [[V]], i8* [[TMP1]] -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nofree nonnull dereferenceable(1) [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nonnull dereferenceable(1) [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree nonnull dereferenceable(1) [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull dereferenceable(1) [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test16a @@ -734,21 +698,13 @@ define void @test16a(i8 %v, i8** %P) { } define void @test16b(i8 %v, i8** %P) { -; IS________OPM-LABEL: define {{[^@]+}}@test16b -; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: store i8* [[TMP1]], i8** [[P]] -; IS________OPM-NEXT: tail call void @no_sync_func(i8* nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* [[TMP1]]) -; IS________OPM-NEXT: ret void -; -; IS________NPM-LABEL: define {{[^@]+}}@test16b -; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________NPM-NEXT: store i8* [[TMP1]], i8** [[P]] -; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) -; IS________NPM-NEXT: tail call void @free(i8* [[TMP1]]) -; IS________NPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test16b +; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]] +; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) +; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) +; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8* %1, i8** %P @@ -762,8 +718,8 @@ define void @test16c(i8 %v, i8** %P) { ; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) ; IS________OPM-NEXT: store i8* [[TMP1]], i8** [[P]] -; IS________OPM-NEXT: tail call void @no_sync_func(i8* nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* [[TMP1]]) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test16c diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll 
b/llvm/test/Transforms/Attributor/internal-noalias.ll index 50a3de608270f..2efc5e745c000 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -99,8 +99,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; IS__CGSCC____-NEXT: ret i32 [[ADD]] ; @@ -146,11 +146,11 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { } define i32 @visible_local_2() { -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@visible_local_2() -; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__TUNIT_OPM-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__TUNIT_OPM-NEXT: ret i32 [[CALL]] +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@visible_local_2() +; NOT_TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 +; NOT_TUNIT_NPM-NEXT: store i32 5, i32* [[B]], align 4 +; NOT_TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; NOT_TUNIT_NPM-NEXT: ret i32 [[CALL]] ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@visible_local_2() ; IS__TUNIT_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -159,12 +159,6 @@ define i32 @visible_local_2() { ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[B]], align 1 ; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 [[TMP1]], i32 [[TMP2]]) ; IS__TUNIT_NPM-NEXT: ret i32 [[CALL]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_2() -; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 store i32 5, i32* %B, align 4 @@ -191,17 +185,11 @@ define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { } define i32 @visible_local_3() { -; IS__TUNIT____-LABEL: define {{[^@]+}}@visible_local_3() -; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: store i32 5, i32* [[B]], align 4 -; 
IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) -; IS__TUNIT____-NEXT: ret i32 [[CALL]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_3() -; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nofree nonnull align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: ret i32 [[CALL]] +; CHECK-LABEL: define {{[^@]+}}@visible_local_3() +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 5, i32* [[B]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 store i32 5, i32* %B, align 4 diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index 06102151d253d..66c54a5a9305c 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1671,10 +1671,10 @@ define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) { ; CHECK-NEXT: [[PTR2:%.*]] = alloca i32, align 128 ; CHECK-NEXT: [[PTR3:%.*]] = alloca i32, align 128 ; CHECK-NEXT: [[PTR4:%.*]] = alloca i32, align 128 -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nofree nonnull @called_via_pointer_internal_1) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nofree nonnull @called_via_pointer_internal_2) +; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) +; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_1) +; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) +; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_2) ; CHECK-NEXT: ret void ; %ptr1 = alloca i32, align 128 diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 2b07926d8aef4..f2e58bc769df3 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -367,7 +367,7 @@ define void @callerC2() { ; CHECK-NOT: Function Attrs define void @callerD1() { ; CHECK-LABEL: define {{[^@]+}}@callerD1() 
-; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias nocapture align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]] ; CHECK-NEXT: ret void ; @@ -378,7 +378,7 @@ define void @callerD1() { ; CHECK-NOT: Function Attrs define void @callerD2() { ; CHECK-LABEL: define {{[^@]+}}@callerD2() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias nocapture align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]] ; CHECK-NEXT: ret void ; @@ -435,7 +435,7 @@ define void @writeonly_global() { ; CHECK-SAME: writeonly define void @writeonly_global_via_arg() { ; CHECK-LABEL: define {{[^@]+}}@writeonly_global_via_arg() -; CHECK-NEXT: call void @write_global_via_arg(i32* nofree nonnull writeonly align 4 dereferenceable(4) @G) +; CHECK-NEXT: call void @write_global_via_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) @G) ; CHECK-NEXT: ret void ; call void @write_global_via_arg(i32* @G) diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 790b5f1220c66..45f8dccec1085 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -161,7 +161,7 @@ define i8* @test6() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nonnull dereferenceable(2) [[ARRAYIDX]]) +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nocapture nonnull dereferenceable(2) [[ARRAYIDX]]) ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -233,7 +233,7 @@ define i8* @test8(i32* %0) nounwind uwtable { declare void @use_i8(i8* nocapture) define internal void @test9a(i8* %a, i8* %b) { ; CHECK-LABEL: define {{[^@]+}}@test9a() -; CHECK-NEXT: call void @use_i8(i8* noalias align 536870912 null) +; CHECK-NEXT: call void @use_i8(i8* noalias nocapture align 536870912 null) ; CHECK-NEXT: ret void ; call void @use_i8(i8* null) @@ -265,25 +265,15 @@ define internal void @test9c(i8* %a, i8* %b, i8* %c) { ret void } define void @test9_helper(i8* %a, i8* %b) { -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test9_helper -; NOT_CGSCC_NPM-SAME: (i8* nocapture [[A:%.*]], i8* nocapture [[B:%.*]]) -; NOT_CGSCC_NPM-NEXT: tail call void @test9a() -; NOT_CGSCC_NPM-NEXT: tail call void @test9a() -; NOT_CGSCC_NPM-NEXT: tail call void @test9b(i8* noalias nocapture [[A]], i8* nocapture [[B]]) -; NOT_CGSCC_NPM-NEXT: tail call void @test9b(i8* noalias nocapture [[B]], i8* noalias nocapture [[A]]) -; NOT_CGSCC_NPM-NEXT: tail call void @test9c(i8* noalias nocapture [[A]], i8* nocapture [[B]], i8* nocapture [[B]]) -; NOT_CGSCC_NPM-NEXT: tail call void @test9c(i8* noalias nocapture [[B]], i8* noalias nocapture [[A]], i8* noalias nocapture [[A]]) -; NOT_CGSCC_NPM-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@test9_helper -; IS__CGSCC____-SAME: (i8* nocapture [[A:%.*]], i8* nocapture [[B:%.*]]) -; IS__CGSCC____-NEXT: tail call void @test9a() -; IS__CGSCC____-NEXT: tail call void @test9a() -; IS__CGSCC____-NEXT: tail call void @test9b(i8* noalias [[A]], i8* [[B]]) -; IS__CGSCC____-NEXT: tail call void @test9b(i8* noalias [[B]], i8* noalias [[A]]) -; 
IS__CGSCC____-NEXT: tail call void @test9c(i8* noalias [[A]], i8* [[B]], i8* [[B]]) -; IS__CGSCC____-NEXT: tail call void @test9c(i8* noalias nocapture [[B]], i8* noalias [[A]], i8* noalias nocapture [[A]]) -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test9_helper +; CHECK-SAME: (i8* nocapture [[A:%.*]], i8* nocapture [[B:%.*]]) +; CHECK-NEXT: tail call void @test9a() +; CHECK-NEXT: tail call void @test9a() +; CHECK-NEXT: tail call void @test9b(i8* noalias nocapture [[A]], i8* nocapture [[B]]) +; CHECK-NEXT: tail call void @test9b(i8* noalias nocapture [[B]], i8* noalias nocapture [[A]]) +; CHECK-NEXT: tail call void @test9c(i8* noalias nocapture [[A]], i8* nocapture [[B]], i8* nocapture [[B]]) +; CHECK-NEXT: tail call void @test9c(i8* noalias nocapture [[B]], i8* noalias nocapture [[A]], i8* noalias nocapture [[A]]) +; CHECK-NEXT: ret void ; tail call void @test9a(i8* noalias %a, i8* %b) tail call void @test9a(i8* noalias %b, i8* noalias %a) @@ -342,23 +332,14 @@ define void @test11(i8* noalias %a) { declare void @use_nocapture(i8* nocapture) declare void @use(i8*) define void @test12_1() { -; IS________OPM-LABEL: define {{[^@]+}}@test12_1() -; IS________OPM-NEXT: [[A:%.*]] = alloca i8, align 4 -; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]]) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]]) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias [[B]]) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias [[B]]) -; IS________OPM-NEXT: ret void -; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_1() -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = alloca i8, align 4 -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test12_1() +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 4 +; CHECK-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) +; CHECK-NEXT: ret void ; %A = alloca i8, align 4 %B = tail call noalias i8* @malloc(i64 4) @@ -370,21 +351,13 @@ define void @test12_1() { } define void @test12_2(){ -; IS________OPM-LABEL: define {{[^@]+}}@test12_2() -; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; IS________OPM-NEXT: tail call void @use(i8* [[A]]) -; IS________OPM-NEXT: tail call void @use_nocapture(i8* [[A]]) -; IS________OPM-NEXT: ret void -; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_2() -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; 
NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use(i8* [[A]]) -; NOT_TUNIT_OPM-NEXT: tail call void @use_nocapture(i8* nocapture [[A]]) -; NOT_TUNIT_OPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test12_2() +; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) +; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) +; CHECK-NEXT: tail call void @use(i8* [[A]]) +; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]]) +; CHECK-NEXT: ret void ; ; FIXME: This should be @use_nocapture(i8* noalias [[A]]) ; FIXME: This should be @use_nocapture(i8* noalias nocapture [[A]]) @@ -398,15 +371,10 @@ define void @test12_2(){ declare void @two_args(i8* nocapture , i8* nocapture) define void @test12_3(){ -; IS________OPM-LABEL: define {{[^@]+}}@test12_3() -; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @two_args(i8* [[A]], i8* [[A]]) -; IS________OPM-NEXT: ret void -; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_3() -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) -; NOT_TUNIT_OPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test12_3() +; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) +; CHECK-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) tail call void @two_args(i8* %A, i8* %A) @@ -420,10 +388,10 @@ define void @test12_4(){ ; IS________OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 ; IS________OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 ; IS________OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; IS________OPM-NEXT: tail call void @two_args(i8* [[A]], i8* [[B]]) -; IS________OPM-NEXT: tail call void @two_args(i8* [[A]], i8* [[A_0]]) -; IS________OPM-NEXT: tail call void @two_args(i8* [[A]], i8* [[A_1]]) -; IS________OPM-NEXT: tail call void @two_args(i8* [[A_0]], i8* [[B_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; IS________OPM-NEXT: ret void ; ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() @@ -488,23 +456,14 @@ define void @test13_use_noalias(){ } define void @test13_use_alias(){ -; IS________OPM-LABEL: define {{[^@]+}}@test13_use_alias() -; IS________OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS________OPM-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* -; IS________OPM-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* -; IS________OPM-NEXT: call void @use_i8_internal(i8* [[C2A]]) -; IS________OPM-NEXT: call void @use_i8_internal(i8* [[C2B]]) -; IS________OPM-NEXT: ret void -; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test13_use_alias() -; NOT_TUNIT_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; NOT_TUNIT_OPM-NEXT: [[C2A:%.*]] = 
bitcast i16* [[C1]] to i8* -; NOT_TUNIT_OPM-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* -; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) -; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) -; NOT_TUNIT_OPM-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test13_use_alias() +; CHECK-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; CHECK-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* +; CHECK-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) +; CHECK-NEXT: ret void ; %m1 = tail call noalias i8* @malloc(i64 4) %c1 = bitcast i8* %m1 to i16* @@ -537,7 +496,7 @@ define i32 @i2p(i32* %arg) { ; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) ; NOT_CGSCC_NPM-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; NOT_CGSCC_NPM-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nofree readonly align 4 [[BC]]) +; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) ; NOT_CGSCC_NPM-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@i2p @@ -545,7 +504,7 @@ define i32 @i2p(i32* %arg) { ; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) ; IS__CGSCC____-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; IS__CGSCC____-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nofree nonnull readonly align 4 dereferenceable(4) [[BC]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[BC]]) ; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %c = call i32 @p2i(i32* %arg) @@ -579,11 +538,11 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; NOT_CGSCC_NPM-NEXT: entry: ; NOT_CGSCC_NPM-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull align 8 dereferenceable(240) [[TMP0]]) +; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) ; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) ; NOT_CGSCC_NPM-NEXT: call void @__shlim(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i64 0) ; NOT_CGSCC_NPM-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i32 1, i32 1) -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull align 8 dereferenceable(240) [[TMP0]]) +; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) ; NOT_CGSCC_NPM-NEXT: ret double [[CALL1]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox @@ -591,11 +550,11 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__CGSCC____-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull 
align 8 dereferenceable(240) [[TMP0]]) +; IS__CGSCC____-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) ; IS__CGSCC____-NEXT: call void @__shlim(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i64 0) ; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i32 1, i32 1) -; IS__CGSCC____-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull align 8 dereferenceable(240) [[TMP0]]) +; IS__CGSCC____-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) ; IS__CGSCC____-NEXT: ret double [[CALL1]] ; entry: @@ -731,7 +690,7 @@ define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { ; NOT_CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 ; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; NOT_CGSCC_NPM: if.then2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nofree writeonly align 4 [[P]]) +; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) ; NOT_CGSCC_NPM-NEXT: br label [[IF_END3]] ; NOT_CGSCC_NPM: if.end3: ; NOT_CGSCC_NPM-NEXT: ret void @@ -748,7 +707,7 @@ define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { ; IS__CGSCC____-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 ; IS__CGSCC____-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; IS__CGSCC____: if.then2: -; IS__CGSCC____-NEXT: tail call void @only_store(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) +; IS__CGSCC____-NEXT: tail call void @only_store(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) ; IS__CGSCC____-NEXT: br label [[IF_END3]] ; IS__CGSCC____: if.end3: ; IS__CGSCC____-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index aa908d5ce09f4..619bcbff0d318 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -104,27 +104,16 @@ define i1 @c5(i32* %q, i32 %bitno) { declare void @throw_if_bit_set(i8*, i8) readonly define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { -; IS__TUNIT____-LABEL: define {{[^@]+}}@c6 -; IS__TUNIT____-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #5 personality i32 (...)* @__gxx_personality_v0 -; IS__TUNIT____-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) -; IS__TUNIT____-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] -; IS__TUNIT____: ret0: -; IS__TUNIT____-NEXT: ret i1 false -; IS__TUNIT____: ret1: -; IS__TUNIT____-NEXT: [[EXN:%.*]] = landingpad { i8*, i32 } -; IS__TUNIT____-NEXT: cleanup -; IS__TUNIT____-NEXT: ret i1 true -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@c6 -; IS__CGSCC____-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #4 personality i32 (...)* @__gxx_personality_v0 -; IS__CGSCC____-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) -; IS__CGSCC____-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] -; IS__CGSCC____: ret0: -; IS__CGSCC____-NEXT: ret i1 false -; IS__CGSCC____: ret1: -; IS__CGSCC____-NEXT: [[EXN:%.*]] = landingpad { i8*, i32 } -; IS__CGSCC____-NEXT: cleanup -; IS__CGSCC____-NEXT: ret i1 true +; 
CHECK-LABEL: define {{[^@]+}}@c6 +; CHECK-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #4 personality i32 (...)* @__gxx_personality_v0 +; CHECK-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) +; CHECK-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] +; CHECK: ret0: +; CHECK-NEXT: ret i1 false +; CHECK: ret1: +; CHECK-NEXT: [[EXN:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: ret i1 true ; invoke void @throw_if_bit_set(i8* %q, i8 %bit) to label %ret0 unwind label %ret1 @@ -272,7 +261,7 @@ define void @nc5(void (i8*)* %f, i8* %p) { define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { ; CHECK-LABEL: define {{[^@]+}}@test1_1 ; CHECK-SAME: (i8* nocapture nofree readnone [[X1_1:%.*]], i8* nocapture nofree readnone [[Y1_1:%.*]], i1 [[C:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: ret void ; @@ -286,7 +275,7 @@ define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { ; CHECK-SAME: (i8* nocapture nofree readnone [[X1_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y1_2:%.*]], i1 [[C:%.*]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: call void @test1_1(i8* noalias nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 [[C]]) +; CHECK-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: br label [[F]] ; CHECK: f: @@ -324,7 +313,7 @@ define void @test3(i8* %x3, i8* %y3, i8* %z3) { define void @test4_1(i8* %x4_1, i1 %c) { ; CHECK-LABEL: define {{[^@]+}}@test4_1 ; CHECK-SAME: (i8* nocapture nofree readnone [[X4_1:%.*]], i1 [[C:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nofree readnone undef, i1 [[C]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: ret void ; @@ -338,7 +327,7 @@ define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { ; CHECK-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: call void @test4_1(i8* noalias nofree readnone align 536870912 null, i1 [[C]]) +; CHECK-NEXT: call void @test4_1(i8* noalias nocapture nofree readnone align 536870912 null, i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: br label [[F]] ; CHECK: f: @@ -556,7 +545,7 @@ declare void @unknown(i8*) define void @test_callsite() { ; CHECK-LABEL: define {{[^@]+}}@test_callsite() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @unknown(i8* noalias align 536870912 null) +; CHECK-NEXT: call void @unknown(i8* noalias nocapture align 536870912 null) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/nofree.ll 
b/llvm/test/Transforms/Attributor/nofree.ll index b9a2305ab5f87..e4faac06eb047 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -380,16 +380,16 @@ define void @nonnull_assume_call(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_call ; CHECK-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) ; CHECK-NEXT: call void @unknown(i8* [[ARG1]], i8* [[ARG2]], i8* [[ARG3]], i8* [[ARG4]]) -; CHECK-NEXT: call void @use_i8_ptr(i8* noalias readnone [[ARG1]]) -; CHECK-NEXT: call void @use_i8_ptr(i8* noalias readnone [[ARG2]]) +; CHECK-NEXT: call void @use_i8_ptr(i8* noalias nocapture readnone [[ARG1]]) +; CHECK-NEXT: call void @use_i8_ptr(i8* noalias nocapture readnone [[ARG2]]) ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG3]]) ] -; CHECK-NEXT: call void @use_i8_ptr(i8* noalias nofree readnone [[ARG3]]) -; CHECK-NEXT: call void @use_i8_ptr(i8* noalias readnone [[ARG4]]) -; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nofree readnone [[ARG1]]) -; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias readnone [[ARG2]]) +; CHECK-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree readnone [[ARG3]]) +; CHECK-NEXT: call void @use_i8_ptr(i8* noalias nocapture readnone [[ARG4]]) +; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nocapture nofree readnone [[ARG1]]) +; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nocapture readnone [[ARG2]]) ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG4]]) ] -; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nofree readnone [[ARG3]]) -; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nofree readnone [[ARG4]]) +; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nocapture nofree readnone [[ARG3]]) +; CHECK-NEXT: call void @use_i8_ptr_ret(i8* noalias nocapture nofree readnone [[ARG4]]) ; CHECK-NEXT: ret void ; call void @unknown(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 965c37a5552c3..f509e0d53aa3a 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -31,6 +31,26 @@ define i8* @test2(i8* nonnull %p) { define i8* @test2A(i1 %c, i8* %ret) { ; ATTRIBUTOR: define nonnull i8* @test2A(i1 %c, i8* nofree nonnull readnone returned %ret) +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test2A +; NOT_CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) +; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; NOT_CGSCC_OPM: A: +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #11 [ "nonnull"(i8* [[RET]]) ] +; NOT_CGSCC_OPM-NEXT: ret i8* [[RET]] +; NOT_CGSCC_OPM: B: +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #11 [ "nonnull"(i8* [[RET]]) ] +; NOT_CGSCC_OPM-NEXT: ret i8* [[RET]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test2A +; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) +; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; IS__CGSCC_OPM: A: +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #12 [ "nonnull"(i8* [[RET]]) ] +; IS__CGSCC_OPM-NEXT: ret i8* [[RET]] +; IS__CGSCC_OPM: B: +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #12 [ "nonnull"(i8* [[RET]]) ] +; IS__CGSCC_OPM-NEXT: ret i8* [[RET]] +; br i1 %c, label %A, 
label %B A: call void @llvm.assume(i1 true) [ "nonnull"(i8* %ret) ] @@ -42,6 +62,26 @@ B: define i8* @test2B(i1 %c, i8* %ret) { ; ATTRIBUTOR: define nonnull dereferenceable(4) i8* @test2B(i1 %c, i8* nofree nonnull readnone returned dereferenceable(4) %ret) +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test2B +; NOT_CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) +; NOT_CGSCC_OPM-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; NOT_CGSCC_OPM: A: +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #11 [ "dereferenceable"(i8* [[RET]], i32 4) ] +; NOT_CGSCC_OPM-NEXT: ret i8* [[RET]] +; NOT_CGSCC_OPM: B: +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #11 [ "dereferenceable"(i8* [[RET]], i32 4) ] +; NOT_CGSCC_OPM-NEXT: ret i8* [[RET]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test2B +; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) +; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] +; IS__CGSCC_OPM: A: +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #12 [ "dereferenceable"(i8* [[RET]], i32 4) ] +; IS__CGSCC_OPM-NEXT: ret i8* [[RET]] +; IS__CGSCC_OPM: B: +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #12 [ "dereferenceable"(i8* [[RET]], i32 4) ] +; IS__CGSCC_OPM-NEXT: ret i8* [[RET]] +; br i1 %c, label %A, label %B A: call void @llvm.assume(i1 true) [ "dereferenceable"(i8* %ret, i32 4) ] @@ -268,8 +308,8 @@ define void @test13_helper() { ; CHECK-LABEL: define {{[^@]+}}@test13_helper() ; CHECK-NEXT: [[NONNULLPTR:%.*]] = tail call nonnull i8* @ret_nonnull() ; CHECK-NEXT: [[MAYBENULLPTR:%.*]] = tail call i8* @unknown() -; CHECK-NEXT: tail call void @test13(i8* noalias nofree nonnull readnone [[NONNULLPTR]], i8* noalias nofree nonnull readnone [[NONNULLPTR]], i8* noalias nofree readnone [[MAYBENULLPTR]]) -; CHECK-NEXT: tail call void @test13(i8* noalias nofree nonnull readnone [[NONNULLPTR]], i8* noalias nofree readnone [[MAYBENULLPTR]], i8* noalias nofree nonnull readnone [[NONNULLPTR]]) +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]]) +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]]) ; CHECK-NEXT: ret void ; %nonnullptr = tail call i8* @ret_nonnull() @@ -688,7 +728,7 @@ declare i32 @esfp(...) define i1 @parent8(i8* %a, i8* %bogus1, i8* %b) personality i8* bitcast (i32 (...)* @esfp to i8*){ ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@parent8 -; NOT_CGSCC_OPM-SAME: (i8* nonnull [[A:%.*]], i8* nocapture nofree readnone [[BOGUS1:%.*]], i8* nonnull [[B:%.*]]) {{#[0-9]+}} personality i8* bitcast (i32 (...)* @esfp to i8*) +; NOT_CGSCC_OPM-SAME: (i8* nonnull [[A:%.*]], i8* nocapture nofree readnone [[BOGUS1:%.*]], i8* nonnull [[B:%.*]]) #4 personality i8* bitcast (i32 (...)* @esfp to i8*) ; NOT_CGSCC_OPM-NEXT: entry: ; NOT_CGSCC_OPM-NEXT: invoke void @use2nonnull(i8* nonnull [[A]], i8* nonnull [[B]]) ; NOT_CGSCC_OPM-NEXT: to label [[CONT:%.*]] unwind label [[EXC:%.*]] @@ -701,7 +741,7 @@ define i1 @parent8(i8* %a, i8* %bogus1, i8* %b) personality i8* bitcast (i32 (.. 
; NOT_CGSCC_OPM-NEXT: unreachable ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@parent8 -; IS__CGSCC_OPM-SAME: (i8* nonnull [[A:%.*]], i8* nocapture nofree readnone [[BOGUS1:%.*]], i8* nonnull [[B:%.*]]) {{#[0-9]+}} personality i8* bitcast (i32 (...)* @esfp to i8*) +; IS__CGSCC_OPM-SAME: (i8* nonnull [[A:%.*]], i8* nocapture nofree readnone [[BOGUS1:%.*]], i8* nonnull [[B:%.*]]) #5 personality i8* bitcast (i32 (...)* @esfp to i8*) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: invoke void @use2nonnull(i8* nonnull [[A]], i8* nonnull [[B]]) ; IS__CGSCC_OPM-NEXT: to label [[CONT:%.*]] unwind label [[EXC:%.*]] @@ -804,7 +844,7 @@ define internal void @called_by_weak(i32* %a) { define weak_odr void @weak_caller(i32* nonnull %a) { ; CHECK-LABEL: define {{[^@]+}}@weak_caller ; CHECK-SAME: (i32* nonnull [[A:%.*]]) -; CHECK-NEXT: call void @called_by_weak(i32* noalias nonnull readnone [[A]]) +; CHECK-NEXT: call void @called_by_weak(i32* noalias nocapture nonnull readnone [[A]]) ; CHECK-NEXT: ret void ; call void @called_by_weak(i32* %a) @@ -850,7 +890,7 @@ define void @make_live(i32* nonnull dereferenceable(8) %a) { ; CHECK-LABEL: define {{[^@]+}}@make_live ; CHECK-SAME: (i32* nonnull align 16 dereferenceable(8) [[A:%.*]]) ; CHECK-NEXT: call void @naked(i32* nonnull align 16 dereferenceable(8) [[A]]) -; CHECK-NEXT: call void @control(i32* noalias nonnull readnone align 16 dereferenceable(8) [[A]]) +; CHECK-NEXT: call void @control(i32* noalias nocapture nonnull readnone align 16 dereferenceable(8) [[A]]) ; CHECK-NEXT: call void @optnone(i32* nonnull align 16 dereferenceable(8) [[A]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index 9c6ae894766aa..60050b6b497ed 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -17,10 +17,15 @@ define i32 @test0(i32* %p) { } define i32 @test0-range-check(i32* %p) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-range-check -; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) -; IS__TUNIT____-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #4, !range !0 -; IS__TUNIT____-NEXT: ret i32 [[A]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-range-check +; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #3, !range !0 +; IS__TUNIT_OPM-NEXT: ret i32 [[A]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-range-check +; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_NPM-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #2, !range !0 +; IS__TUNIT_NPM-NEXT: ret i32 [[A]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-range-check ; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) @@ -45,52 +50,99 @@ define void @use3(i1, i1, i1) { ; TEST0 icmp test define void @test0-icmp-check(i32* %p){ ; ret = [0, 10) -; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-icmp-check -; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) -; IS__TUNIT____-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #4, !range !0 -; IS__TUNIT____-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9 -; IS__TUNIT____-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1 -; 
IS__TUNIT____-NEXT: [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0 -; IS__TUNIT____-NEXT: tail call void @use3(i1 false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 false) -; IS__TUNIT____-NEXT: [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9 -; IS__TUNIT____-NEXT: [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1 -; IS__TUNIT____-NEXT: [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0 -; IS__TUNIT____-NEXT: tail call void @use3(i1 true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 true) -; IS__TUNIT____-NEXT: [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1 -; IS__TUNIT____-NEXT: [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0 -; IS__TUNIT____-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_UGT_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 false) -; IS__TUNIT____-NEXT: [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9 -; IS__TUNIT____-NEXT: [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1 -; IS__TUNIT____-NEXT: tail call void @use3(i1 false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_UGE_4]], i1 true, i1 false) -; IS__TUNIT____-NEXT: [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1 -; IS__TUNIT____-NEXT: [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0 -; IS__TUNIT____-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_SGT_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 true) -; IS__TUNIT____-NEXT: [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9 -; IS__TUNIT____-NEXT: [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1 -; IS__TUNIT____-NEXT: tail call void @use3(i1 false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_GTE_4]], i1 true, i1 true) -; IS__TUNIT____-NEXT: [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9 -; IS__TUNIT____-NEXT: [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1 -; IS__TUNIT____-NEXT: tail call void @use3(i1 true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_SLT_4]], i1 false, i1 false) -; IS__TUNIT____-NEXT: [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8 -; IS__TUNIT____-NEXT: [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1 -; IS__TUNIT____-NEXT: [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0 -; IS__TUNIT____-NEXT: tail call void @use3(i1 true, i1 true, i1 [[CMP_LTE_3]]) -; IS__TUNIT____-NEXT: tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 false) -; IS__TUNIT____-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-icmp-check +; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #3, !range !0 +; IS__TUNIT_OPM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9 +; IS__TUNIT_OPM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]]) +; 
IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 false) +; IS__TUNIT_OPM-NEXT: [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9 +; IS__TUNIT_OPM-NEXT: [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 true) +; IS__TUNIT_OPM-NEXT: [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_UGT_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 false) +; IS__TUNIT_OPM-NEXT: [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9 +; IS__TUNIT_OPM-NEXT: [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_UGE_4]], i1 true, i1 false) +; IS__TUNIT_OPM-NEXT: [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_SGT_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 true) +; IS__TUNIT_OPM-NEXT: [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9 +; IS__TUNIT_OPM-NEXT: [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_GTE_4]], i1 true, i1 true) +; IS__TUNIT_OPM-NEXT: [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9 +; IS__TUNIT_OPM-NEXT: [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_SLT_4]], i1 false, i1 false) +; IS__TUNIT_OPM-NEXT: [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8 +; IS__TUNIT_OPM-NEXT: [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1 +; IS__TUNIT_OPM-NEXT: [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0 +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 true, i1 true, i1 [[CMP_LTE_3]]) +; IS__TUNIT_OPM-NEXT: tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 false) +; IS__TUNIT_OPM-NEXT: ret void +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-icmp-check +; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_NPM-NEXT: [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #2, !range !0 +; IS__TUNIT_NPM-NEXT: [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9 +; IS__TUNIT_NPM-NEXT: [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 false) +; IS__TUNIT_NPM-NEXT: [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9 +; 
IS__TUNIT_NPM-NEXT: [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 true) +; IS__TUNIT_NPM-NEXT: [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_UGT_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 false) +; IS__TUNIT_NPM-NEXT: [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9 +; IS__TUNIT_NPM-NEXT: [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_UGE_4]], i1 true, i1 false) +; IS__TUNIT_NPM-NEXT: [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 false, i1 false, i1 [[CMP_SGT_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 true) +; IS__TUNIT_NPM-NEXT: [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9 +; IS__TUNIT_NPM-NEXT: [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_GTE_4]], i1 true, i1 true) +; IS__TUNIT_NPM-NEXT: [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9 +; IS__TUNIT_NPM-NEXT: [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_SLT_4]], i1 false, i1 false) +; IS__TUNIT_NPM-NEXT: [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8 +; IS__TUNIT_NPM-NEXT: [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1 +; IS__TUNIT_NPM-NEXT: [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0 +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 true, i1 true, i1 [[CMP_LTE_3]]) +; IS__TUNIT_NPM-NEXT: tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 false) +; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-icmp-check ; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) @@ -259,11 +311,17 @@ define i32 @test1(i32* %p) { define i1 @test1-check(i32* %p) { ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@test1-check -; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) -; IS__TUNIT____-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #4, !range !2 -; IS__TUNIT____-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500 -; IS__TUNIT____-NEXT: ret i1 [[CMP]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test1-check +; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #3, !range !2 +; IS__TUNIT_OPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500 +; IS__TUNIT_OPM-NEXT: ret 
i1 [[CMP]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test1-check +; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) +; IS__TUNIT_NPM-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #2, !range !2 +; IS__TUNIT_NPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500 +; IS__TUNIT_NPM-NEXT: ret i1 [[CMP]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@test1-check ; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) @@ -566,7 +624,7 @@ define dso_local i32 @test4-g2(i32 %u) { ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test4-g2 ; IS__TUNIT_NPM-SAME: (i32 [[U:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #2, !range !3 +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #1, !range !3 ; IS__TUNIT_NPM-NEXT: ret i32 [[CALL]] ; entry: @@ -929,8 +987,8 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) { ; ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee_range_2 ; IS__TUNIT_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) -; IS__TUNIT_OPM-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #2, !range !4 -; IS__TUNIT_OPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #3, !range !4 +; IS__TUNIT_OPM-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #1, !range !4 +; IS__TUNIT_OPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #1, !range !4 ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; IS__TUNIT_OPM-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; IS__TUNIT_OPM-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 @@ -939,8 +997,8 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) { ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_range_2 ; IS__TUNIT_NPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) -; IS__TUNIT_NPM-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #2, !range !5 -; IS__TUNIT_NPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #3, !range !5 +; IS__TUNIT_NPM-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #1, !range !5 +; IS__TUNIT_NPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #1, !range !5 ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; IS__TUNIT_NPM-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; IS__TUNIT_NPM-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 5daa45178c135..1ecee461fe5de 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -36,14 +36,23 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: Function Attrs: argmemonly nofree nosync nounwind define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; CHECK-LABEL: define {{[^@]+}}@external_ret2_nrw -; CHECK-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree [[W0]]) -; CHECK-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree [[R0]], i32* nofree [[W0]]) -; CHECK-NEXT: [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree readonly align 4 [[R0]], i32* nofree writeonly [[W0]]) -; CHECK-NEXT: [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) -; CHECK-NEXT: ret i32* [[CALL3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@external_ret2_nrw +; IS__TUNIT____-SAME: 
(i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree [[W0]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree [[R0]], i32* nofree [[W0]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly align 4 [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) +; IS__TUNIT____-NEXT: ret i32* [[CALL3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@external_ret2_nrw +; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree align 4 [[R0:%.*]], i32* nofree returned [[W0:%.*]]) +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree align 4 [[R0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly align 4 [[R0]], i32* nofree writeonly [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: ret i32* [[CALL3]] ; entry: %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0) @@ -71,8 +80,8 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) ; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: return: @@ -95,8 +104,8 @@ define internal i32* @internal_ret0_nw(i32* 
%n0, i32* %w0) { ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: return: @@ -131,32 +140,59 @@ return: ; preds = %if.end, %if.then ; CHECK: Function Attrs: argmemonly nofree nosync nounwind define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { -; CHECK-LABEL: define {{[^@]+}}@internal_ret1_rrw -; CHECK-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[R1:%.*]], i32* nofree [[W0:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[R1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[W0]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL5:%.*]] = 
call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ] -; CHECK-NEXT: ret i32* undef +; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_ret1_rrw +; IS__TUNIT____-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[R1:%.*]], i32* nofree [[W0:%.*]]) +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: br label [[RETURN:%.*]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* [[R1]], align 4 +; IS__TUNIT____-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +; IS__TUNIT____-NEXT: store i32 [[ADD]], i32* [[W0]], align 4 +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) 
"no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: br label [[RETURN]] +; IS__TUNIT____: return: +; IS__TUNIT____-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ] +; IS__TUNIT____-NEXT: ret i32* undef +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_ret1_rrw +; IS__CGSCC____-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[R1:%.*]], i32* nofree [[W0:%.*]]) +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 +; IS__CGSCC____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; IS__CGSCC____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC____: if.then: +; IS__CGSCC____-NEXT: br label [[RETURN:%.*]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* [[R1]], align 4 +; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +; IS__CGSCC____-NEXT: store i32 [[ADD]], i32* [[W0]], align 4 +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: br label [[RETURN]] +; IS__CGSCC____: return: +; IS__CGSCC____-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ] +; IS__CGSCC____-NEXT: ret i32* undef ; entry: %0 = load i32, i32* %r0, align 4 @@ -222,26 +258,47 @@ return: ; preds = %if.end, %if.then ; CHECK: Function Attrs: argmemonly nofree nosync nounwind define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { -; CHECK-LABEL: define 
{{[^@]+}}@internal_ret1_rw -; CHECK-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[W0:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ] -; CHECK-NEXT: ret i32* [[RETVAL_0]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_ret1_rw +; IS__TUNIT____-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[W0:%.*]]) +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: br label [[RETURN:%.*]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 +; IS__TUNIT____-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: br label [[RETURN]] +; IS__TUNIT____: return: +; IS__TUNIT____-NEXT: 
[[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ] +; IS__TUNIT____-NEXT: ret i32* [[RETVAL_0]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_ret1_rw +; IS__CGSCC____-SAME: (i32* nofree nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[W0:%.*]]) +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* [[R0]], align 4 +; IS__CGSCC____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; IS__CGSCC____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__CGSCC____: if.then: +; IS__CGSCC____-NEXT: br label [[RETURN:%.*]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* [[R0]], align 4 +; IS__CGSCC____-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: br label [[RETURN]] +; IS__CGSCC____: return: +; IS__CGSCC____-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ] +; IS__CGSCC____-NEXT: ret i32* [[RETVAL_0]] ; entry: %0 = load i32, i32* %r0, align 4 @@ -276,10 +333,10 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; IS__TUNIT____-NEXT: ret i32* [[CALL1]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw -; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) +; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree align 4 [[R0:%.*]], i32* nofree returned [[W0:%.*]]) ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nofree readonly [[R0]], i32* nofree writeonly [[W0]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly align 4 [[R0]], i32* nofree writeonly [[W0]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree align 4 [[R0]], i32* nofree [[W0]]) ; IS__CGSCC____-NEXT: ret i32* [[CALL1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index 03bbdecbe3980..22ed2920953b8 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -326,13 +326,13 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__TUNIT____-NEXT: [[CMP2:%.*]] = icmp ult 
double* [[A]], [[B]] ; IS__TUNIT____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__TUNIT____: if.then3: -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[B]]) -; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) +; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) ; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) ; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) ; IS__TUNIT____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__TUNIT____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: if.end12: ; IS__TUNIT____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -363,12 +363,12 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__CGSCC____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__CGSCC____: if.then3: ; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[B]]) -; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) ; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) ; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias 
nofree readnone [[B]], double* noalias nofree readnone [[R]]) ; IS__CGSCC____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: if.end12: ; IS__CGSCC____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -473,17 +473,11 @@ entry: ; TEST another SCC test ; define i32* @rt2_helper(i32* %a) #0 { -; IS__TUNIT____-LABEL: define {{[^@]+}}@rt2_helper -; IS__TUNIT____-SAME: (i32* nofree readnone returned [[A:%.*]]) -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) -; IS__TUNIT____-NEXT: ret i32* [[CALL]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@rt2_helper -; IS__CGSCC____-SAME: (i32* nofree readnone returned [[A:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone [[A]]) -; IS__CGSCC____-NEXT: ret i32* [[CALL]] +; CHECK-LABEL: define {{[^@]+}}@rt2_helper +; CHECK-SAME: (i32* nofree readnone returned [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) +; CHECK-NEXT: ret i32* [[CALL]] ; entry: %call = call i32* @rt2(i32* %a, i32* %a) diff --git a/llvm/test/Transforms/Attributor/returned_crash.ll b/llvm/test/Transforms/Attributor/returned_crash.ll index d9af2e1803404..6b4b9a32d79e1 100644 --- a/llvm/test/Transforms/Attributor/returned_crash.ll +++ b/llvm/test/Transforms/Attributor/returned_crash.ll @@ -1,9 +1,23 @@ ; RUN: opt -attributor -S %s | FileCheck %s ; RUN: opt -passes=attributor -S %s | FileCheck %s -; -; CHECK: define i32 addrspace(1)* @foo(i32 addrspace(4)* nofree readnone %arg) + +@var = internal global [1 x i32] undef + +; CHECK-LABEL: define i32 addrspace(1)* @foo(i32 addrspace(4)* nofree readnone %arg) define i32 addrspace(1)* @foo(i32 addrspace(4)* %arg) { entry: %0 = addrspacecast i32 addrspace(4)* %arg to i32 addrspace(1)* ret i32 addrspace(1)* %0 } + +define i32* @func1() { + %ptr = call i32* @func1a([1 x i32]* @var) + ret i32* %ptr +} + +; CHECK-LABEL: define internal nonnull align 4 dereferenceable(4) i32* @func1a() +; CHECK-NEXT: ret i32* getelementptr inbounds ([1 x i32], [1 x i32]* @var, i32 0, i32 0) +define internal i32* @func1a([1 x i32]* %arg) { + %ptr = getelementptr inbounds [1 x i32], [1 x i32]* %arg, i64 0, i64 0 + ret i32* %ptr +} diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index 53556fb6d2051..9a296b68a550b 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify 
-attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 11ddf9d2352ba..ad6e048353fa7 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -277,7 +277,7 @@ define internal i32* @test_inalloca(i32* inalloca %a) { } define i32* @complicated_args_inalloca() { ; CHECK-LABEL: define {{[^@]+}}@complicated_args_inalloca() -; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree writeonly align 536870912 null) +; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree writeonly align 536870912 null) ; CHECK-NEXT: ret i32* [[CALL]] ; %call = call i32* @test_inalloca(i32* null) @@ -304,12 +304,12 @@ define void @complicated_args_sret(%struct.X** %b) { ; ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__TUNIT____-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) -; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) +; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__CGSCC____-SAME: (%struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) -; IS__CGSCC____-NEXT: call void @test_sret(%struct.X* noalias nofree writeonly align 536870912 null, %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B]]) +; IS__CGSCC____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree writeonly align 536870912 null, %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B]]) ; IS__CGSCC____-NEXT: ret void ; call void @test_sret(%struct.X* null, %struct.X** %b) @@ -329,7 +329,7 @@ define internal %struct.X* 
@test_nest(%struct.X* nest %a) { } define %struct.X* @complicated_args_nest() { ; CHECK-LABEL: define {{[^@]+}}@complicated_args_nest() -; CHECK-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nofree readnone align 536870912 null) +; CHECK-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree readnone align 536870912 null) ; CHECK-NEXT: ret %struct.X* [[CALL]] ; %call = call %struct.X* @test_nest(%struct.X* null) @@ -348,7 +348,7 @@ define internal void @test_byval(%struct.X* byval %a) { ; IS__CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[TMP0:%.*]]) ; IS__CGSCC_NPM-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]] ; IS__CGSCC_NPM-NEXT: [[A_PRIV_CAST:%.*]] = bitcast %struct.X* [[A_PRIV]] to i8** -; IS__CGSCC_NPM-NEXT: store i8* [[TMP0]], i8** [[A_PRIV_CAST]] +; IS__CGSCC_NPM-NEXT: store i8* [[TMP0]], i8** [[A_PRIV_CAST]], align 8 ; IS__CGSCC_NPM-NEXT: [[G0:%.*]] = getelementptr [[STRUCT_X]], %struct.X* [[A_PRIV]], i32 0, i32 0 ; IS__CGSCC_NPM-NEXT: store i8* null, i8** [[G0]], align 8 ; IS__CGSCC_NPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index 8f8b3fb919b3d..1c6b2d5bc8f59 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -335,9 +335,7 @@ define void @f1() #0 { ret void } -; IS__TUNIT____: Function Attrs: noinline nounwind uwtable -; FIXME: Because we do not derive norecurse in the module run anymore, willreturn is missing as well. -; IS__TUNIT____-NOT: willreturn +; IS__TUNIT____: Function Attrs: noinline nounwind uwtable willreturn ; IS__CGSCC____: Function Attrs: noinline norecurse nounwind uwtable willreturn define void @f2() #0 { ; CHECK-LABEL: define {{[^@]+}}@f2() diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll new file mode 100644 index 0000000000000..b9bdcfe324f94 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath-throwing.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +declare void @readnone_may_throw() readnone + +declare void @use(i32 *) + +; Tests where the pointer/object is accessible after the function returns. + +; Cannot remove the store from the entry block, because the call in bb2 may throw. +define void @accessible_after_return_1(i32* noalias %P, i1 %c1) { +; CHECK-LABEL: @accessible_after_return_1( +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @readnone_may_throw() +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: call void @use(i32* [[P]]) +; CHECK-NEXT: ret void +; + store i32 1, i32* %P + br i1 %c1, label %bb1, label %bb2 + +bb1: + store i32 0, i32* %P + br label %bb5 + +bb2: + call void @readnone_may_throw() + store i32 3, i32* %P + br label %bb5 + +bb5: + call void @use(i32* %P) + ret void +} + +; Cannot remove the store from the entry block, because the call in bb3 may throw. 
+define void @accessible_after_return6(i32* %P, i1 %c.1, i1 %c.2) { +; CHECK-LABEL: @accessible_after_return6( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[C_2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] +; CHECK: bb2: +; CHECK-NEXT: store i32 1, i32* [[P]] +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: call void @readnone_may_throw() +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: ret void +; CHECK: bb4: +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: ret void +; +entry: + store i32 0, i32* %P + br i1 %c.1, label %bb1, label %bb2 + +bb1: + br i1 %c.2, label %bb3, label %bb4 + +bb2: + store i32 1, i32* %P + ret void + +bb3: + call void @readnone_may_throw() + store i32 2, i32* %P + ret void + +bb4: + store i32 3, i32* %P + ret void +} + +; Tests where the pointer/object is *NOT* accessible after the function returns. + +; The store in the entry block can be eliminated, because it is overwritten +; on all paths to the exit. As the location is not visible to the caller, the +; call in bb2 (which may throw) can be ignored. +define void @alloca_1(i1 %c1) { +; CHECK-LABEL: @alloca_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P:%.*]] = alloca i32 +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @readnone_may_throw() +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: call void @use(i32* [[P]]) +; CHECK-NEXT: ret void +; +entry: + %P = alloca i32 + store i32 1, i32* %P + br i1 %c1, label %bb1, label %bb2 + +bb1: + store i32 0, i32* %P + br label %bb5 + +bb2: + call void @readnone_may_throw() + store i32 3, i32* %P + br label %bb5 + +bb5: + call void @use(i32* %P) + ret void +} + +; The store in the entry block can be eliminated, because it is overwritten +; on all paths to the exit. As the location is not visible to the caller, the +; call in bb3 (which may throw) can be ignored. 
+define void @alloca_2(i1 %c.1, i1 %c.2) { +; CHECK-LABEL: @alloca_2( +; CHECK-NEXT: [[P:%.*]] = alloca i32 +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 [[C_2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: call void @readnone_may_throw() +; CHECK-NEXT: store i32 3, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb4: +; CHECK-NEXT: store i32 5, i32* [[P]] +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: call void @use(i32* [[P]]) +; CHECK-NEXT: ret void +; + %P = alloca i32 + store i32 1, i32* %P + br i1 %c.1, label %bb1, label %bb2 + +bb1: + store i32 0, i32* %P + br label %bb5 + +bb2: + br i1 %c.2, label %bb3, label %bb4 + +bb3: + call void @readnone_may_throw() + store i32 3, i32* %P + br label %bb5 + +bb4: + store i32 5, i32* %P + br label %bb5 + +bb5: + call void @use(i32* %P) + ret void +} diff --git a/llvm/test/Transforms/Inline/print-instructions-deltas-unfinished.ll b/llvm/test/Transforms/Inline/print-instructions-deltas-unfinished.ll new file mode 100644 index 0000000000000..cd7ef830460ca --- /dev/null +++ b/llvm/test/Transforms/Inline/print-instructions-deltas-unfinished.ll @@ -0,0 +1,22 @@ +; Require asserts for -debug-only +; REQUIRES: asserts + +; This test ensures that the handling of instructions which were not analyzed by +; the '-print-instruction-deltas' flag due to the early exit was done correctly. + +; RUN: opt < %s -inline -debug-only=inline-cost -disable-output -print-instruction-deltas -inline-threshold=0 2>&1 | FileCheck %s + +; CHECK: No analysis for the instruction +; CHECK: ret void + +declare void @callee1() + +define void @bar() { + call void @callee1() + ret void +} + +define void @foo() { + call void @bar() + ret void +} diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 66d58485a7940..6f33e83ee3362 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -525,6 +525,69 @@ not_taken: ret i1 %control } + +define void @always_true_assumption() { +; CHECK-LABEL: @always_true_assumption( +; CHECK-NEXT: ret void +; + call void @llvm.assume(i1 true) + ret void + +} + +; The alloca guarantees that the low bits of %a are zero because of alignment. +; The assume says the opposite. Make sure we don't crash. + +define i64 @PR31809() { +; CHECK-LABEL: @PR31809( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[T1:%.*]] = ptrtoint i32* [[A]] to i64 +; CHECK-NEXT: call void @llvm.assume(i1 false) +; CHECK-NEXT: ret i64 [[T1]] +; + %a = alloca i32 + %t1 = ptrtoint i32* %a to i64 + %cond = icmp eq i64 %t1, 3 + call void @llvm.assume(i1 %cond) + ret i64 %t1 +} + +; Similar to above: there's no way to know which assumption is truthful, +; so just don't crash. + +define i8 @conflicting_assumptions(i8 %x){ +; CHECK-LABEL: @conflicting_assumptions( +; CHECK-NEXT: call void @llvm.assume(i1 false) +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i8 [[X:%.*]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]]) +; CHECK-NEXT: ret i8 5 +; + %add = add i8 %x, 1 + %cond1 = icmp eq i8 %x, 3 + call void @llvm.assume(i1 %cond1) + %cond2 = icmp eq i8 %x, 4 + call void @llvm.assume(i1 %cond2) + ret i8 %add +} + +; Another case of conflicting assumptions. This would crash because we'd +; try to set more known bits than existed in the known bits struct.
+ +define void @PR36270(i32 %b) { +; CHECK-LABEL: @PR36270( +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: unreachable +; + %B7 = xor i32 -1, 2147483647 + %and1 = and i32 %b, 3 + %B12 = lshr i32 %B7, %and1 + %C1 = icmp ult i32 %and1, %B12 + tail call void @llvm.assume(i1 %C1) + %cmp2 = icmp eq i32 0, %B12 + tail call void @llvm.assume(i1 %cmp2) + unreachable +} + declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll index 50cbd763987d3..4d39651af34d5 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll @@ -1,8 +1,9 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @a(<1 x i64> %y) { ; CHECK-LABEL: @a( -; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double> +; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> [[Y:%.*]] to <1 x double> ; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0 ; CHECK-NEXT: ret double [[C]] ; @@ -12,7 +13,7 @@ define double @a(<1 x i64> %y) { define i64 @b(<1 x i64> %y) { ; CHECK-LABEL: @b( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0 ; CHECK-NEXT: ret i64 [[TMP1]] ; %c = bitcast <1 x i64> %y to i64 @@ -21,7 +22,7 @@ define i64 @b(<1 x i64> %y) { define <1 x i64> @c(double %y) { ; CHECK-LABEL: @c( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double %y to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[Y:%.*]] to i64 ; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 ; CHECK-NEXT: ret <1 x i64> [[C]] ; @@ -31,7 +32,7 @@ define <1 x i64> @c(double %y) { define <1 x i64> @d(i64 %y) { ; CHECK-LABEL: @d( -; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0 +; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0 ; CHECK-NEXT: ret <1 x i64> [[C]] ; %c = bitcast i64 %y to <1 x i64> @@ -40,7 +41,7 @@ define <1 x i64> @d(i64 %y) { define x86_mmx @e(<1 x i64> %y) { ; CHECK-LABEL: @e( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[Y:%.*]], i32 0 ; CHECK-NEXT: [[C:%.*]] = bitcast i64 [[TMP1]] to x86_mmx ; CHECK-NEXT: ret x86_mmx [[C]] ; @@ -50,7 +51,7 @@ define x86_mmx @e(<1 x i64> %y) { define <1 x i64> @f(x86_mmx %y) { ; CHECK-LABEL: @f( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx %y to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] to i64 ; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 ; CHECK-NEXT: ret <1 x i64> [[C]] ; @@ -61,7 +62,7 @@ define <1 x i64> @f(x86_mmx %y) { define double @g(x86_mmx %x) { ; CHECK-LABEL: @g( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx %x to double +; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double ; CHECK-NEXT: ret double [[TMP0]] ; entry: @@ -69,3 +70,77 @@ entry: %1 = bitcast <1 x i64> %0 to double ret double %1 } + +define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef( +; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 +; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: ret <3 x i64> [[I]] +; + %xb = bitcast double %x to i64 + %i = insertelement <3 x i64> undef, i64 %xb, i32 %idx + ret <3 
x i64> %i +} + +define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef_fp( +; CHECK-NEXT: [[XB:%.*]] = bitcast i32 [[X:%.*]] to float +; CHECK-NEXT: [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]] +; CHECK-NEXT: ret <3 x float> [[I]] +; + %xb = bitcast i32 %x to float + %i = insertelement <3 x float> undef, float %xb, i567 %idx + ret <3 x float> %i +} + +declare void @use(i64) + +define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef_extra_use( +; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 +; CHECK-NEXT: call void @use(i64 [[XB]]) +; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: ret <3 x i64> [[I]] +; + %xb = bitcast double %x to i64 + call void @use(i64 %xb) + %i = insertelement <3 x i64> undef, i64 %xb, i32 %idx + ret <3 x i64> %i +} + +define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef_vec_src( +; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 +; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: ret <3 x i64> [[I]] +; + %xb = bitcast <2 x i32> %x to i64 + %i = insertelement <3 x i64> undef, i64 %xb, i32 %idx + ret <3 x i64> %i +} + +define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef_from_mmx( +; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64 +; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: ret <3 x i64> [[I]] +; + %xb = bitcast x86_mmx %x to i64 + %i = insertelement <3 x i64> undef, i64 %xb, i32 %idx + ret <3 x i64> %i +} + +define <2 x i64> @PR45748(double %x, double %y) { +; CHECK-LABEL: @PR45748( +; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 +; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0 +; CHECK-NEXT: [[YB:%.*]] = bitcast double [[Y:%.*]] to i64 +; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1 +; CHECK-NEXT: ret <2 x i64> [[I1]] +; + %xb = bitcast double %x to i64 + %i0 = insertelement <2 x i64> undef, i64 %xb, i32 0 + %yb = bitcast double %y to i64 + %i1 = insertelement <2 x i64> %i0, i64 %yb, i32 1 + ret <2 x i64> %i1 +} diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index d85286b46029a..6ecd0e398654a 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -666,9 +666,10 @@ define i64 @test50(i64 %x) { define i64 @test51(i64 %A, i1 %cond) { ; ALL-LABEL: @test51( ; ALL-NEXT: [[C:%.*]] = and i64 [[A:%.*]], 4294967294 -; ALL-NEXT: [[D:%.*]] = or i64 [[A]], 1 -; ALL-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i64 [[C]], i64 [[D]] -; ALL-NEXT: [[SEXT:%.*]] = shl i64 [[E]], 32 +; ALL-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true +; ALL-NEXT: [[MASKSEL:%.*]] = zext i1 [[NOT_COND]] to i64 +; ALL-NEXT: [[E:%.*]] = or i64 [[C]], [[MASKSEL]] +; ALL-NEXT: [[SEXT:%.*]] = shl nuw i64 [[E]], 32 ; ALL-NEXT: [[F:%.*]] = ashr exact i64 [[SEXT]], 32 ; ALL-NEXT: ret i64 [[F]] ; diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll index b1f57060b02bd..76b7d28b2c092 100644 --- a/llvm/test/Transforms/InstCombine/extractelement.ll +++ b/llvm/test/Transforms/InstCombine/extractelement.ll @@ -319,7 +319,8 @@ define <4 x double> 
@invalid_extractelement(<2 x double> %a, <4 x double> %b, do ; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> ; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i32 1 ; ANY-NEXT: store double [[E]], double* [[P:%.*]], align 8 -; ANY-NEXT: ret <4 x double> [[T4]] +; ANY-NEXT: [[R:%.*]] = insertelement <4 x double> [[T4]], double undef, i64 0 +; ANY-NEXT: ret <4 x double> [[R]] ; %t3 = extractelement <2 x double> %a, i32 0 %t4 = insertelement <4 x double> %b, double %t3, i32 2 diff --git a/llvm/test/Transforms/InstCombine/fpextend.ll b/llvm/test/Transforms/InstCombine/fpextend.ll index 1483277b93eb5..bd7e353d0d96a 100644 --- a/llvm/test/Transforms/InstCombine/fpextend.ll +++ b/llvm/test/Transforms/InstCombine/fpextend.ll @@ -258,3 +258,118 @@ define float @test18(half %x, half %y) nounwind { %t56 = fptrunc double %t5 to float ret float %t56 } + +define double @FtoItoFtoF_f32_s32_f32_f64(float %f) { +; CHECK-LABEL: @FtoItoFtoF_f32_s32_f32_f64( +; CHECK-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i32 +; CHECK-NEXT: [[X:%.*]] = sitofp i32 [[I]] to float +; CHECK-NEXT: [[R:%.*]] = fpext float [[X]] to double +; CHECK-NEXT: ret double [[R]] +; + %i = fptosi float %f to i32 + %x = sitofp i32 %i to float + %r = fpext float %x to double + ret double %r +} + +declare void @use_i32(i32) +declare void @use_f32(float) + +; Extra uses are ok; unsigned is ok. + +define double @FtoItoFtoF_f32_u32_f32_f64_extra_uses(float %f) { +; CHECK-LABEL: @FtoItoFtoF_f32_u32_f32_f64_extra_uses( +; CHECK-NEXT: [[I:%.*]] = fptoui float [[F:%.*]] to i32 +; CHECK-NEXT: call void @use_i32(i32 [[I]]) +; CHECK-NEXT: [[X:%.*]] = uitofp i32 [[I]] to float +; CHECK-NEXT: call void @use_f32(float [[X]]) +; CHECK-NEXT: [[R:%.*]] = fpext float [[X]] to double +; CHECK-NEXT: ret double [[R]] +; + %i = fptoui float %f to i32 + call void @use_i32(i32 %i) + %x = uitofp i32 %i to float + call void @use_f32(float %x) + %r = fpext float %x to double + ret double %r +} + +; Vectors are ok; initial type can be smaller than intermediate type. 
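+;
+; Naming gloss (an editorial reading inferred from the test bodies below,
+; not an authoritative statement of the transform):
+; FtoItoFtoF_<srcFP>_<sign+intwidth>_<midFP>_<dstFP> names the cast chain
+; float -> int -> float -> wider float; e.g. FtoItoFtoF_f32_s64_f64_f128 is
+;   %i = fptosi float %f to i64    ; F -> I, signed 64-bit
+;   %x = sitofp i64 %i to double   ; I -> F, intermediate type
+;   %r = fpext double %x to fp128  ; F -> F, widening extension
+; The "us"/"su" variants at the end of the file mix an unsigned F->I with a
+; signed I->F (and vice versa).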
+ +define <3 x double> @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(<3 x half> %f) { +; CHECK-LABEL: @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64( +; CHECK-NEXT: [[I:%.*]] = fptosi <3 x half> [[F:%.*]] to <3 x i32> +; CHECK-NEXT: [[X:%.*]] = sitofp <3 x i32> [[I]] to <3 x float> +; CHECK-NEXT: [[R:%.*]] = fpext <3 x float> [[X]] to <3 x double> +; CHECK-NEXT: ret <3 x double> [[R]] +; + %i = fptosi <3 x half> %f to <3 x i32> + %x = sitofp <3 x i32> %i to <3 x float> + %r = fpext <3 x float> %x to <3 x double> + ret <3 x double> %r +} + +define fp128 @FtoItoFtoF_f32_s64_f64_f128(float %f) { +; CHECK-LABEL: @FtoItoFtoF_f32_s64_f64_f128( +; CHECK-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i64 +; CHECK-NEXT: [[X:%.*]] = sitofp i64 [[I]] to double +; CHECK-NEXT: [[R:%.*]] = fpext double [[X]] to fp128 +; CHECK-NEXT: ret fp128 [[R]] +; + %i = fptosi float %f to i64 + %x = sitofp i64 %i to double + %r = fpext double %x to fp128 + ret fp128 %r +} + +define x86_fp80 @FtoItoFtoF_f64_u32_f64_f80(double %f) { +; CHECK-LABEL: @FtoItoFtoF_f64_u32_f64_f80( +; CHECK-NEXT: [[I:%.*]] = fptoui double [[F:%.*]] to i32 +; CHECK-NEXT: [[X:%.*]] = uitofp i32 [[I]] to double +; CHECK-NEXT: [[R:%.*]] = fpext double [[X]] to x86_fp80 +; CHECK-NEXT: ret x86_fp80 [[R]] +; + %i = fptoui double %f to i32 + %x = uitofp i32 %i to double + %r = fpext double %x to x86_fp80 + ret x86_fp80 %r +} + +define ppc_fp128 @FtoItoFtoF_f64_u32_f64_p128(double %f) { +; CHECK-LABEL: @FtoItoFtoF_f64_u32_f64_p128( +; CHECK-NEXT: [[I:%.*]] = fptoui double [[F:%.*]] to i32 +; CHECK-NEXT: [[X:%.*]] = uitofp i32 [[I]] to double +; CHECK-NEXT: [[R:%.*]] = fpext double [[X]] to ppc_fp128 +; CHECK-NEXT: ret ppc_fp128 [[R]] +; + %i = fptoui double %f to i32 + %x = uitofp i32 %i to double + %r = fpext double %x to ppc_fp128 + ret ppc_fp128 %r +} + +define double @FtoItoFtoF_f32_us32_f32_f64(float %f) { +; CHECK-LABEL: @FtoItoFtoF_f32_us32_f32_f64( +; CHECK-NEXT: [[I:%.*]] = fptoui float [[F:%.*]] to i32 +; CHECK-NEXT: [[X:%.*]] = sitofp i32 [[I]] to float +; CHECK-NEXT: [[R:%.*]] = fpext float [[X]] to double +; CHECK-NEXT: ret double [[R]] +; + %i = fptoui float %f to i32 + %x = sitofp i32 %i to float + %r = fpext float %x to double + ret double %r +} + +define double @FtoItoFtoF_f32_su32_f32_f64(float %f) { +; CHECK-LABEL: @FtoItoFtoF_f32_su32_f32_f64( +; CHECK-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i32 +; CHECK-NEXT: [[X:%.*]] = uitofp i32 [[I]] to float +; CHECK-NEXT: [[R:%.*]] = fpext float [[X]] to double +; CHECK-NEXT: ret double [[R]] +; + %i = fptosi float %f to i32 + %x = uitofp i32 %i to float + %r = fpext float %x to double + ret double %r +} diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 8ab3ca8874183..429a826c79812 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -12,7 +12,7 @@ declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>) nounwind readnone define i32 @lshr_ctlz_zero_is_not_undef(i32 %x) { ; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SH]] ; @@ -23,7 +23,7 @@ define i32 @lshr_ctlz_zero_is_not_undef(i32 %x) { define i32 @lshr_cttz_zero_is_not_undef(i32 %x) { ; CHECK-LABEL: @lshr_cttz_zero_is_not_undef( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32 ; 
CHECK-NEXT: ret i32 [[SH]] ; @@ -34,7 +34,7 @@ define i32 @lshr_cttz_zero_is_not_undef(i32 %x) { define i32 @lshr_ctpop(i32 %x) { ; CHECK-LABEL: @lshr_ctpop( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], -1 ; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[SH]] ; @@ -45,7 +45,7 @@ define i32 @lshr_ctpop(i32 %x) { define <2 x i8> @lshr_ctlz_zero_is_not_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -56,7 +56,7 @@ define <2 x i8> @lshr_ctlz_zero_is_not_undef_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_cttz_zero_is_not_undef_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -67,7 +67,7 @@ define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctpop_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -76,9 +76,66 @@ define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) { ret <2 x i8> %sh } +define i32 @lshr_ctlz_zero_is_undef(i32 %x) { +; CHECK-LABEL: @lshr_ctlz_zero_is_undef( +; CHECK-NEXT: ret i32 0 +; + %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %sh = lshr i32 %ct, 5 + ret i32 %sh +} + +define i32 @lshr_cttz_zero_is_undef(i32 %x) { +; CHECK-LABEL: @lshr_cttz_zero_is_undef( +; CHECK-NEXT: ret i32 0 +; + %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true) + %sh = lshr i32 %ct, 5 + ret i32 %sh +} + +define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) { +; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec( +; CHECK-NEXT: ret <2 x i8> zeroinitializer +; + %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true) + %sh = lshr <2 x i8> %ct, + ret <2 x i8> %sh +} + +define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) { +; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec( +; CHECK-NEXT: ret i8 0 +; + %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true) + %sh = lshr <2 x i8> %ct, + %ex = extractelement <2 x i8> %sh, i32 0 + ret i8 %ex +} + +define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) { +; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec( +; CHECK-NEXT: ret <2 x i8> zeroinitializer +; + %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true) + %sh = lshr <2 x i8> %ct, + ret <2 x i8> %sh +} + +define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) { +; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec( +; CHECK-NEXT: ret i8 0 +; + %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true) + %sh = lshr <2 x i8> %ct, + %ex = extractelement <2 x i8> %sh, i32 0 + ret i8 %ex +} + + define i8 @lshr_exact(i8 %x) { ; CHECK-LABEL: @lshr_exact( -; CHECK-NEXT: [[SHL:%.*]] = shl i8 %x, 2 +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 2 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SHL]], 4 ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2 ; CHECK-NEXT: ret i8 [[LSHR]] @@ -91,7 +148,7 @@ define i8 @lshr_exact(i8 %x) { define <2 
x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> %x, +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SHL]], ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], ; CHECK-NEXT: ret <2 x i8> [[LSHR]] @@ -104,7 +161,7 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define i16 @bool_zext(i1 %x) { ; CHECK-LABEL: @bool_zext( -; CHECK-NEXT: [[HIBIT:%.*]] = zext i1 %x to i16 +; CHECK-NEXT: [[HIBIT:%.*]] = zext i1 [[X:%.*]] to i16 ; CHECK-NEXT: ret i16 [[HIBIT]] ; %sext = sext i1 %x to i16 @@ -114,7 +171,7 @@ define i16 @bool_zext(i1 %x) { define <2 x i8> @bool_zext_splat(<2 x i1> %x) { ; CHECK-LABEL: @bool_zext_splat( -; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i1> %x to <2 x i8> +; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[HIBIT]] ; %sext = sext <2 x i1> %x to <2 x i8> @@ -124,7 +181,7 @@ define <2 x i8> @bool_zext_splat(<2 x i1> %x) { define i32 @smear_sign_and_widen(i8 %x) { ; CHECK-LABEL: @smear_sign_and_widen( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 7 +; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 [[X:%.*]], 7 ; CHECK-NEXT: [[HIBIT:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[HIBIT]] ; @@ -135,7 +192,7 @@ define i32 @smear_sign_and_widen(i8 %x) { define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) { ; CHECK-LABEL: @smear_sign_and_widen_should_not_change_type( -; CHECK-NEXT: [[SEXT:%.*]] = sext i4 %x to i16 +; CHECK-NEXT: [[SEXT:%.*]] = sext i4 [[X:%.*]] to i16 ; CHECK-NEXT: [[HIBIT:%.*]] = lshr i16 [[SEXT]], 12 ; CHECK-NEXT: ret i16 [[HIBIT]] ; @@ -146,7 +203,7 @@ define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) { define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) { ; CHECK-LABEL: @smear_sign_and_widen_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> %x, +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], ; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[HIBIT]] ; @@ -157,7 +214,7 @@ define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) { define i18 @fake_sext(i3 %x) { ; CHECK-LABEL: @fake_sext( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i3 %x, 2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i3 [[X:%.*]], 2 ; CHECK-NEXT: [[SH:%.*]] = zext i3 [[TMP1]] to i18 ; CHECK-NEXT: ret i18 [[SH]] ; @@ -170,7 +227,7 @@ define i18 @fake_sext(i3 %x) { define i32 @fake_sext_but_should_not_change_type(i3 %x) { ; CHECK-LABEL: @fake_sext_but_should_not_change_type( -; CHECK-NEXT: [[SEXT:%.*]] = sext i3 %x to i32 +; CHECK-NEXT: [[SEXT:%.*]] = sext i3 [[X:%.*]] to i32 ; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[SEXT]], 31 ; CHECK-NEXT: ret i32 [[SH]] ; @@ -181,7 +238,7 @@ define i32 @fake_sext_but_should_not_change_type(i3 %x) { define <2 x i8> @fake_sext_splat(<2 x i3> %x) { ; CHECK-LABEL: @fake_sext_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> %x, +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i3> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -194,7 +251,7 @@ define <2 x i8> @fake_sext_splat(<2 x i3> %x) { define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @narrow_lshr_constant( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> %x, +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[SH]] ; diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 
3fc1cc54ea3a2..0abefdccc7a57 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -606,3 +606,77 @@ define i32 @mul_div_select(i32 %x, i32 %y, i1 %c) { %mul = mul i32 %sel, %y ret i32 %mul } + +; fold mul(abs(x),abs(x)) -> mul(x,x) +define i31 @combine_mul_abs_i31(i31 %0) { +; CHECK-LABEL: @combine_mul_abs_i31( +; CHECK-NEXT: [[M:%.*]] = mul i31 [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret i31 [[M]] +; + %c = icmp slt i31 %0, 0 + %s = sub nsw i31 0, %0 + %r = select i1 %c, i31 %s, i31 %0 + %m = mul i31 %r, %r + ret i31 %m +} + +define i32 @combine_mul_abs_i32(i32 %0) { +; CHECK-LABEL: @combine_mul_abs_i32( +; CHECK-NEXT: [[M:%.*]] = mul i32 [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret i32 [[M]] +; + %c = icmp slt i32 %0, 0 + %s = sub nsw i32 0, %0 + %r = select i1 %c, i32 %s, i32 %0 + %m = mul i32 %r, %r + ret i32 %m +} + +define <4 x i32> @combine_mul_abs_v4i32(<4 x i32> %0) { +; CHECK-LABEL: @combine_mul_abs_v4i32( +; CHECK-NEXT: [[M:%.*]] = mul <4 x i32> [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret <4 x i32> [[M]] +; + %c = icmp slt <4 x i32> %0, zeroinitializer + %s = sub nsw <4 x i32> zeroinitializer, %0 + %r = select <4 x i1> %c, <4 x i32> %s, <4 x i32> %0 + %m = mul <4 x i32> %r, %r + ret <4 x i32> %m +} + +; fold mul(nabs(x),nabs(x)) -> mul(x,x) +define i31 @combine_mul_nabs_i31(i31 %0) { +; CHECK-LABEL: @combine_mul_nabs_i31( +; CHECK-NEXT: [[M:%.*]] = mul i31 [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret i31 [[M]] +; + %c = icmp slt i31 %0, 0 + %s = sub nsw i31 0, %0 + %r = select i1 %c, i31 %0, i31 %s + %m = mul i31 %r, %r + ret i31 %m +} + +define i32 @combine_mul_nabs_i32(i32 %0) { +; CHECK-LABEL: @combine_mul_nabs_i32( +; CHECK-NEXT: [[M:%.*]] = mul i32 [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret i32 [[M]] +; + %c = icmp slt i32 %0, 0 + %s = sub nsw i32 0, %0 + %r = select i1 %c, i32 %0, i32 %s + %m = mul i32 %r, %r + ret i32 %m +} + +define <4 x i32> @combine_mul_nabs_v4i32(<4 x i32> %0) { +; CHECK-LABEL: @combine_mul_nabs_v4i32( +; CHECK-NEXT: [[M:%.*]] = mul <4 x i32> [[TMP0:%.*]], [[TMP0]] +; CHECK-NEXT: ret <4 x i32> [[M]] +; + %c = icmp slt <4 x i32> %0, zeroinitializer + %s = sub nsw <4 x i32> zeroinitializer, %0 + %r = select <4 x i1> %c, <4 x i32> %0, <4 x i32> %s + %m = mul <4 x i32> %r, %r + ret <4 x i32> %m +} diff --git a/llvm/test/Transforms/InstCombine/or-concat.ll b/llvm/test/Transforms/InstCombine/or-concat.ll new file mode 100644 index 0000000000000..77cdaa9a37ddc --- /dev/null +++ b/llvm/test/Transforms/InstCombine/or-concat.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +; +; Tests for combining concat-able ops: +; or(zext(OP(x)), shl(zext(OP(y)),bw/2)) +; --> +; OP(or(zext(x), shl(zext(y),bw/2))) +; + +; BSWAP + +; PR45715 +define i64 @concat_bswap32_unary_split(i64 %a0) { +; CHECK-LABEL: @concat_bswap32_unary_split( +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP1]] +; + %1 = lshr i64 %a0, 32 + %2 = trunc i64 %1 to i32 + %3 = trunc i64 %a0 to i32 + %4 = tail call i32 @llvm.bswap.i32(i32 %2) + %5 = tail call i32 @llvm.bswap.i32(i32 %3) + %6 = zext i32 %4 to i64 + %7 = zext i32 %5 to i64 + %8 = shl nuw i64 %7, 32 + %9 = or i64 %6, %8 + ret i64 %9 +} + +define i64 @concat_bswap32_unary_flip(i64 %a0) { +; CHECK-LABEL: @concat_bswap32_unary_flip( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[A0]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = 
or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) +; CHECK-NEXT: ret i64 [[TMP4]] +; + %1 = lshr i64 %a0, 32 + %2 = trunc i64 %1 to i32 + %3 = trunc i64 %a0 to i32 + %4 = tail call i32 @llvm.bswap.i32(i32 %2) + %5 = tail call i32 @llvm.bswap.i32(i32 %3) + %6 = zext i32 %4 to i64 + %7 = zext i32 %5 to i64 + %8 = shl nuw i64 %6, 32 + %9 = or i64 %7, %8 + ret i64 %9 +} + +define i64 @concat_bswap32_binary(i32 %a0, i32 %a1) { +; CHECK-LABEL: @concat_bswap32_binary( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A1:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]]) +; CHECK-NEXT: ret i64 [[TMP5]] +; + %1 = tail call i32 @llvm.bswap.i32(i32 %a0) + %2 = tail call i32 @llvm.bswap.i32(i32 %a1) + %3 = zext i32 %1 to i64 + %4 = zext i32 %2 to i64 + %5 = shl nuw i64 %4, 32 + %6 = or i64 %3, %5 + ret i64 %6 +} + +declare i32 @llvm.bswap.i32(i32) + +; BITREVERSE + +define i64 @concat_bitreverse32_unary_split(i64 %a0) { +; CHECK-LABEL: @concat_bitreverse32_unary_split( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[A0]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 32 +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP8]], [[TMP6]] +; CHECK-NEXT: ret i64 [[TMP9]] +; + %1 = lshr i64 %a0, 32 + %2 = trunc i64 %1 to i32 + %3 = trunc i64 %a0 to i32 + %4 = tail call i32 @llvm.bitreverse.i32(i32 %2) + %5 = tail call i32 @llvm.bitreverse.i32(i32 %3) + %6 = zext i32 %4 to i64 + %7 = zext i32 %5 to i64 + %8 = shl nuw i64 %7, 32 + %9 = or i64 %6, %8 + ret i64 %9 +} + +define i64 @concat_bitreverse32_unary_flip(i64 %a0) { +; CHECK-LABEL: @concat_bitreverse32_unary_flip( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[A0]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP6]], 32 +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: ret i64 [[TMP9]] +; + %1 = lshr i64 %a0, 32 + %2 = trunc i64 %1 to i32 + %3 = trunc i64 %a0 to i32 + %4 = tail call i32 @llvm.bitreverse.i32(i32 %2) + %5 = tail call i32 @llvm.bitreverse.i32(i32 %3) + %6 = zext i32 %4 to i64 + %7 = zext i32 %5 to i64 + %8 = shl nuw i64 %6, 32 + %9 = or i64 %7, %8 + ret i64 %9 +} + +define i64 @concat_bitreverse32_binary(i32 %a0, i32 %a1) { +; CHECK-LABEL: @concat_bitreverse32_binary( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[A0:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.bitreverse.i32(i32 [[A1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[TMP3]] +; 
CHECK-NEXT: ret i64 [[TMP6]] +; + %1 = tail call i32 @llvm.bitreverse.i32(i32 %a0) + %2 = tail call i32 @llvm.bitreverse.i32(i32 %a1) + %3 = zext i32 %1 to i64 + %4 = zext i32 %2 to i64 + %5 = shl nuw i64 %4, 32 + %6 = or i64 %3, %5 + ret i64 %6 +} + +declare i32 @llvm.bitreverse.i32(i32) diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll index ab5f2f8ef53bb..4663d196a281a 100644 --- a/llvm/test/Transforms/InstCombine/or-xor.ll +++ b/llvm/test/Transforms/InstCombine/or-xor.ll @@ -254,8 +254,8 @@ define i32 @test16(i32 %a, i32 %b) { define i8 @not_or(i8 %x) { ; CHECK-LABEL: @not_or( -; CHECK-NEXT: [[NOTX:%.*]] = or i8 [[X:%.*]], 7 -; CHECK-NEXT: [[OR:%.*]] = xor i8 [[NOTX]], -8 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7 +; CHECK-NEXT: [[OR:%.*]] = xor i8 [[TMP1]], -8 ; CHECK-NEXT: ret i8 [[OR]] ; %notx = xor i8 %x, -1 @@ -265,8 +265,8 @@ define i8 @not_or(i8 %x) { define i8 @not_or_xor(i8 %x) { ; CHECK-LABEL: @not_or_xor( -; CHECK-NEXT: [[NOTX:%.*]] = or i8 [[X:%.*]], 7 -; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[NOTX]], -12 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7 +; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[TMP1]], -12 ; CHECK-NEXT: ret i8 [[XOR]] ; %notx = xor i8 %x, -1 @@ -277,8 +277,8 @@ define i8 @not_or_xor(i8 %x) { define i8 @xor_or(i8 %x) { ; CHECK-LABEL: @xor_or( -; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 7 -; CHECK-NEXT: [[OR:%.*]] = xor i8 [[XOR]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7 +; CHECK-NEXT: [[OR:%.*]] = xor i8 [[TMP1]], 32 ; CHECK-NEXT: ret i8 [[OR]] ; %xor = xor i8 %x, 32 @@ -288,8 +288,8 @@ define i8 @xor_or(i8 %x) { define i8 @xor_or2(i8 %x) { ; CHECK-LABEL: @xor_or2( -; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 7 -; CHECK-NEXT: [[OR:%.*]] = xor i8 [[XOR]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7 +; CHECK-NEXT: [[OR:%.*]] = xor i8 [[TMP1]], 32 ; CHECK-NEXT: ret i8 [[OR]] ; %xor = xor i8 %x, 33 @@ -299,8 +299,8 @@ define i8 @xor_or2(i8 %x) { define i8 @xor_or_xor(i8 %x) { ; CHECK-LABEL: @xor_or_xor( -; CHECK-NEXT: [[XOR1:%.*]] = or i8 [[X:%.*]], 7 -; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[XOR1]], 44 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7 +; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[TMP1]], 44 ; CHECK-NEXT: ret i8 [[XOR2]] ; %xor1 = xor i8 %x, 33 @@ -311,8 +311,8 @@ define i8 @xor_or_xor(i8 %x) { define i8 @or_xor_or(i8 %x) { ; CHECK-LABEL: @or_xor_or( -; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 39 -; CHECK-NEXT: [[OR2:%.*]] = xor i8 [[XOR]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 39 +; CHECK-NEXT: [[OR2:%.*]] = xor i8 [[TMP1]], 8 ; CHECK-NEXT: ret i8 [[OR2]] ; %or1 = or i8 %x, 33 @@ -414,3 +414,27 @@ define i32 @test22(i32 %x, i32 %y) { %xor = xor i32 %or1, %or2 ret i32 %xor } + +; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) +define i8 @test23(i8 %A) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: ret i8 -1 +; + %B = or i8 %A, -2 + %C = xor i8 %B, 13 + %D = or i8 %C, 1 + %E = xor i8 %D, 12 + ret i8 %E +} + +define i8 @test23v(<2 x i8> %A) { +; CHECK-LABEL: @test23v( +; CHECK-NEXT: ret i8 -1 +; + %B = or <2 x i8> %A, + %CV = xor <2 x i8> %B, + %C = extractelement <2 x i8> %CV, i32 0 + %D = or i8 %C, 1 + %E = xor i8 %D, 12 + ret i8 %E +} diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll index 51b9a019e23aa..724f004e6ca99 100644 --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -1,18 +1,19 @@ ; Test that the pow library call simplifier works correctly. 
; -; RUN: opt -instcombine -S < %s | FileCheck %s --check-prefixes=CHECK,ANY -; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=arm-apple-tvos9.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=arm-apple-watchos2.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10 +; RUN: opt -instcombine -S < %s | FileCheck %s --check-prefixes=CHECK,LIB,ANY +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-tvos9.0 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-watchos2.0 | FileCheck %s --check-prefixes=CHECK,LIB,ANY,CHECK-EXP10 ; rdar://7251832 -; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,MSVC,VC32,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,MSVC,VC51,VC19,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,MSVC,VC64,CHECK-NO-EXP10 -; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,MSVC,VC83,VC19,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,LIB,MSVC,VC32,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,LIB,MSVC,VC51,VC19,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,LIB,MSVC,VC64,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,LIB,MSVC,VC83,VC19,CHECK-NO-EXP10 +; RUN: opt -instcombine -S < %s -mtriple=amdgcn-- | FileCheck %s --check-prefixes=CHECK,NOLIB,CHECK-NO-EXP10 ; NOTE: The readonly attribute on the pow call should be preserved ; in the cases below where pow is transformed into another function call. 
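;
; Editorial note on the run-line change above (a summary; the amdgcn behavior
; is inferred from the NOLIB check lines rather than stated by the patch):
; amdgcn-- serves as a target whose TargetLibraryInfo exposes no libm, so
; folds that rely on a recognized libcall are expected to be skipped there,
; e.g.
;   %r = call double @pow(double 1.000000e+00, double %x)
; remains a call under NOLIB, while LIB targets fold it to 1.0. Folds that
; act purely on the llvm.pow intrinsic with special exponents (0.0, 1.0, 2.0,
; -1.0) still apply on every target, which is why several former ANY-vs-MSVC
; splits collapse into single CHECK lines in this patch.
;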
@@ -31,6 +32,8 @@ define float @test_simplify1(float %x) { ; VC32-NEXT: [[POW:%.*]] = call float @powf(float 1.000000e+00, float [[X:%.*]]) ; VC32-NEXT: ret float [[POW]] ; VC64-NEXT: ret float 1.000000e+00 +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float 1.000000e+00, float [[X:%.*]]) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float 1.0, float %x) ret float %retval @@ -38,9 +41,7 @@ define float @test_simplify1(float %x) { define <2 x float> @test_simplify1v(<2 x float> %x) { ; CHECK-LABEL: @test_simplify1v( -; ANY-NEXT: ret <2 x float> -; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X:%.*]]) -; MSVC-NEXT: ret <2 x float> [[POW]] +; CHECK-NEXT: ret <2 x float> ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %retval @@ -48,7 +49,9 @@ define <2 x float> @test_simplify1v(<2 x float> %x) { define double @test_simplify2(double %x) { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: ret double 1.000000e+00 +; LIB-NEXT: ret double 1.000000e+00 +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double 1.000000e+00, double [[X:%.*]]) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double 1.0, double %x) ret double %retval @@ -56,9 +59,7 @@ define double @test_simplify2(double %x) { define <2 x double> @test_simplify2v(<2 x double> %x) { ; CHECK-LABEL: @test_simplify2v( -; ANY-NEXT: ret <2 x double> -; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X:%.*]]) -; MSVC-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: ret <2 x double> ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %retval @@ -78,6 +79,8 @@ define float @test_simplify3(float %x) { ; VC64-NEXT: ret float [[POW]] ; VC83-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[X:%.*]]) ; VC83-NEXT: ret float [[EXP2F]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float 2.000000e+00, float [[X:%.*]]) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float 2.0, float %x) ret float %retval @@ -95,6 +98,8 @@ define double @test_simplify3n(double %x) { ; VC32-NEXT: ret double [[POW]] ; VC64-NEXT: [[POW:%.*]] = call double @pow(double 2.500000e-01, double [[X:%.*]]) ; VC64-NEXT: ret double [[POW]] +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double 2.500000e-01, double [[X:%.*]]) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double 0.25, double %x) ret double %retval @@ -106,6 +111,9 @@ define <2 x float> @test_simplify3v(<2 x float> %x) { ; ANY-NEXT: ret <2 x float> [[EXP2]] ; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X:%.*]]) ; MSVC-NEXT: ret <2 x float> [[POW]] +; TODO: should be able to simplify llvm.pow to llvm.exp2 even without libcalls +; NOLIB-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; NOLIB-NEXT: ret <2 x float> [[POW]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %retval @@ -118,6 +126,9 @@ define <2 x double> @test_simplify3vn(<2 x double> %x) { ; ANY-NEXT: ret <2 x double> [[EXP2]] ; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X:%.*]]) ; MSVC-NEXT: ret <2 x double> [[POW]] +; TODO: should be able to simplify llvm.pow to llvm.exp2 even without libcalls +; NOLIB-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; NOLIB-NEXT: ret <2 x double> [[POW]] ; %retval = call <2 x double> 
@llvm.pow.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %retval @@ -133,6 +144,8 @@ define double @test_simplify4(double %x) { ; VC32-NEXT: ret double [[POW]] ; VC64-NEXT: [[POW:%.*]] = call double @pow(double 2.000000e+00, double [[X:%.*]]) ; VC64-NEXT: ret double [[POW]] +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double 2.000000e+00, double [[X:%.*]]) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double 2.0, double %x) ret double %retval @@ -152,6 +165,8 @@ define float @test_simplify4n(float %x) { ; VC83-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], 3.000000e+00 ; VC83-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[MUL]]) ; VC83-NEXT: ret float [[EXP2F]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float 8.000000e+00, float [[X:%.*]]) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float 8.0, float %x) ret float %retval @@ -163,6 +178,9 @@ define <2 x double> @test_simplify4v(<2 x double> %x) { ; ANY-NEXT: ret <2 x double> [[EXP2]] ; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X:%.*]]) ; MSVC-NEXT: ret <2 x double> [[POW]] +; TODO: should be able to simplify llvm.pow to llvm.exp2 even without libcalls +; NOLIB-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; NOLIB-NEXT: ret <2 x double> [[POW]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %retval @@ -175,6 +193,9 @@ define <2 x float> @test_simplify4vn(<2 x float> %x) { ; ANY-NEXT: ret <2 x float> [[EXP2]] ; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X:%.*]]) ; MSVC-NEXT: ret <2 x float> [[POW]] +; TODO: should be able to simplify llvm.pow to llvm.exp2 even without libcalls +; NOLIB-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; NOLIB-NEXT: ret <2 x float> [[POW]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %retval @@ -191,6 +212,8 @@ define float @test_simplify5(float %x) { ; VC51-NEXT: ret float [[POW]] ; VC64-NEXT: ret float 1.000000e+00 ; VC83-NEXT: ret float 1.000000e+00 +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 0.000000e+00) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float %x, float 0.0) ret float %retval @@ -198,9 +221,7 @@ define float @test_simplify5(float %x) { define <2 x float> @test_simplify5v(<2 x float> %x) { ; CHECK-LABEL: @test_simplify5v( -; ANY-NEXT: ret <2 x float> -; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> zeroinitializer) -; MSVC-NEXT: ret <2 x float> [[POW]] +; CHECK-NEXT: ret <2 x float> ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval @@ -208,7 +229,9 @@ define <2 x float> @test_simplify5v(<2 x float> %x) { define double @test_simplify6(double %x) { ; CHECK-LABEL: @test_simplify6( -; CHECK-NEXT: ret double 1.000000e+00 +; LIB-NEXT: ret double 1.000000e+00 +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 0.000000e+00) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double %x, double 0.0) ret double %retval @@ -216,9 +239,7 @@ define double @test_simplify6(double %x) { define <2 x double> @test_simplify6v(<2 x double> %x) { ; CHECK-LABEL: @test_simplify6v( -; ANY-NEXT: ret <2 x double> -; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> zeroinitializer) -; MSVC-NEXT: ret 
<2 x double> [[POW]] +; CHECK-NEXT: ret <2 x double> ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval @@ -247,6 +268,8 @@ define float @test_simplify7(float %x) { ; VC83-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 ; VC83-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]] ; VC83-NEXT: ret float [[TMP1]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float %x, float 0.5) ret float %retval @@ -254,11 +277,13 @@ define float @test_simplify7(float %x) { define double @test_simplify8(double %x) { ; CHECK-LABEL: @test_simplify8( -; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; LIB-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]]) +; LIB-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) +; LIB-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 +; LIB-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; LIB-NEXT: ret double [[TMP1]] +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 5.000000e-01) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double %x, double 0.5) ret double %retval @@ -275,6 +300,8 @@ define float @test_simplify9(float %x) { ; VC51-NEXT: ret float [[POW]] ; VC64-NEXT: ret float 0x7FF0000000000000 ; VC83-NEXT: ret float 0x7FF0000000000000 +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float 0xFFF0000000000000, float 0.5) ret float %retval @@ -282,7 +309,9 @@ define float @test_simplify9(float %x) { define double @test_simplify10(double %x) { ; CHECK-LABEL: @test_simplify10( -; CHECK-NEXT: ret double 0x7FF0000000000000 +; LIB-NEXT: ret double 0x7FF0000000000000 +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double 0xFFF0000000000000, double 5.000000e-01) +; NOLIB-NEXT: ret double [[POW]] ; %retval = call double @pow(double 0xFFF0000000000000, double 0.5) ret double %retval @@ -299,6 +328,8 @@ define float @test_simplify11(float %x) { ; VC51-NEXT: ret float [[POW]] ; VC64-NEXT: ret float [[X:%.*]] ; VC83-NEXT: ret float [[X:%.*]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 1.000000e+00) +; NOLIB-NEXT: ret float [[POW]] ; %retval = call float @powf(float %x, float 1.0) ret float %retval @@ -306,9 +337,7 @@ define float @test_simplify11(float %x) { define <2 x float> @test_simplify11v(<2 x float> %x) { ; CHECK-LABEL: @test_simplify11v( -; ANY-NEXT: ret <2 x float> [[X:%.*]] -; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> ) -; MSVC-NEXT: ret <2 x float> [[POW]] +; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval @@ -316,7 +345,9 @@ define <2 x float> @test_simplify11v(<2 x float> %x) { define double @test_simplify12(double %x) { ; CHECK-LABEL: @test_simplify12( -; CHECK-NEXT: ret double [[X:%.*]] +; LIB-NEXT: ret double [[X:%.*]] +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 1.000000e+00) +; NOLIB-NEXT: ret 
double [[POW]] ; %retval = call double @pow(double %x, double 1.0) ret double %retval @@ -324,9 +355,7 @@ define double @test_simplify12(double %x) { define <2 x double> @test_simplify12v(<2 x double> %x) { ; CHECK-LABEL: @test_simplify12v( -; ANY-NEXT: ret <2 x double> [[X:%.*]] -; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) -; MSVC-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: ret <2 x double> [[X:%.*]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval @@ -346,6 +375,8 @@ define float @pow2_strict(float %x) { ; VC64-NEXT: ret float [[SQUARE]] ; VC83-NEXT: [[SQUARE:%.*]] = fmul float [[X:%.*]], [[X]] ; VC83-NEXT: ret float [[SQUARE]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 2.000000e+00) +; NOLIB-NEXT: ret float [[POW]] ; %r = call float @powf(float %x, float 2.0) ret float %r @@ -353,10 +384,8 @@ define float @pow2_strict(float %x) { define <2 x float> @pow2_strictv(<2 x float> %x) { ; CHECK-LABEL: @pow2_strictv( -; ANY-NEXT: [[SQUARE:%.*]] = fmul <2 x float> [[X:%.*]], [[X]] -; ANY-NEXT: ret <2 x float> [[SQUARE]] -; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> ) -; MSVC-NEXT: ret <2 x float> [[POW]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul <2 x float> [[X:%.*]], [[X]] +; CHECK-NEXT: ret <2 x float> [[SQUARE]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %r @@ -364,8 +393,10 @@ define <2 x float> @pow2_strictv(<2 x float> %x) { define double @pow2_double_strict(double %x) { ; CHECK-LABEL: @pow2_double_strict( -; CHECK-NEXT: [[SQUARE:%.*]] = fmul double [[X:%.*]], [[X]] -; CHECK-NEXT: ret double [[SQUARE]] +; LIB-NEXT: [[SQUARE:%.*]] = fmul double [[X:%.*]], [[X]] +; LIB-NEXT: ret double [[SQUARE]] +; NOLIB-NEXT: [[POW:%.*]] = call double @pow(double [[X:%.*]], double 2.000000e+00) +; NOLIB-NEXT: ret double [[POW]] ; %r = call double @pow(double %x, double 2.0) ret double %r @@ -373,10 +404,8 @@ define double @pow2_double_strict(double %x) { define <2 x double> @pow2_double_strictv(<2 x double> %x) { ; CHECK-LABEL: @pow2_double_strictv( -; ANY-NEXT: [[SQUARE:%.*]] = fmul <2 x double> [[X:%.*]], [[X]] -; ANY-NEXT: ret <2 x double> [[SQUARE]] -; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) -; MSVC-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[SQUARE:%.*]] = fmul <2 x double> [[X:%.*]], [[X]] +; CHECK-NEXT: ret <2 x double> [[SQUARE]] ; %r = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r @@ -396,6 +425,8 @@ define float @pow2_fast(float %x) { ; VC64-NEXT: ret float [[SQUARE]] ; VC83-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]] ; VC83-NEXT: ret float [[SQUARE]] +; NOLIB-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 2.000000e+00) +; NOLIB-NEXT: ret float [[POW]] ; %r = call fast float @powf(float %x, float 2.0) ret float %r @@ -415,6 +446,8 @@ define float @pow_neg1_strict(float %x) { ; VC64-NEXT: ret float [[RECIPROCAL]] ; VC83-NEXT: [[RECIPROCAL:%.*]] = fdiv float 1.000000e+00, [[X:%.*]] ; VC83-NEXT: ret float [[RECIPROCAL]] +; NOLIB-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float -1.000000e+00) +; NOLIB-NEXT: ret float [[POW]] ; %r = call float @powf(float %x, float -1.0) ret float %r @@ -422,10 +455,8 @@ define float @pow_neg1_strict(float %x) { define <2 x float> @pow_neg1_strictv(<2 x float> %x) { ; CHECK-LABEL: @pow_neg1_strictv( -; 
ANY-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> , [[X:%.*]] -; ANY-NEXT: ret <2 x float> [[RECIPROCAL]] -; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> ) -; MSVC-NEXT: ret <2 x float> [[POW]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> , [[X:%.*]] +; CHECK-NEXT: ret <2 x float> [[RECIPROCAL]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %r @@ -433,8 +464,10 @@ define <2 x float> @pow_neg1_strictv(<2 x float> %x) { define double @pow_neg1_double_fast(double %x) { ; CHECK-LABEL: @pow_neg1_double_fast( -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]] -; CHECK-NEXT: ret double [[RECIPROCAL]] +; LIB-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]] +; LIB-NEXT: ret double [[RECIPROCAL]] +; NOLIB-NEXT: [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double -1.000000e+00) +; NOLIB-NEXT: ret double [[POW]] ; %r = call fast double @pow(double %x, double -1.0) ret double %r @@ -442,10 +475,8 @@ define double @pow_neg1_double_fast(double %x) { define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) { ; CHECK-LABEL: @pow_neg1_double_fastv( -; ANY-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[X:%.*]] -; ANY-NEXT: ret <2 x double> [[RECIPROCAL]] -; MSVC-NEXT: [[POW:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) -; MSVC-NEXT: ret <2 x double> [[POW]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[X:%.*]] +; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r @@ -453,11 +484,11 @@ define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) { define double @test_simplify17(double %x) { ; CHECK-LABEL: @test_simplify17( -; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] -; CHECK-NEXT: ret double [[TMP1]] +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) +; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; CHECK-NEXT: ret double [[TMP1]] ; %retval = call double @llvm.pow.f64(double %x, double 0.5) ret double %retval diff --git a/llvm/test/Transforms/InstCombine/pow-3.ll b/llvm/test/Transforms/InstCombine/pow-3.ll index d0edd46587511..16559e9228d27 100644 --- a/llvm/test/Transforms/InstCombine/pow-3.ll +++ b/llvm/test/Transforms/InstCombine/pow-3.ll @@ -1,15 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; Test that the pow() won't get simplified to when it's disabled. 
-; ; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s declare double @llvm.pow.f64(double, double) declare double @pow(double, double) -define double @test_simplify_unavailable1(double %x) { -; CHECK-LABEL: @test_simplify_unavailable1( -; CHECK-NEXT: [[RETVAL:%.*]] = call double @llvm.pow.f64(double [[X:%.*]], double 5.000000e-01) +define double @sqrt_libcall(double %x) { +; CHECK-LABEL: @sqrt_libcall( +; CHECK-NEXT: [[RETVAL:%.*]] = call double @pow(double [[X:%.*]], double 5.000000e-01) ; CHECK-NEXT: ret double [[RETVAL]] +; + %retval = call double @pow(double %x, double 0.5) + ret double %retval +} + +define double @sqrt_intrinsic(double %x) { +; CHECK-LABEL: @sqrt_intrinsic( +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]]) +; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]] +; CHECK-NEXT: ret double [[TMP1]] ; %retval = call double @llvm.pow.f64(double %x, double 0.5) ret double %retval @@ -17,8 +27,8 @@ define double @test_simplify_unavailable1(double %x) { ; Shrinking is disabled too. -define float @test_simplify_unavailable2(float %f, float %g) { -; CHECK-LABEL: @test_simplify_unavailable2( +define float @shrink_libcall(float %f, float %g) { +; CHECK-LABEL: @shrink_libcall( ; CHECK-NEXT: [[DF:%.*]] = fpext float [[F:%.*]] to double ; CHECK-NEXT: [[DG:%.*]] = fpext float [[G:%.*]] to double ; CHECK-NEXT: [[CALL:%.*]] = call fast double @pow(double [[DF]], double [[DG]]) @@ -34,8 +44,8 @@ define float @test_simplify_unavailable2(float %f, float %g) { ; Shrinking is disabled for the intrinsic too. -define float @test_simplify_unavailable3(float %f, float %g) { -; CHECK-LABEL: @test_simplify_unavailable3( +define float @shrink_intrinsic(float %f, float %g) { +; CHECK-LABEL: @shrink_intrinsic( ; CHECK-NEXT: [[DF:%.*]] = fpext float [[F:%.*]] to double ; CHECK-NEXT: [[DG:%.*]] = fpext float [[G:%.*]] to double ; CHECK-NEXT: [[CALL:%.*]] = call fast double @llvm.pow.f64(double [[DF]], double [[DG]]) diff --git a/llvm/test/Transforms/InstCombine/pow-exp.ll b/llvm/test/Transforms/InstCombine/pow-exp.ll index a583c367e4dd1..f5bf0f54fab40 100644 --- a/llvm/test/Transforms/InstCombine/pow-exp.ll +++ b/llvm/test/Transforms/InstCombine/pow-exp.ll @@ -261,6 +261,16 @@ define double @pow_ok_ten_base(double %e) { ret double %call } +define double @pow_ok_denorm_base(double %e) { +; CHECK-LABEL: @pow_ok_denorm_base( +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0xC0904800000005C5 +; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: ret double [[EXP2]] +; + %call = tail call afn nnan ninf double @pow(double 0x00000000FFFFFFFF, double %e) + ret double %call +} + define float @powf_ok_base(float %e) { ; CHECK-LABEL: @powf_ok_base( ; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0xBFE07762{{.*}} @@ -301,6 +311,16 @@ define float @powf_ok_ten_base(float %e) { ret float %call } +define float @powf_ok_denorm_base(float %e) { +; CHECK-LABEL: @powf_ok_denorm_base( +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], -1.350000e+02 +; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: ret float [[EXP2F]] +; + %call = tail call afn nnan ninf float @powf(float 0x3780000000000000, float %e) + ret float %call +} + ; Negative tests define double 
@pow_zero_base(double %e) { @@ -350,9 +370,10 @@ define double @pow_negative_base(double %e) { define double @pow_multiuse(double %e) { ; CHECK-LABEL: @pow_multiuse( -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn double @pow(double 5.000000e+00, double [[E:%.*]]) -; CHECK-NEXT: tail call void @use_d(double [[CALL]]) -; CHECK-NEXT: ret double [[CALL]] +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn double [[E:%.*]], 0x4002934{{.*}} +; CHECK-NEXT: [[EXP2:%.*]] = call nnan ninf afn double @exp2(double [[MUL]]) +; CHECK-NEXT: tail call void @use_d(double [[EXP2]]) +; CHECK-NEXT: ret double [[EXP2]] ; %call = tail call afn nnan ninf double @pow(double 5.000000e+00, double %e) tail call void @use_d(double %call) @@ -433,9 +454,10 @@ define float @powf_negative_base(float %e) { define float @powf_multiuse(float %e) { ; CHECK-LABEL: @powf_multiuse( -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn float @powf(float 5.000000e+00, float [[E:%.*]]) -; CHECK-NEXT: tail call void @use_f(float [[CALL]]) -; CHECK-NEXT: ret float [[CALL]] +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan ninf afn float [[E:%.*]], 0x4002934{{.*}} +; CHECK-NEXT: [[EXP2F:%.*]] = call nnan ninf afn float @exp2f(float [[MUL]]) +; CHECK-NEXT: tail call void @use_f(float [[EXP2F]]) +; CHECK-NEXT: ret float [[EXP2F]] ; %call = tail call afn nnan ninf float @powf(float 5.000000e+00, float %e) tail call void @use_f(float %call) diff --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll index e785ad35e8d65..44d284c6590a3 100644 --- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll +++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instcombine -S < %s | FileCheck %s - +declare i3 @llvm.cttz.i3(i3, i1) declare i32 @llvm.cttz.i32(i32, i1 immarg) declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i64 @llvm.cttz.i64(i64, i1 immarg) @@ -217,3 +217,33 @@ define <2 x i32> @select_clz_to_ctz_vec_with_undef(<2 x i32> %a) { %cond = select <2 x i1> %tobool, <2 x i32> %lz, <2 x i32> %sub1 ret <2 x i32> %cond } + +define i4 @PR45762(i3 %x4) { +; CHECK-LABEL: @PR45762( +; CHECK-NEXT: [[T4:%.*]] = call i3 @llvm.cttz.i3(i3 [[X4:%.*]], i1 false), !range !2 +; CHECK-NEXT: [[T7:%.*]] = zext i3 [[T4]] to i4 +; CHECK-NEXT: [[ONE_HOT_16:%.*]] = shl i4 1, [[T7]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i3 [[X4]], 0 +; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i3 0, i3 [[T4]] +; CHECK-NEXT: [[UMUL_23:%.*]] = zext i3 [[NARROW]] to i4 +; CHECK-NEXT: [[SEL_71:%.*]] = shl i4 [[ONE_HOT_16]], [[UMUL_23]] +; CHECK-NEXT: ret i4 [[SEL_71]] +; + %t4 = call i3 @llvm.cttz.i3(i3 %x4, i1 false) + %t5 = icmp eq i3 %x4, 0 + %t6 = select i1 %t5, i3 3, i3 %t4 + %t7 = zext i3 %t6 to i4 + %one_hot_16 = shl i4 1, %t7 + %t8 = lshr i4 %one_hot_16, 0 + %bit_slice_61 = trunc i4 %t8 to i1 + %t9 = lshr i4 %one_hot_16, 1 + %bit_slice_62 = trunc i4 %t9 to i1 + %t10 = lshr i4 %one_hot_16, 2 + %bit_slice_64 = trunc i4 %t10 to i1 + %t11 = or i1 %bit_slice_61, %bit_slice_62 + %or_69 = or i1 %t11, %bit_slice_64 + %umul_23 = mul i4 %one_hot_16, %one_hot_16 + %t12 = icmp eq i1 %or_69, false + %sel_71 = select i1 %t12, i4 %one_hot_16, i4 %umul_23 + ret i4 %sel_71 +} diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll index 8acf49f0ef8cd..d46227d74db37 100644 --- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll 
+++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -3,6 +3,9 @@ target datalayout = "n8:16:32:64" +declare void @use(i8) +declare void @use_vec(<2 x i8>) + define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2( ; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1 @@ -1449,3 +1452,123 @@ define i32 @shift_xor_multiuse_cmp_and(i32 %x, i32 %y, i32 %z, i32 %w) { %res2 = mul i32 %res, %and2 ; to bump up the use count of the and ret i32 %res2 } + +define i8 @set_bits(i8 %x, i1 %b) { +; CHECK-LABEL: @set_bits( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -6 +; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], i8 5, i8 0 +; CHECK-NEXT: [[COND:%.*]] = or i8 [[AND]], [[MASKSEL]] +; CHECK-NEXT: ret i8 [[COND]] +; + %and = and i8 %x, 250 + %or = or i8 %x, 5 + %cond = select i1 %b, i8 %or, i8 %and + ret i8 %cond +} + +; Negative test + +define i8 @set_bits_not_inverse_constant(i8 %x, i1 %b) { +; CHECK-LABEL: @set_bits_not_inverse_constant( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -6 +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X]], 7 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[B:%.*]], i8 [[OR]], i8 [[AND]] +; CHECK-NEXT: ret i8 [[COND]] +; + %and = and i8 %x, 250 + %or = or i8 %x, 7 + %cond = select i1 %b, i8 %or, i8 %and + ret i8 %cond +} + +define i8 @set_bits_extra_use1(i8 %x, i1 %b) { +; CHECK-LABEL: @set_bits_extra_use1( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -6 +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], i8 5, i8 0 +; CHECK-NEXT: [[COND:%.*]] = or i8 [[AND]], [[MASKSEL]] +; CHECK-NEXT: ret i8 [[COND]] +; + %and = and i8 %x, 250 + call void @use(i8 %and) + %or = or i8 %x, 5 + %cond = select i1 %b, i8 %or, i8 %and + ret i8 %cond +} + +; Negative test + +define i8 @set_bits_extra_use2(i8 %x, i1 %b) { +; CHECK-LABEL: @set_bits_extra_use2( +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -6 +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X]], 5 +; CHECK-NEXT: call void @use(i8 [[OR]]) +; CHECK-NEXT: [[COND:%.*]] = select i1 [[B:%.*]], i8 [[OR]], i8 [[AND]] +; CHECK-NEXT: ret i8 [[COND]] +; + %and = and i8 %x, 250 + %or = or i8 %x, 5 + call void @use(i8 %or) + %cond = select i1 %b, i8 %or, i8 %and + ret i8 %cond +} + +define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b) { +; CHECK-LABEL: @clear_bits( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[COND:%.*]] = or <2 x i8> [[AND]], [[MASKSEL]] +; CHECK-NEXT: ret <2 x i8> [[COND]] +; + %and = and <2 x i8> %x, + %or = or <2 x i8> %x, + %cond = select <2 x i1> %b, <2 x i8> %and, <2 x i8> %or + ret <2 x i8> %cond +} + +; Negative test + +define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b) { +; CHECK-LABEL: @clear_bits_not_inverse_constant( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> [[AND]], <2 x i8> [[OR]] +; CHECK-NEXT: ret <2 x i8> [[COND]] +; + %and = and <2 x i8> %x, + %or = or <2 x i8> %x, + %cond = select <2 x i1> %b, <2 x i8> %and, <2 x i8> %or + ret <2 x i8> %cond +} + +define <2 x i8> @clear_bits_extra_use1(<2 x i8> %x, i1 %b) { +; CHECK-LABEL: @clear_bits_extra_use1( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[AND]]) +; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: 
[[COND:%.*]] = or <2 x i8> [[AND]], [[MASKSEL]]
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+  %and = and <2 x i8> %x,
+  call void @use_vec(<2 x i8> %and)
+  %or = or <2 x i8> %x,
+  %cond = select i1 %b, <2 x i8> %and, <2 x i8> %or
+  ret <2 x i8> %cond
+}
+
+; Negative test
+
+define i8 @clear_bits_extra_use2(i8 %x, i1 %b) {
+; CHECK-LABEL: @clear_bits_extra_use2(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], -6
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[X]], 5
+; CHECK-NEXT: call void @use(i8 [[OR]])
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[B:%.*]], i8 [[AND]], i8 [[OR]]
+; CHECK-NEXT: ret i8 [[COND]]
+;
+  %and = and i8 %x, 250
+  %or = or i8 %x, 5
+  call void @use(i8 %or)
+  %cond = select i1 %b, i8 %and, i8 %or
+  ret i8 %cond
+}
diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 6aa262fd931d2..b53b5124ee4f2 100644
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -8,9 +8,9 @@ define void @pr12967() {
; CHECK-LABEL: @pr12967(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: br label [[LOOP]]
;
entry:
  br label %loop
@@ -27,9 +27,9 @@ loop:
define void @pr26760() {
; CHECK-LABEL: @pr26760(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: br label [[LOOP]]
;
entry:
  br label %loop
@@ -47,14 +47,14 @@ loop:
define i32 @pr8547(i32* %g) {
; CHECK-LABEL: @pr8547(
; CHECK-NEXT: codeRepl:
-; CHECK-NEXT: br label %for.cond
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
-; CHECK-NEXT: store i32 [[STOREMERGE]], i32* %g, align 4
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 0, [[CODEREPL:%.*]] ], [ 5, [[FOR_COND]] ]
+; CHECK-NEXT: store i32 [[STOREMERGE]], i32* [[G:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i32 [[STOREMERGE]], 6
; CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 64
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[CONV2]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label %for.cond, label %codeRepl2
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[CODEREPL2:%.*]]
; CHECK: codeRepl2:
; CHECK-NEXT: ret i32 [[CONV2]]
;
@@ -73,3 +73,63 @@ codeRepl2:
  ret i32 %conv2
}
+; Two shifts in the same direction whose shift amounts add up to the bitwidth
+; or more should get folded to zero.
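; An illustrative walk-through of the reasoning (the %a/%b/%c names here are
; hypothetical, not part of the patch), using the same amounts as @shl_shl:
;   %b = shl i32 %a, 6    ; every bit of %a moves up 6 positions
;   %c = shl i32 %b, 28   ; then up another 28, i.e. 34 positions in total
; Since 34 >= 32, no bit of %a can survive, so %c is always 0. @lshr_lshr
; below exercises the boundary case: 231 + 1 equals the full 232-bit width,
; which is already enough to shift every bit out.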
+
+define i32 @shl_shl(i32 %A) {
+; CHECK-LABEL: @shl_shl(
+; CHECK-NEXT: ret i32 0
+;
+  %B = shl i32 %A, 6
+  %C = shl i32 %B, 28
+  ret i32 %C
+}
+
+define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
+; CHECK-LABEL: @shl_shl_splat_vec(
+; CHECK-NEXT: ret <2 x i33> zeroinitializer
+;
+  %B = shl <2 x i33> %A,
+  %C = shl <2 x i33> %B,
+  ret <2 x i33> %C
+}
+
+; FIXME: The non-splat vector case should also fold to zero.
+
+define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
+; CHECK-LABEL: @shl_shl_vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]],
+; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]],
+; CHECK-NEXT: ret <2 x i33> [[C]]
+;
+  %B = shl <2 x i33> %A,
+  %C = shl <2 x i33> %B,
+  ret <2 x i33> %C
+}
+
+define i232 @lshr_lshr(i232 %A) {
+; CHECK-LABEL: @lshr_lshr(
+; CHECK-NEXT: ret i232 0
+;
+  %B = lshr i232 %A, 231
+  %C = lshr i232 %B, 1
+  ret i232 %C
+}
+
+define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
+; CHECK-LABEL: @lshr_lshr_splat_vec(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+  %B = lshr <2 x i32> %A,
+  %C = lshr <2 x i32> %B,
+  ret <2 x i32> %C
+}
+
+define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
+; CHECK-LABEL: @lshr_lshr_vec(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+  %B = lshr <2 x i32> %A,
+  %C = lshr <2 x i32> %B,
+  ret <2 x i32> %C
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
index 54896397f5cf9..0f2e6336a73e4 100644
--- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
+++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
@@ -822,3 +822,95 @@ neg_bb:
nonneg_bb:
  ret i16 0
}
+
+; 'or' of 1 and an operand whose lowest bit is known to be zero is an 'inc'
+define i8 @negation_of_increment_via_or_with_no_common_bits_set(i8 %x, i8 %y) {
+; CHECK-LABEL: @negation_of_increment_via_or_with_no_common_bits_set(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 [[Y:%.*]], 1
+; CHECK-NEXT: [[T1_NEG:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[T2]]
+;
+  %t0 = shl i8 %y, 1
+  %t1 = or i8 %t0, 1
+  %t2 = sub i8 %x, %t1
+  ret i8 %t2
+}
+define i8 @negation_of_increment_via_or_with_no_common_bits_set_extrause(i8 %x, i8 %y) {
+; CHECK-LABEL: @negation_of_increment_via_or_with_no_common_bits_set_extrause(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 [[Y:%.*]], 1
+; CHECK-NEXT: [[T1:%.*]] = or i8 [[T0]], 1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]]
+; CHECK-NEXT: ret i8 [[T2]]
+;
+  %t0 = shl i8 %y, 1
+  %t1 = or i8 %t0, 1
+  call void @use8(i8 %t1)
+  %t2 = sub i8 %x, %t1
+  ret i8 %t2
+}
+define i8 @negation_of_increment_via_or_common_bits_set(i8 %x, i8 %y) {
+; CHECK-LABEL: @negation_of_increment_via_or_common_bits_set(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 [[Y:%.*]], 1
+; CHECK-NEXT: [[T1:%.*]] = or i8 [[T0]], 3
+; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]]
+; CHECK-NEXT: ret i8 [[T2]]
+;
+  %t0 = shl i8 %y, 1
+  %t1 = or i8 %t0, 3
+  %t2 = sub i8 %x, %t1
+  ret i8 %t2
+}
+
+; 'or' of operands with no common bits set is 'add'
+define i8 @add_via_or_with_no_common_bits_set(i8 %x, i8 %y) {
+; CHECK-LABEL: @add_via_or_with_no_common_bits_set(
+; CHECK-NEXT: [[T0:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1_NEG:%.*]] = shl i8 [[Y]], 2
+; CHECK-NEXT: [[T2_NEG:%.*]] = add i8 [[T1_NEG]], -3
+; CHECK-NEXT: [[T3:%.*]] = add i8 [[T2_NEG]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[T3]]
+;
+  %t0 = sub i8 0, %y
+  call void @use8(i8 %t0)
+  %t1 = shl i8 %t0, 2
+  %t2 = or i8 %t1, 3
+  %t3 = sub i8 %x, %t2
+  ret i8 %t3
+}
+define i8 
@add_via_or_with_common_bit_maybe_set(i8 %x, i8 %y) { +; CHECK-LABEL: @add_via_or_with_common_bit_maybe_set( +; CHECK-NEXT: [[T0:%.*]] = sub i8 0, [[Y:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 2 +; CHECK-NEXT: [[T2:%.*]] = or i8 [[T1]], 4 +; CHECK-NEXT: [[T3:%.*]] = sub i8 [[X:%.*]], [[T2]] +; CHECK-NEXT: ret i8 [[T3]] +; + %t0 = sub i8 0, %y + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 2 + %t2 = or i8 %t1, 4 + %t3 = sub i8 %x, %t2 + ret i8 %t3 +} +define i8 @add_via_or_with_no_common_bits_set_extrause(i8 %x, i8 %y) { +; CHECK-LABEL: @add_via_or_with_no_common_bits_set_extrause( +; CHECK-NEXT: [[T0:%.*]] = sub i8 0, [[Y:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 2 +; CHECK-NEXT: [[T2:%.*]] = or i8 [[T1]], 3 +; CHECK-NEXT: call void @use8(i8 [[T2]]) +; CHECK-NEXT: [[T3:%.*]] = sub i8 [[X:%.*]], [[T2]] +; CHECK-NEXT: ret i8 [[T3]] +; + %t0 = sub i8 0, %y + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 2 + %t2 = or i8 %t1, 3 + call void @use8(i8 %t2) + %t3 = sub i8 %x, %t2 + ret i8 %t3 +} diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll index 97f656a7c8290..c0c91b26f7a3f 100644 --- a/llvm/test/Transforms/InstSimplify/insertelement.ll +++ b/llvm/test/Transforms/InstSimplify/insertelement.ll @@ -42,14 +42,27 @@ define <4 x i32> @test5(<4 x i32> %A) { ret <4 x i32> %I } +; The undef may be replacing a poison value, so it is not safe to just return 'A'. + define <4 x i32> @PR1286(<4 x i32> %A) { ; CHECK-LABEL: @PR1286( -; CHECK-NEXT: ret <4 x i32> [[A:%.*]] +; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 undef, i32 1 +; CHECK-NEXT: ret <4 x i32> [[B]] ; %B = insertelement <4 x i32> %A, i32 undef, i32 1 ret <4 x i32> %B } +; Constant is not poison, so this can simplify. 
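; A minimal sketch of the argument (the <i32 1, i32 2> constants below are
; hypothetical, chosen only for illustration):
;   %B = insertelement <2 x i32> <i32 1, i32 2>, i32 undef, i32 %Index
; may fold to <i32 1, i32 2> because undef can be chosen to match whichever
; constant lane it replaces, and a constant lane is never poison. In @PR1286
; above, the base vector is an arbitrary %A whose replaced lane might be
; poison; folding to %A would replace an undef lane with poison, which is not
; a sound refinement.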
+
+define <2 x i32> @undef_into_constant_vector_with_variable_index(<2 x i32> %A, i32 %Index) {
+; CHECK-LABEL: @undef_into_constant_vector_with_variable_index(
+; CHECK-NEXT: ret <2 x i32>
+;
+  %B = insertelement <2 x i32> , i32 undef, i32 %Index
+  ret <2 x i32> %B
+}
+
define <8 x i8> @extract_insert_same_vec_and_index(<8 x i8> %in) {
; CHECK-LABEL: @extract_insert_same_vec_and_index(
; CHECK-NEXT: ret <8 x i8> [[IN:%.*]]
diff --git a/llvm/test/Transforms/InstSimplify/vscale.ll b/llvm/test/Transforms/InstSimplify/vscale.ll
index 608a093a40aa2..669c824685e8e 100644
--- a/llvm/test/Transforms/InstSimplify/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/vscale.ll
@@ -17,7 +17,8 @@ define <vscale x 4 x i32> @insertelement_idx_undef(<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @insertelement_value_undef(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @insertelement_value_undef(
-; CHECK-NEXT: ret <vscale x 4 x i32> [[A:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <vscale x 4 x i32> [[A:%.*]], i32 undef, i64 0
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
;
  %r = insertelement <vscale x 4 x i32> %a, i32 undef, i64 0
  ret <vscale x 4 x i32> %r
diff --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index 1d4a99081a160..cdeb02a124404 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -1,14 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: aarch64-registered-target
; RUN: opt < %s -licm -S | FileCheck %s
target triple = "aarch64--linux-gnueabi"
-; CHECK-LABEL:@test1
-; CHECK-LABEL:loopexit1:
-; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
-; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP0:%.*]] = icmp slt i32 0, [[J:%.*]]
+; CHECK-NEXT: br i1 [[CMP0]], label [[FOR_BODY_LR_PH:%.*]], label [[RETURN:%.*]]
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[P_ADDR:%.*]] = phi i8** [ [[P:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ARRAYIDX0:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[I0:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[I_ADD:%.*]], [[IF_END]] ]
+; CHECK-NEXT: [[I0_EXT:%.*]] = sext i32 [[I0]] to i64
+; CHECK-NEXT: [[ARRAYIDX0]] = getelementptr inbounds i8*, i8** [[P_ADDR]], i64 [[I0_EXT]]
+; CHECK-NEXT: [[L0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i8* [[L0]], [[Q:%.*]]
+; CHECK-NEXT: br i1 [[CMP1]], label [[LOOPEXIT0:%.*]], label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8*, i8** [[ARRAYIDX0]], i64 1
+; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i8* [[L1]], [[Q]]
+; CHECK-NEXT: [[I_ADD]] = add nsw i32 [[I0]], 2
+; CHECK-NEXT: br i1 [[CMP4]], label [[LOOPEXIT1:%.*]], label [[FOR_BODY]]
+; CHECK: loopexit0:
+; CHECK-NEXT: [[P1:%.*]] = phi i8** [ [[ARRAYIDX0]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: loopexit1:
+; CHECK-NEXT: [[ARRAYIDX0_LCSSA:%.*]] = phi i8** [ [[ARRAYIDX0]], [[IF_END]] ]
+; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds i8*, i8** [[ARRAYIDX0_LCSSA]], i64 1
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i8** [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i8** [[RETVAL_0]]
+;
entry:
  %cmp0 = icmp slt i32 0, %j
  br i1 %cmp0, label %for.body.lr.ph, label %return
@@ -46,12 +74,46 @@ return:
  ret i8** %retval.0
}
-; CHECK-LABEL: @test2
-; 
CHECK-LABEL: loopexit2: -; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ] -; CHECK: getelementptr inbounds i8*, i8** %[[PHI]] define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) { - +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_ADDR_0:%.*]] = phi i32 [ [[ADD:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i8** [ [[ADD_PTR:%.*]], [[IF_END]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_ADDR_0]], [[J:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[LOOPEXIT0:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[P_ADDR:%.*]] = phi i8** [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[P_ADDR_0]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[I_ADDR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_ADDR_0]], [[FOR_COND]] ] +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[I_ADDR]] to i64 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8*, i8** [[P_ADDR]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[L0:%.*]] = load i8*, i8** [[ADD_PTR]], align 8 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i8* [[L0]], [[Q:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOPEXIT1:%.*]], label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[I_ADDR]], 1 +; CHECK-NEXT: [[IDX2_EXT:%.*]] = sext i32 [[ADD_I]] to i64 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[ADD_PTR]], i64 [[IDX2_EXT]] +; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i8* [[L1]], [[Q]] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD_I]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]] +; CHECK: loopexit0: +; CHECK-NEXT: [[P0:%.*]] = phi i8** [ null, [[FOR_COND]] ] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: loopexit1: +; CHECK-NEXT: [[P1:%.*]] = phi i8** [ [[ADD_PTR]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: loopexit2: +; CHECK-NEXT: [[IDX2_EXT_LCSSA:%.*]] = phi i64 [ [[IDX2_EXT]], [[IF_END]] ] +; CHECK-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i8** [ [[ADD_PTR]], [[IF_END]] ] +; CHECK-NEXT: [[ARRAYIDX2_LE:%.*]] = getelementptr inbounds i8*, i8** [[ADD_PTR_LCSSA]], i64 [[IDX2_EXT_LCSSA]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i8** [ [[P1]], [[LOOPEXIT1]] ], [ [[ARRAYIDX2_LE]], [[LOOPEXIT2]] ], [ [[P0]], [[LOOPEXIT0]] ] +; CHECK-NEXT: ret i8** [[RETVAL_0]] +; entry: br label %for.body @@ -99,14 +161,43 @@ return: } -; CHECK-LABEL: @test3 -; CHECK-LABEL: loopexit1: -; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ] -; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ] -; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32 -; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]] -; CHECK: call void @dummy(i32 %[[TRUNC]]) define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp slt i64 0, [[J:%.*]] +; CHECK-NEXT: br i1 [[CMP0]], label [[FOR_BODY_LR_PH:%.*]], label [[RETURN:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[P_ADDR:%.*]] = phi i8** [ [[P:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ARRAYIDX0:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[I0:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[I_ADD:%.*]], [[IF_END]] ] +; CHECK-NEXT: [[I0_EXT:%.*]] = sext i32 [[I0]] to i64 +; CHECK-NEXT: [[ARRAYIDX0]] = getelementptr inbounds i8*, i8** [[P_ADDR]], i64 [[I0_EXT]] +; CHECK-NEXT: [[L0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8 +; CHECK-NEXT: [[CMP1:%.*]] = icmp 
ugt i8* [[L0]], [[Q:%.*]]
+; CHECK-NEXT: br i1 [[CMP1]], label [[LOOPEXIT0:%.*]], label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[I0_EXT]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8*, i8** [[P_ADDR]], i32 [[TRUNC]]
+; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
+; CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i8* [[L1]], [[Q]]
+; CHECK-NEXT: [[I_ADD]] = add nsw i32 [[I0]], 2
+; CHECK-NEXT: br i1 [[CMP4]], label [[LOOPEXIT1:%.*]], label [[FOR_BODY]]
+; CHECK: loopexit0:
+; CHECK-NEXT: [[P1:%.*]] = phi i8** [ [[ARRAYIDX0]], [[FOR_BODY]] ]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: loopexit1:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[IF_END]] ]
+; CHECK-NEXT: [[P_ADDR_LCSSA:%.*]] = phi i8** [ [[P_ADDR]], [[IF_END]] ]
+; CHECK-NEXT: [[TRUNC_LE:%.*]] = trunc i64 [[ADD_LCSSA]] to i32
+; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds i8*, i8** [[P_ADDR_LCSSA]], i32 [[TRUNC_LE]]
+; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LE]])
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i8** [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i8** [[RETVAL_0]]
+;
entry:
  %cmp0 = icmp slt i64 0, %j
  br i1 %cmp0, label %for.body.lr.ph, label %return
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index b3a0710545d7b..b5fed033e327e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
-; CHECK: cost of 7 for VF 8 For instruction: %conv = fptosi float %tmp to i8
+; CHECK: cost of 4 for VF 8 For instruction: %conv = fptosi float %tmp to i8
define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
entry:
  br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index c173bd9f8fad2..b406b3f91fa2b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -18,59 +18,77 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4, !tbaa !1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa !1
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa !1
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa !1
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa !1
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP13]], i32 2
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i32 3
-; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP18]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT: [[TMP21]] = add <4 x i32> [[TMP20]], [[TMP19]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
-; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP4]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP12]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP13]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP16]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP17]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP20]], i32 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP21]], i32 2
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP22]], i32 3
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP23]], i32 4
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP24]], i32 5
+; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP25]], i32 6
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP33]], i32 [[TMP26]], i32 7
+; CHECK-NEXT: [[TMP35:%.*]] = mul nsw <8 x i32> [[TMP34]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP36:%.*]] = add <8 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP37]] = add <8 x i32> [[TMP36]], [[TMP35]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
-; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP21]], [[RDX_SHUF]]
-; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP37]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP37]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 96
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !tbaa !1
+; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !tbaa !1
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
-; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !tbaa !1
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], [[TMP24]]
+; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !tbaa !1
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP41]], [[TMP40]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index f8d20c32bed56..3636446e5c93a 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -33,3 +33,27 @@ for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]
  ret i32 %tmp4
}
+
+; Check for crash exposed by D76992.
+; CHECK: N0 [label =
+; CHECK-NEXT: "loop:\n" +
+; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" +
+; CHECK-NEXT: "WIDEN\l"" %cond0 = icmp %iv, 13\l" +
+; CHECK-NEXT: "WIDEN-SELECT%s = select %cond0, 10, 20\l" +
+; CHECK-NEXT: "EMIT vp<%1> = icmp ule ir<%iv> vp<%0>\l"
+; CHECK-NEXT: ]
+define void @test() {
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %cond0 = icmp ult i64 %iv, 13
+  %s = select i1 %cond0, i32 10, i32 20
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 14
+  br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll b/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
new file mode 100644
index 0000000000000..ea86c410731b8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vectorizeVFone.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -passes=loop-vectorize -S 2>&1 | FileCheck %s
+
+%type = type { [3 x double] }
+
+define void @getScalarFunc(double* %A, double* %C, %type* %B) {
+; CHECK-LABEL: getScalarFunc
+; This check will also catch the massv version of the function.
+; CHECK-NOT: call fast <{{[0-9]+}} x double> @{{.*}}atan(<{{[0-9]+}} x double> %{{[0-9]+}}) +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %dummyload2 = load double, double* %A, align 8 + %arrayidx.i24 = getelementptr inbounds %type, %type* %B, i64 %i, i32 0, i32 0 + %_15 = load double, double* %arrayidx.i24, align 8 + %call10 = tail call fast double @atan(double %_15) #0 + %inc = add i64 %i, 1 + %cmp = icmp ugt i64 1000, %inc + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +declare double @atan(double) local_unnamed_addr +declare <2 x double> @vector_atan(<2 x double>) #0 +attributes #0 = { nounwind readnone "vector-function-abi-variant"="_ZGV_LLVM_N2v_atan(vector_atan)" } diff --git a/llvm/test/Transforms/MergeFunc/shufflevector.ll b/llvm/test/Transforms/MergeFunc/shufflevector.ll new file mode 100644 index 0000000000000..244fd5c8c9eaa --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/shufflevector.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mergefunc < %s | FileCheck %s + +define internal <2 x i32> @test1(<2 x i32> %v1, <2 x i32> %v2) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[X:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[X]] +; + %x = shufflevector <2 x i32> %v1, <2 x i32> %v2, <2 x i32> + ret <2 x i32> %x +} + +; Same mask as test1. +define internal <2 x i32> @test2(<2 x i32> %v1, <2 x i32> %v2) { + %x = shufflevector <2 x i32> %v1, <2 x i32> %v2, <2 x i32> + ret <2 x i32> %x +} + +; Different mask than test1, don't merge. +define internal <2 x i32> @test3(<2 x i32> %v1, <2 x i32> %v2) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[X:%.*]] = shufflevector <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[X]] +; + %x = shufflevector <2 x i32> %v1, <2 x i32> %v2, <2 x i32> + ret <2 x i32> %x +} + +define void @caller(<2 x i32> %v1, <2 x i32> %v2) { +; CHECK-LABEL: @caller( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @test1(<2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @test1(<2 x i32> [[V1]], <2 x i32> [[V2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @test3(<2 x i32> [[V1]], <2 x i32> [[V2]]) +; CHECK-NEXT: ret void +; + call <2 x i32> @test1(<2 x i32> %v1, <2 x i32> %v2) + call <2 x i32> @test2(<2 x i32> %v1, <2 x i32> %v2) + call <2 x i32> @test3(<2 x i32> %v1, <2 x i32> %v2) + ret void +} diff --git a/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll b/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll index b3098336f5efa..8bd2fb9e348d0 100644 --- a/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll +++ b/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll @@ -142,7 +142,7 @@ entry: ; CSPGOSUMMARY: {{![0-9]+}} = !{!"MaxFunctionCount", i64 800000} ; CSPGOSUMMARY: {{![0-9]+}} = !{!"NumCounts", i64 14} ; CSPGOSUMMARY: {{![0-9]+}} = !{!"NumFunctions", i64 8} -; CSPGOSUMMARY: {{![0-9]+}} = !{!"DetailedSummary", !10} +; CSPGOSUMMARY: {{![0-9]+}} = !{!"DetailedSummary", !{{[0-9]+}}} ; CSPGOSUMMARY: {{![0-9]+}} = !{i32 1, !"CSProfileSummary", !{{[0-9]+}}} ; CSPGOSUMMARY: {{![0-9]+}} = !{!"ProfileFormat", !"CSInstrProf"} ; CSPGOSUMMARY: {{![0-9]+}} = !{!"TotalCount", i64 1299950} diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll new file mode 100644 index 0000000000000..e79e533c4af20 --- 
/dev/null +++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM +; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@foo = common global i32* ()* null, align 8 + +declare i32* @func1() + +declare i32* @func2() + +declare i32* @func3() + +declare i32* @func4() + +define i32* @bar() { +entry: + %tmp = load i32* ()*, i32* ()** @foo, align 8 +; ICALL-PROM: [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4 +; ICALL-PROM: br i1 [[CMP1]], label %if.true.direct_targ, label %[[L1:[0-9]+]], !prof [[BRANCH_WEIGHT1:![0-9]+]] +; ICALL-PROM: if.true.direct_targ: +; ICALL-PROM: [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4() +; ICALL-PROM: ret i32* [[DIRCALL_RET1]] +; ICALL-PROM: [[L1]]: +; ICALL-PROM: [[CMP2:%[0-9]+]] = icmp eq i32* ()* %tmp, @func2 +; ICALL-PROM: br i1 [[CMP2]], label %if.true.direct_targ1, label %[[L2:[0-9]+]], !prof [[BRANCH_WEIGHT2:![0-9]+]] +; ICALL-PROM: if.true.direct_targ1: +; ICALL-PROM: [[DIRCALL_RET2:%[0-9]+]] = musttail call i32* @func2() +; ICALL-PROM: ret i32* [[DIRCALL_RET2]] +; ICALL-PROM: [[L2]]: +; ICALL-PROM: [[CMP3:%[0-9]+]] = icmp eq i32* ()* %tmp, @func3 +; ICALL-PROM: br i1 [[CMP3]], label %if.true.direct_targ2, label %[[L3:[0-9]+]], !prof [[BRANCH_WEIGHT3:![0-9]+]] +; ICALL-PROM: if.true.direct_targ2: +; ICALL-PROM: [[DIRCALL_RET3:%[0-9]+]] = musttail call i32* @func3() +; ICALL-PROM: ret i32* [[DIRCALL_RET3]] +; ICALL-PROM: [[L3]]: +; ICALL-PROM: %call = musttail call i32* %tmp() +; ICALL-PROM: ret i32* %call + %call = musttail call i32* %tmp(), !prof !1 + ret i32* %call +} + +define i64* @bar2() { +entry: + %tmp = load i32* ()*, i32* ()** @foo, align 8 +; ICALL-PROM: [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4 +; ICALL-PROM: br i1 [[CMP1]], label %if.true.direct_targ, label %[[L4:[0-9]+]], !prof [[BRANCH_WEIGHT4:![0-9]+]] +; ICALL-PROM: if.true.direct_targ: +; ICALL-PROM: [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4() +; ICALL-PROM: [[DIRCALL_RET2:%[0-9]+]] = bitcast i32* [[DIRCALL_RET1]] to i64* +; ICALL-PROM: ret i64* [[DIRCALL_RET2]] +; ICALL-PROM: [[L4]]: +; ICALL-PROM: %call = musttail call i32* %tmp() +; ICALL-PROM: %rv = bitcast i32* %call to i64* +; ICALL-PROM: ret i64* %rv + %call = musttail call i32* %tmp(), !prof !2 + %rv = bitcast i32* %call to i64* + ret i64* %rv +} + +!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} +!2 = !{!"VP", i32 0, i64 100, i64 7651369219802541373, i64 100} + +; ICALL-PROM: [[BRANCH_WEIGHT1]] = !{!"branch_weights", i32 1030, i32 570} +; ICALL-PROM: [[BRANCH_WEIGHT2]] = !{!"branch_weights", i32 410, i32 160} +; ICALL-PROM: [[BRANCH_WEIGHT3]] = !{!"branch_weights", i32 150, i32 10} +; ICALL-PROM: [[BRANCH_WEIGHT4]] = !{!"branch_weights", i32 100, i32 0} diff --git a/llvm/test/Transforms/PGOProfile/unreachable_bb.ll b/llvm/test/Transforms/PGOProfile/unreachable_bb.ll index 1b7fe4b05697e..96aca895ac44c 100644 --- a/llvm/test/Transforms/PGOProfile/unreachable_bb.ll +++ b/llvm/test/Transforms/PGOProfile/unreachable_bb.ll @@ -16,7 +16,7 @@ return: declare void @bar() ;USE: !0 = !{i32 1, !"ProfileSummary", !1} -;USE: !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +;USE: !1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10} ;USE: 
!2 = !{!"ProfileFormat", !"InstrProf"} ;USE: !3 = !{!"TotalCount", i64 0} diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll new file mode 100644 index 0000000000000..5a9d4442d9e33 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -O2 -preserve-alignment-assumptions-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,ASSUMPTIONS-OFF,FALLBACK-0 +; RUN: opt -S -O2 -preserve-alignment-assumptions-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,ASSUMPTIONS-ON,FALLBACK-1 +; RUN: opt -S -O2 < %s | FileCheck %s --check-prefixes=CHECK,ASSUMPTIONS-OFF,FALLBACK-DEFAULT + +target datalayout = "e-p:64:64-p5:32:32-A5" + +; This illustrates an optimization difference caused by instruction counting +; heuristics, which are affected by the additional instructions of the +; alignment assumption. + +define internal i1 @callee1(i1 %c, i64* align 8 %ptr) { + store volatile i64 0, i64* %ptr + ret i1 %c +} + +define void @caller1(i1 %c, i64* align 1 %ptr) { +; ASSUMPTIONS-OFF-LABEL: @caller1( +; ASSUMPTIONS-OFF-NEXT: br i1 [[C:%.*]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] +; ASSUMPTIONS-OFF: true2: +; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR:%.*]], align 8 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 2, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: ret void +; ASSUMPTIONS-OFF: false2: +; ASSUMPTIONS-OFF-NEXT: store volatile i64 1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: store volatile i64 3, i64* [[PTR]], align 4 +; ASSUMPTIONS-OFF-NEXT: ret void +; +; ASSUMPTIONS-ON-LABEL: @caller1( +; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[TRUE1:%.*]], label [[FALSE1:%.*]] +; ASSUMPTIONS-ON: true1: +; ASSUMPTIONS-ON-NEXT: [[C_PR:%.*]] = phi i1 [ false, [[FALSE1]] ], [ true, [[TMP0:%.*]] ] +; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR:%.*]] to i64 +; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 +; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; ASSUMPTIONS-ON-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: br i1 [[C_PR]], label [[TRUE2:%.*]], label [[FALSE2:%.*]] +; ASSUMPTIONS-ON: false1: 
+; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR]], align 4 +; ASSUMPTIONS-ON-NEXT: br label [[TRUE1]] +; ASSUMPTIONS-ON: true2: +; ASSUMPTIONS-ON-NEXT: store volatile i64 2, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: ret void +; ASSUMPTIONS-ON: false2: +; ASSUMPTIONS-ON-NEXT: store volatile i64 3, i64* [[PTR]], align 8 +; ASSUMPTIONS-ON-NEXT: ret void +; + br i1 %c, label %true1, label %false1 + +true1: + %c2 = call i1 @callee1(i1 %c, i64* %ptr) + store volatile i64 -1, i64* %ptr + store volatile i64 -1, i64* %ptr + store volatile i64 -1, i64* %ptr + store volatile i64 -1, i64* %ptr + store volatile i64 -1, i64* %ptr + br i1 %c2, label %true2, label %false2 + +false1: + store volatile i64 1, i64* %ptr + br label %true1 + +true2: + store volatile i64 2, i64* %ptr + ret void + +false2: + store volatile i64 3, i64* %ptr + ret void +} + +; This test illustrates that alignment assumptions may prevent SROA. +; See PR45763. + +define internal void @callee2(i64* noalias sret align 8 %arg) { + store i64 0, i64* %arg, align 8 + ret void +} + +define amdgpu_kernel void @caller2() { +; ASSUMPTIONS-OFF-LABEL: @caller2( +; ASSUMPTIONS-OFF-NEXT: ret void +; +; ASSUMPTIONS-ON-LABEL: @caller2( +; ASSUMPTIONS-ON-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8, addrspace(5) +; ASSUMPTIONS-ON-NEXT: [[CAST:%.*]] = addrspacecast i64 addrspace(5)* [[ALLOCA]] to i64* +; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[CAST]] to i64 +; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 +; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +; ASSUMPTIONS-ON-NEXT: ret void +; + %alloca = alloca i64, align 8, addrspace(5) + %cast = addrspacecast i64 addrspace(5)* %alloca to i64* + call void @callee2(i64* sret align 8 %cast) + ret void +} diff --git a/llvm/test/Transforms/SCCP/preserve-analysis.ll b/llvm/test/Transforms/SCCP/preserve-analysis.ll index 8d34e7195b95d..ab68be771e2a2 100644 --- a/llvm/test/Transforms/SCCP/preserve-analysis.ll +++ b/llvm/test/Transforms/SCCP/preserve-analysis.ll @@ -8,6 +8,7 @@ ; CHECK: Dominator Tree Construction ; CHECK: Natural Loop Information ; CHECK: Sparse Conditional Constant Propagation +; CHECK: Post-Dominator Tree Construction ; CHECK-NOT: Dominator Tree Construction ; CHECK-NOT: Natural Loop Information ; CHECK-NOT: Globals Alias Analysis diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll index c44a8524edfe5..319d4775c5ebe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -356,3 +356,166 @@ define i64 @load64le_nop_shift(i8* %arg) { %o7 = or i64 %o6, %s7 ret i64 %o7 } + +define void @PR39538(i8* %t0, i32* %t1) { +; CHECK-LABEL: @PR39538( +; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds i8, i8* [[T0:%.*]], i64 1 +; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 2 +; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 3 +; CHECK-NEXT: [[T20:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 4 +; CHECK-NEXT: [[T24:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 5 +; CHECK-NEXT: [[T29:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 6 +; CHECK-NEXT: [[T34:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 7 +; CHECK-NEXT: [[T39:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 8 +; CHECK-NEXT: [[T43:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 9 +; CHECK-NEXT: 
[[T48:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 10 +; CHECK-NEXT: [[T53:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 11 +; CHECK-NEXT: [[T58:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 12 +; CHECK-NEXT: [[T62:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 13 +; CHECK-NEXT: [[T67:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 14 +; CHECK-NEXT: [[T72:%.*]] = getelementptr inbounds i8, i8* [[T0]], i64 15 +; CHECK-NEXT: [[T38:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 1 +; CHECK-NEXT: [[T57:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 2 +; CHECK-NEXT: [[T76:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 3 +; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[T0]], align 1 +; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[T6]], align 1 +; CHECK-NEXT: [[T12:%.*]] = load i8, i8* [[T11]], align 1 +; CHECK-NEXT: [[T17:%.*]] = load i8, i8* [[T16]], align 1 +; CHECK-NEXT: [[T21:%.*]] = load i8, i8* [[T20]], align 1 +; CHECK-NEXT: [[T25:%.*]] = load i8, i8* [[T24]], align 1 +; CHECK-NEXT: [[T30:%.*]] = load i8, i8* [[T29]], align 1 +; CHECK-NEXT: [[T35:%.*]] = load i8, i8* [[T34]], align 1 +; CHECK-NEXT: [[T40:%.*]] = load i8, i8* [[T39]], align 1 +; CHECK-NEXT: [[T44:%.*]] = load i8, i8* [[T43]], align 1 +; CHECK-NEXT: [[T49:%.*]] = load i8, i8* [[T48]], align 1 +; CHECK-NEXT: [[T54:%.*]] = load i8, i8* [[T53]], align 1 +; CHECK-NEXT: [[T59:%.*]] = load i8, i8* [[T58]], align 1 +; CHECK-NEXT: [[T63:%.*]] = load i8, i8* [[T62]], align 1 +; CHECK-NEXT: [[T68:%.*]] = load i8, i8* [[T67]], align 1 +; CHECK-NEXT: [[T73:%.*]] = load i8, i8* [[T72]], align 1 +; CHECK-NEXT: [[T4:%.*]] = zext i8 [[T3]] to i32 +; CHECK-NEXT: [[T8:%.*]] = zext i8 [[T7]] to i32 +; CHECK-NEXT: [[T13:%.*]] = zext i8 [[T12]] to i32 +; CHECK-NEXT: [[T18:%.*]] = zext i8 [[T17]] to i32 +; CHECK-NEXT: [[T22:%.*]] = zext i8 [[T21]] to i32 +; CHECK-NEXT: [[T26:%.*]] = zext i8 [[T25]] to i32 +; CHECK-NEXT: [[T31:%.*]] = zext i8 [[T30]] to i32 +; CHECK-NEXT: [[T36:%.*]] = zext i8 [[T35]] to i32 +; CHECK-NEXT: [[T41:%.*]] = zext i8 [[T40]] to i32 +; CHECK-NEXT: [[T45:%.*]] = zext i8 [[T44]] to i32 +; CHECK-NEXT: [[T50:%.*]] = zext i8 [[T49]] to i32 +; CHECK-NEXT: [[T55:%.*]] = zext i8 [[T54]] to i32 +; CHECK-NEXT: [[T60:%.*]] = zext i8 [[T59]] to i32 +; CHECK-NEXT: [[T64:%.*]] = zext i8 [[T63]] to i32 +; CHECK-NEXT: [[T69:%.*]] = zext i8 [[T68]] to i32 +; CHECK-NEXT: [[T74:%.*]] = zext i8 [[T73]] to i32 +; CHECK-NEXT: [[T5:%.*]] = shl nuw i32 [[T4]], 24 +; CHECK-NEXT: [[T23:%.*]] = shl nuw i32 [[T22]], 24 +; CHECK-NEXT: [[T42:%.*]] = shl nuw i32 [[T41]], 24 +; CHECK-NEXT: [[T61:%.*]] = shl nuw i32 [[T60]], 24 +; CHECK-NEXT: [[T9:%.*]] = shl nuw nsw i32 [[T8]], 16 +; CHECK-NEXT: [[T27:%.*]] = shl nuw nsw i32 [[T26]], 16 +; CHECK-NEXT: [[T46:%.*]] = shl nuw nsw i32 [[T45]], 16 +; CHECK-NEXT: [[T65:%.*]] = shl nuw nsw i32 [[T64]], 16 +; CHECK-NEXT: [[T14:%.*]] = shl nuw nsw i32 [[T13]], 8 +; CHECK-NEXT: [[T32:%.*]] = shl nuw nsw i32 [[T31]], 8 +; CHECK-NEXT: [[T51:%.*]] = shl nuw nsw i32 [[T50]], 8 +; CHECK-NEXT: [[T70:%.*]] = shl nuw nsw i32 [[T69]], 8 +; CHECK-NEXT: [[T10:%.*]] = or i32 [[T9]], [[T5]] +; CHECK-NEXT: [[T15:%.*]] = or i32 [[T10]], [[T14]] +; CHECK-NEXT: [[T19:%.*]] = or i32 [[T15]], [[T18]] +; CHECK-NEXT: [[T28:%.*]] = or i32 [[T27]], [[T23]] +; CHECK-NEXT: [[T33:%.*]] = or i32 [[T28]], [[T32]] +; CHECK-NEXT: [[T37:%.*]] = or i32 [[T33]], [[T36]] +; CHECK-NEXT: [[T47:%.*]] = or i32 [[T46]], [[T42]] +; CHECK-NEXT: [[T52:%.*]] = or i32 [[T47]], [[T51]] +; CHECK-NEXT: [[T56:%.*]] = or i32 
[[T52]], [[T55]] +; CHECK-NEXT: [[T66:%.*]] = or i32 [[T65]], [[T61]] +; CHECK-NEXT: [[T71:%.*]] = or i32 [[T66]], [[T70]] +; CHECK-NEXT: [[T75:%.*]] = or i32 [[T71]], [[T74]] +; CHECK-NEXT: store i32 [[T19]], i32* [[T1]], align 4 +; CHECK-NEXT: store i32 [[T37]], i32* [[T38]], align 4 +; CHECK-NEXT: store i32 [[T56]], i32* [[T57]], align 4 +; CHECK-NEXT: store i32 [[T75]], i32* [[T76]], align 4 +; CHECK-NEXT: ret void +; + %t6 = getelementptr inbounds i8, i8* %t0, i64 1 + %t11 = getelementptr inbounds i8, i8* %t0, i64 2 + %t16 = getelementptr inbounds i8, i8* %t0, i64 3 + %t20 = getelementptr inbounds i8, i8* %t0, i64 4 + %t24 = getelementptr inbounds i8, i8* %t0, i64 5 + %t29 = getelementptr inbounds i8, i8* %t0, i64 6 + %t34 = getelementptr inbounds i8, i8* %t0, i64 7 + %t39 = getelementptr inbounds i8, i8* %t0, i64 8 + %t43 = getelementptr inbounds i8, i8* %t0, i64 9 + %t48 = getelementptr inbounds i8, i8* %t0, i64 10 + %t53 = getelementptr inbounds i8, i8* %t0, i64 11 + %t58 = getelementptr inbounds i8, i8* %t0, i64 12 + %t62 = getelementptr inbounds i8, i8* %t0, i64 13 + %t67 = getelementptr inbounds i8, i8* %t0, i64 14 + %t72 = getelementptr inbounds i8, i8* %t0, i64 15 + %t38 = getelementptr inbounds i32, i32* %t1, i64 1 + %t57 = getelementptr inbounds i32, i32* %t1, i64 2 + %t76 = getelementptr inbounds i32, i32* %t1, i64 3 + %t3 = load i8, i8* %t0, align 1 + %t7 = load i8, i8* %t6, align 1 + %t12 = load i8, i8* %t11, align 1 + %t17 = load i8, i8* %t16, align 1 + %t21 = load i8, i8* %t20, align 1 + %t25 = load i8, i8* %t24, align 1 + %t30 = load i8, i8* %t29, align 1 + %t35 = load i8, i8* %t34, align 1 + %t40 = load i8, i8* %t39, align 1 + %t44 = load i8, i8* %t43, align 1 + %t49 = load i8, i8* %t48, align 1 + %t54 = load i8, i8* %t53, align 1 + %t59 = load i8, i8* %t58, align 1 + %t63 = load i8, i8* %t62, align 1 + %t68 = load i8, i8* %t67, align 1 + %t73 = load i8, i8* %t72, align 1 + %t4 = zext i8 %t3 to i32 + %t8 = zext i8 %t7 to i32 + %t13 = zext i8 %t12 to i32 + %t18 = zext i8 %t17 to i32 + %t22 = zext i8 %t21 to i32 + %t26 = zext i8 %t25 to i32 + %t31 = zext i8 %t30 to i32 + %t36 = zext i8 %t35 to i32 + %t41 = zext i8 %t40 to i32 + %t45 = zext i8 %t44 to i32 + %t50 = zext i8 %t49 to i32 + %t55 = zext i8 %t54 to i32 + %t60 = zext i8 %t59 to i32 + %t64 = zext i8 %t63 to i32 + %t69 = zext i8 %t68 to i32 + %t74 = zext i8 %t73 to i32 + %t5 = shl nuw i32 %t4, 24 + %t23 = shl nuw i32 %t22, 24 + %t42 = shl nuw i32 %t41, 24 + %t61 = shl nuw i32 %t60, 24 + %t9 = shl nuw nsw i32 %t8, 16 + %t27 = shl nuw nsw i32 %t26, 16 + %t46 = shl nuw nsw i32 %t45, 16 + %t65 = shl nuw nsw i32 %t64, 16 + %t14 = shl nuw nsw i32 %t13, 8 + %t32 = shl nuw nsw i32 %t31, 8 + %t51 = shl nuw nsw i32 %t50, 8 + %t70 = shl nuw nsw i32 %t69, 8 + %t10 = or i32 %t9, %t5 + %t15 = or i32 %t10, %t14 + %t19 = or i32 %t15, %t18 + %t28 = or i32 %t27, %t23 + %t33 = or i32 %t28, %t32 + %t37 = or i32 %t33, %t36 + %t47 = or i32 %t46, %t42 + %t52 = or i32 %t47, %t51 + %t56 = or i32 %t52, %t55 + %t66 = or i32 %t65, %t61 + %t71 = or i32 %t66, %t70 + %t75 = or i32 %t71, %t74 + store i32 %t19, i32* %t1, align 4 + store i32 %t37, i32* %t38, align 4 + store i32 %t56, i32* %t57, align 4 + store i32 %t75, i32* %t76, align 4 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll index dafab8ee72c2c..216cd7a686934 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll @@ -211,38 +211,11 @@ define void 
@fptoui_8f64_8i32() #0 { } define void @fptoui_8f64_8i16() #0 { -; SSE-LABEL: @fptoui_8f64_8i16( -; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 -; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 -; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8 -; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8 -; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8 -; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8 -; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8 -; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8 -; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i16 -; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i16 -; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i16 -; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i16 -; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i16 -; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i16 -; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i16 -; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i16 -; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2 -; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2 -; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2 -; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2 -; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2 -; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2 -; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2 -; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2 -; SSE-NEXT: ret void -; -; AVX-LABEL: @fptoui_8f64_8i16( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 -; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16> -; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 -; AVX-NEXT: ret void +; CHECK-LABEL: @fptoui_8f64_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16> +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 +; CHECK-NEXT: ret void ; %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 @@ -522,38 +495,11 @@ define void @fptoui_8f32_8i32() #0 { } define void 
@fptoui_8f32_8i16() #0 { -; SSE-LABEL: @fptoui_8f32_8i16( -; SSE-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4 -; SSE-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4 -; SSE-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4 -; SSE-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4 -; SSE-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4 -; SSE-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4 -; SSE-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4 -; SSE-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4 -; SSE-NEXT: [[CVT0:%.*]] = fptoui float [[A0]] to i16 -; SSE-NEXT: [[CVT1:%.*]] = fptoui float [[A1]] to i16 -; SSE-NEXT: [[CVT2:%.*]] = fptoui float [[A2]] to i16 -; SSE-NEXT: [[CVT3:%.*]] = fptoui float [[A3]] to i16 -; SSE-NEXT: [[CVT4:%.*]] = fptoui float [[A4]] to i16 -; SSE-NEXT: [[CVT5:%.*]] = fptoui float [[A5]] to i16 -; SSE-NEXT: [[CVT6:%.*]] = fptoui float [[A6]] to i16 -; SSE-NEXT: [[CVT7:%.*]] = fptoui float [[A7]] to i16 -; SSE-NEXT: store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2 -; SSE-NEXT: store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2 -; SSE-NEXT: store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2 -; SSE-NEXT: store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2 -; SSE-NEXT: store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2 -; SSE-NEXT: store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2 -; SSE-NEXT: store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2 -; SSE-NEXT: store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2 -; SSE-NEXT: ret void -; -; AVX-LABEL: @fptoui_8f32_8i16( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4 -; AVX-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16> -; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 -; AVX-NEXT: ret void +; CHECK-LABEL: @fptoui_8f32_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16> +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2 +; CHECK-NEXT: ret void ; %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4 %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll index 1a8d2bcec55ff..2f630ffe9f46e 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -13,34 +13,21 @@ target triple = "x86_64-unknown-linux-gnu" ; zero-extend the roots back to their original sizes. ; define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) { -; SSE-LABEL: @PR31243_zext( -; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = or i8 [[V0:%.*]], 1 -; SSE-NEXT: [[TMP1:%.*]] = or i8 [[V1:%.*]], 1 -; SSE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64 -; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP2]] -; SSE-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64 -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP3]] -; SSE-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 -; SSE-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1 -; SSE-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] -; SSE-NEXT: ret i8 [[TMP8]] -; -; AVX-LABEL: @PR31243_zext( -; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> undef, i8 [[V0:%.*]], i32 0 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i32 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1> -; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 -; AVX-NEXT: [[TMPE4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP4]] -; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 -; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP6]] -; AVX-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMPE4]], align 1 -; AVX-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1 -; AVX-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] -; AVX-NEXT: ret i8 [[TMP8]] +; CHECK-LABEL: @PR31243_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> undef, i8 [[V0:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 +; CHECK-NEXT: [[TMPE4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMPE4]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]] +; CHECK-NEXT: ret i8 [[TMP8]] ; entry: %tmp0 = zext i8 %v0 to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 50b4e8a1c4047..28bc95e2f4ca6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -12,84 +12,70 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[CONV_I_I1199:%.*]] = and i8 [[TMP1]], 1 -; CHECK-NEXT: store i8 [[CONV_I_I1199]], i8* [[TMP0]], align 1 -; CHECK-NEXT: [[SHR_I_I:%.*]] =
lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[CONV_1_I_I:%.*]] = and i8 [[TMP2]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_1_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1 -; CHECK-NEXT: store i8 [[CONV_1_I_I]], i8* [[ARRAYIDX_I_I7_1_I_I]], align 1 -; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[SHR_1_I_I]] to i8 -; CHECK-NEXT: [[CONV_2_I_I:%.*]] = and i8 [[TMP3]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_2_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2 -; CHECK-NEXT: store i8 [[CONV_2_I_I]], i8* [[ARRAYIDX_I_I7_2_I_I]], align 1 -; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_2_I_I]] to i8 -; CHECK-NEXT: [[CONV_3_I_I:%.*]] = and i8 [[TMP4]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_3_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3 -; CHECK-NEXT: store i8 [[CONV_3_I_I]], i8* [[ARRAYIDX_I_I7_3_I_I]], align 1 -; CHECK-NEXT: [[SHR_3_I_I:%.*]] = lshr i32 [[CONV31_I]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[SHR_3_I_I]] to i8 -; CHECK-NEXT: [[CONV_4_I_I:%.*]] = and i8 [[TMP5]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_4_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4 -; CHECK-NEXT: store i8 [[CONV_4_I_I]], i8* [[ARRAYIDX_I_I7_4_I_I]], align 1 -; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_4_I_I]] to i8 -; CHECK-NEXT: [[CONV_5_I_I:%.*]] = and i8 [[TMP6]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5 -; CHECK-NEXT: store i8 [[CONV_5_I_I]], i8* [[ARRAYIDX_I_I7_5_I_I]], align 1 -; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SHR_5_I_I]] to i8 -; CHECK-NEXT: [[CONV_6_I_I:%.*]] = and i8 [[TMP7]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6 -; CHECK-NEXT: store i8 [[CONV_6_I_I]], i8* [[ARRAYIDX_I_I7_6_I_I]], align 1 -; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[SHR_6_I_I]] to i8 -; CHECK-NEXT: [[CONV_7_I_I:%.*]] = and i8 [[TMP8]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7 -; CHECK-NEXT: store i8 [[CONV_7_I_I]], i8* [[ARRAYIDX_I_I7_7_I_I]], align 1 -; CHECK-NEXT: [[SHR_7_I_I:%.*]] = lshr i32 [[CONV31_I]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_7_I_I]] to i8 -; CHECK-NEXT: [[CONV_8_I_I:%.*]] = and i8 [[TMP9]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 
[[CONV31_I]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> ; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8 -; CHECK-NEXT: store i8 [[CONV_8_I_I]], i8* [[ARRAYIDX_I_I7_8_I_I]], align 1 -; CHECK-NEXT: [[SHR_8_I_I:%.*]] = lshr i32 [[CONV31_I]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[SHR_8_I_I]] to i8 -; CHECK-NEXT: [[CONV_9_I_I:%.*]] = and i8 [[TMP10]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9 -; CHECK-NEXT: store i8 [[CONV_9_I_I]], i8* [[ARRAYIDX_I_I7_9_I_I]], align 1 -; CHECK-NEXT: [[SHR_9_I_I:%.*]] = lshr i32 [[CONV31_I]], 10 -; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[SHR_9_I_I]] to i8 -; CHECK-NEXT: [[CONV_10_I_I:%.*]] = and i8 [[TMP11]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10 -; CHECK-NEXT: store i8 [[CONV_10_I_I]], i8* [[ARRAYIDX_I_I7_10_I_I]], align 1 -; CHECK-NEXT: [[SHR_10_I_I:%.*]] = lshr i32 [[CONV31_I]], 11 -; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[SHR_10_I_I]] to i8 -; CHECK-NEXT: [[CONV_11_I_I:%.*]] = and i8 [[TMP12]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11 -; CHECK-NEXT: store i8 [[CONV_11_I_I]], i8* [[ARRAYIDX_I_I7_11_I_I]], align 1 -; CHECK-NEXT: [[SHR_11_I_I:%.*]] = lshr i32 [[CONV31_I]], 12 -; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[SHR_11_I_I]] to i8 -; CHECK-NEXT: [[CONV_12_I_I:%.*]] = and i8 [[TMP13]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12> ; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12 -; CHECK-NEXT: store i8 [[CONV_12_I_I]], i8* [[ARRAYIDX_I_I7_12_I_I]], align 1 ; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13 -; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[SHR_12_I_I]] to i8 -; CHECK-NEXT: [[CONV_13_I_I:%.*]] = and i8 [[TMP14]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13 -; CHECK-NEXT: store i8 [[CONV_13_I_I]], i8* [[ARRAYIDX_I_I7_13_I_I]], align 1 ; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14 -; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[SHR_13_I_I]] to i8 -; CHECK-NEXT: [[CONV_14_I_I:%.*]] = and i8 [[TMP15]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14 -; CHECK-NEXT: store i8 [[CONV_14_I_I]], i8* [[ARRAYIDX_I_I7_14_I_I]], align 1 ; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[SHR_14_I_I]] to i8 -; CHECK-NEXT: [[CONV_15_I_I:%.*]] = and i8 [[TMP16]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] =
extractelement <8 x i32> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15 +; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8> +; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 -; CHECK-NEXT: store i8 [[CONV_15_I_I]], i8* [[ARRAYIDX_I_I7_15_I_I]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll index 702c979ca766c..779d5260506af 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -5,31 +5,30 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4 ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]] -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP0]], [[A2:%.*]] -; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]] -; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]] -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]] -; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP0]], [[A6:%.*]] -;
CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARR]], align 4 -; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]] -; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP1]], [[A8:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]] -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]] -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]] -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]] -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]] -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]] -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]] -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]] -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]] -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]] -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]] -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]] -; CHECK-NEXT: ret i32 [[COND44]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <2 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[REORDER_SHUFFLE]], <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1 @@ -64,35 +63,32 @@ define i32 @foo1(i32*
nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-LABEL: @foo1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]] ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP2]], [[A3:%.*]] -; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]] -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARR]], align 4 -; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP3]], [[A6:%.*]] -; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]] -; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP0]], [[A8:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]] -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]] -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]] -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]] -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]] -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]] -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]] -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]] -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]] -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]] -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]] -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]] -; CHECK-NEXT: ret i32 [[COND44]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 3, i32 1, i32 0> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] +;
CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1 @@ -131,35 +127,32 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a ; CHECK-LABEL: @foo2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP1]], [[A2:%.*]] -; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARR]], align 4 -; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP2]], [[A4:%.*]] ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[ARR]], i64 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP3]], [[A5:%.*]] -; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP2]], [[A6:%.*]] -; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]] -; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP3]], [[A8:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]] -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]] -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]] -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]] -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]] -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]] -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]] -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]] -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]] -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]] -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]] -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]] -; CHECK-NEXT: ret i32 [[COND44]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[REORDER_SHUFFLE]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 3, i32 2, i32 3, i32 1, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[A1:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]],
i32 [[A2:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i32> [[TMP10]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP10]], <8 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i32> [[RDX_MINMAX_SELECT]], <8 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll b/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll index 1edba0e5e98c9..46757694fbde6 100644 --- a/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll +++ b/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test the basic functionality of speculating around PHI nodes based on reduced ; cost of the constant operands to the PHI nodes using the x86 cost model. ; @@ -7,185 +8,197 @@ target triple = "x86_64-unknown-unknown" define i32 @test_basic(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_basic( +; CHECK-LABEL: @test_basic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %arg, 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %arg, 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %arg, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } ; Check that we handle commuted operands and get the constant onto the RHS.
define i32 @test_commuted(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_commuted( +; CHECK-LABEL: @test_commuted( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %arg, 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %arg, 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %p, %arg ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } define i32 @test_split_crit_edge(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_split_crit_edge( +; CHECK-LABEL: @test_split_crit_edge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[ENTRY_EXIT_CRIT_EDGE:%.*]], label [[A:%.*]] +; CHECK: entry.exit_crit_edge: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[ENTRY_EXIT_CRIT_EDGE]] ], [ [[SUM_1]], [[A]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag, label %exit, label %a -; CHECK: entry: -; CHECK-NEXT: br i1 %flag, label %[[ENTRY_SPLIT:.*]], label %a -; -; CHECK: [[ENTRY_SPLIT]]: -; CHECK-NEXT: %[[SUM_ENTRY_SPLIT:.*]] = add i32 %arg, 7 -; CHECK-NEXT: br label %exit a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %arg, 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %entry ], [ 11, %a ] %sum = add i32 %arg, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_ENTRY_SPLIT]], %[[ENTRY_SPLIT]] ], [ %[[SUM_A]], %a ] -; CHECK-NEXT: ret i32 %[[PHI]] } define i32 @test_no_spec_dominating_inst(i1 %flag, i32* %ptr) { -; CHECK-LABEL: define i32 @test_no_spec_dominating_inst( +; CHECK-LABEL: @test_no_spec_dominating_inst( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[PTR:%.*]] +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[LOAD]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[LOAD]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: %load = load i32, i32* %ptr br i1 %flag, label %a, label %b -; CHECK: %[[LOAD:.*]] = load i32, i32* %ptr -; CHECK-NEXT: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %[[LOAD]], 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %[[LOAD]], 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %load, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } ; We have special logic handling PHI nodes, 
make sure it doesn't get confused ; by a dominating PHI. define i32 @test_no_spec_dominating_phi(i1 %flag1, i1 %flag2, i32 %x, i32 %y) { -; CHECK-LABEL: define i32 @test_no_spec_dominating_phi( +; CHECK-LABEL: @test_no_spec_dominating_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG1:%.*]], label [[X_BLOCK:%.*]], label [[Y_BLOCK:%.*]] +; CHECK: x.block: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: y.block: +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[XY_PHI:%.*]] = phi i32 [ [[X:%.*]], [[X_BLOCK]] ], [ [[Y:%.*]], [[Y_BLOCK]] ] +; CHECK-NEXT: br i1 [[FLAG2:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[XY_PHI]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[XY_PHI]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag1, label %x.block, label %y.block -; CHECK: entry: -; CHECK-NEXT: br i1 %flag1, label %x.block, label %y.block x.block: br label %merge -; CHECK: x.block: -; CHECK-NEXT: br label %merge y.block: br label %merge -; CHECK: y.block: -; CHECK-NEXT: br label %merge merge: %xy.phi = phi i32 [ %x, %x.block ], [ %y, %y.block ] br i1 %flag2, label %a, label %b -; CHECK: merge: -; CHECK-NEXT: %[[XY_PHI:.*]] = phi i32 [ %x, %x.block ], [ %y, %y.block ] -; CHECK-NEXT: br i1 %flag2, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %[[XY_PHI]], 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %[[XY_PHI]], 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %xy.phi, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[SUM_PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: ret i32 %[[SUM_PHI]] } ; Ensure that we will speculate some number of "free" instructions on the given ; architecture even though they are unrelated to the PHI itself. 
define i32 @test_speculate_free_insts(i1 %flag, i64 %arg) { -; CHECK-LABEL: define i32 @test_speculate_free_insts( +; CHECK-LABEL: @test_speculate_free_insts( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[T1_0:%.*]] = trunc i64 [[ARG:%.*]] to i48 +; CHECK-NEXT: [[T2_0:%.*]] = trunc i48 [[T1_0]] to i32 +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[T2_0]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[T1_1:%.*]] = trunc i64 [[ARG]] to i48 +; CHECK-NEXT: [[T2_1:%.*]] = trunc i48 [[T1_1]] to i32 +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[T2_1]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[T1_A:.*]] = trunc i64 %arg to i48 -; CHECK-NEXT: %[[T2_A:.*]] = trunc i48 %[[T1_A]] to i32 -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %[[T2_A]], 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[T1_B:.*]] = trunc i64 %arg to i48 -; CHECK-NEXT: %[[T2_B:.*]] = trunc i48 %[[T1_B]] to i32 -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %[[T2_B]], 11 -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] @@ -193,58 +206,67 @@ exit: %t2 = trunc i48 %t1 to i32 %sum = add i32 %t2, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } define i32 @test_speculate_free_phis(i1 %flag, i32 %arg1, i32 %arg2) { -; CHECK-LABEL: define i32 @test_speculate_free_phis( +; CHECK-LABEL: @test_speculate_free_phis( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG1:%.*]], 7 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG2:%.*]], 11 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] +; CHECK-NEXT: [[P2:%.*]] = phi i32 [ [[ARG1]], [[A]] ], [ [[ARG2]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %arg1, 7 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %arg2, 11 -; CHECK-NEXT: br label %exit +; We don't DCE the now unused PHI node... exit: %p1 = phi i32 [ 7, %a ], [ 11, %b ] %p2 = phi i32 [ %arg1, %a ], [ %arg2, %b ] %sum = add i32 %p2, %p1 ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; We don't DCE the now unused PHI node... -; CHECK-NEXT: %{{.*}} = phi i32 [ %arg1, %a ], [ %arg2, %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } ; We shouldn't speculate multiple uses even if each individually looks ; profitable because of the total cost. 
define i32 @test_no_spec_multi_uses(i1 %flag, i32 %arg1, i32 %arg2, i32 %arg3) { -; CHECK-LABEL: define i32 @test_no_spec_multi_uses( +; CHECK-LABEL: @test_no_spec_multi_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ARG1:%.*]], [[P]] +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ARG2:%.*]], [[P]] +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ARG3:%.*]], [[P]] +; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[ADD1]], [[ADD2]] +; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[SUM1]], [[ADD3]] +; CHECK-NEXT: ret i32 [[SUM2]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] @@ -254,37 +276,34 @@ exit: %sum1 = add i32 %add1, %add2 %sum2 = add i32 %sum1, %add3 ret i32 %sum2 -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %a ], [ 11, %b ] -; CHECK-NEXT: %[[ADD1:.*]] = add i32 %arg1, %[[PHI]] -; CHECK-NEXT: %[[ADD2:.*]] = add i32 %arg2, %[[PHI]] -; CHECK-NEXT: %[[ADD3:.*]] = add i32 %arg3, %[[PHI]] -; CHECK-NEXT: %[[SUM1:.*]] = add i32 %[[ADD1]], %[[ADD2]] -; CHECK-NEXT: %[[SUM2:.*]] = add i32 %[[SUM1]], %[[ADD3]] -; CHECK-NEXT: ret i32 %[[SUM2]] } define i32 @test_multi_phis1(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_multi_phis1( +; CHECK-LABEL: @test_multi_phis1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3 +; CHECK-NEXT: [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 2 +; CHECK-NEXT: [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4 +; CHECK-NEXT: [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM3_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A1:.*]] = add i32 %arg, 1 -; CHECK-NEXT: %[[SUM_A2:.*]] = add i32 %[[SUM_A1]], 3 -; CHECK-NEXT: %[[SUM_A3:.*]] = add i32 %[[SUM_A2]], 5 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B1:.*]] = add i32 %arg, 2 -; CHECK-NEXT: %[[SUM_B2:.*]] = add i32 %[[SUM_B1]], 4 -; CHECK-NEXT: %[[SUM_B3:.*]] = add i32 %[[SUM_B2]], 6 -; CHECK-NEXT: br label %exit exit: %p1 = phi i32 [ 1, %a ], [ 2, %b ] @@ -294,33 +313,35 @@ exit: %sum2 = add i32 %sum1, %p2 %sum3 = add i32 %sum2, %p3 ret i32 %sum3 -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A3]], %a ], [ %[[SUM_B3]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } ; Check that the order of the PHIs doesn't impact the behavior. 
define i32 @test_multi_phis2(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_multi_phis2( +; CHECK-LABEL: @test_multi_phis2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3 +; CHECK-NEXT: [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 2 +; CHECK-NEXT: [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4 +; CHECK-NEXT: [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ] +; CHECK-NEXT: ret i32 [[SUM3_PHI]] +; entry: br i1 %flag, label %a, label %b -; CHECK: br i1 %flag, label %a, label %b a: br label %exit -; CHECK: a: -; CHECK-NEXT: %[[SUM_A1:.*]] = add i32 %arg, 1 -; CHECK-NEXT: %[[SUM_A2:.*]] = add i32 %[[SUM_A1]], 3 -; CHECK-NEXT: %[[SUM_A3:.*]] = add i32 %[[SUM_A2]], 5 -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: %[[SUM_B1:.*]] = add i32 %arg, 2 -; CHECK-NEXT: %[[SUM_B2:.*]] = add i32 %[[SUM_B1]], 4 -; CHECK-NEXT: %[[SUM_B3:.*]] = add i32 %[[SUM_B2]], 6 -; CHECK-NEXT: br label %exit exit: %p3 = phi i32 [ 5, %a ], [ 6, %b ] @@ -330,36 +351,34 @@ exit: %sum2 = add i32 %sum1, %p2 %sum3 = add i32 %sum2, %p3 ret i32 %sum3 -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %[[SUM_A3]], %a ], [ %[[SUM_B3]], %b ] -; CHECK-NEXT: ret i32 %[[PHI]] } define i32 @test_no_spec_indirectbr(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_no_spec_indirectbr( +; CHECK-LABEL: @test_no_spec_indirectbr( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: indirectbr i8* undef, [label %exit] +; CHECK: b: +; CHECK-NEXT: indirectbr i8* undef, [label %exit] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] +; CHECK-NEXT: ret i32 [[SUM]] +; entry: br i1 %flag, label %a, label %b -; CHECK: entry: -; CHECK-NEXT: br i1 %flag, label %a, label %b a: indirectbr i8* undef, [label %exit] -; CHECK: a: -; CHECK-NEXT: indirectbr i8* undef, [label %exit] b: indirectbr i8* undef, [label %exit] -; CHECK: b: -; CHECK-NEXT: indirectbr i8* undef, [label %exit] exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %arg, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %a ], [ 11, %b ] -; CHECK-NEXT: %[[SUM:.*]] = add i32 %arg, %[[PHI]] -; CHECK-NEXT: ret i32 %[[SUM]] } declare void @g() @@ -369,66 +388,79 @@ declare i32 @__gxx_personality_v0(...) ; FIXME: We should be able to handle this case -- only the exceptional edge is ; impossible to split. 
define i32 @test_no_spec_invoke_continue(i1 %flag, i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: define i32 @test_no_spec_invoke_continue( +; CHECK-LABEL: @test_no_spec_invoke_continue( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: b: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[EXIT]] unwind label [[LPAD]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] +; CHECK-NEXT: ret i32 [[SUM]] +; CHECK: lpad: +; CHECK-NEXT: [[LP:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: resume { i8*, i32 } undef +; entry: br i1 %flag, label %a, label %b -; CHECK: entry: -; CHECK-NEXT: br i1 %flag, label %a, label %b a: invoke void @g() - to label %exit unwind label %lpad -; CHECK: a: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %exit unwind label %lpad + to label %exit unwind label %lpad b: invoke void @g() - to label %exit unwind label %lpad -; CHECK: b: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %exit unwind label %lpad + to label %exit unwind label %lpad exit: %p = phi i32 [ 7, %a ], [ 11, %b ] %sum = add i32 %arg, %p ret i32 %sum -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %a ], [ 11, %b ] -; CHECK-NEXT: %[[SUM:.*]] = add i32 %arg, %[[PHI]] -; CHECK-NEXT: ret i32 %[[SUM]] lpad: %lp = landingpad { i8*, i32 } - cleanup + cleanup resume { i8*, i32 } undef } define i32 @test_no_spec_landingpad(i32 %arg, i32* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: define i32 @test_no_spec_landingpad( +; CHECK-LABEL: @test_no_spec_landingpad( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD]] +; CHECK: lpad: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ] +; CHECK-NEXT: [[LP:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] +; CHECK-NEXT: store i32 [[SUM]], i32* [[PTR:%.*]] +; CHECK-NEXT: resume { i8*, i32 } undef +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; entry: invoke void @g() - to label %invoke.cont unwind label %lpad -; CHECK: entry: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %invoke.cont unwind label %lpad + to label %invoke.cont unwind label %lpad invoke.cont: invoke void @g() - to label %exit unwind label %lpad -; CHECK: invoke.cont: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %exit unwind label %lpad + to label %exit unwind label %lpad lpad: %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] %lp = landingpad { i8*, i32 } - cleanup + cleanup %sum = add i32 %arg, %p store i32 %sum, i32* %ptr resume { i8*, i32 } undef -; CHECK: lpad: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] exit: ret i32 0 @@ -437,20 +469,29 @@ exit: declare i32 @__CxxFrameHandler3(...) 
define i32 @test_no_spec_cleanuppad(i32 %arg, i32* %ptr) personality i32 (...)* @__CxxFrameHandler3 { -; CHECK-LABEL: define i32 @test_no_spec_cleanuppad( +; CHECK-LABEL: @test_no_spec_cleanuppad( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD]] +; CHECK: lpad: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ] +; CHECK-NEXT: [[CP:%.*]] = cleanuppad within none [] +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] +; CHECK-NEXT: store i32 [[SUM]], i32* [[PTR:%.*]] +; CHECK-NEXT: cleanupret from [[CP]] unwind to caller +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; entry: invoke void @g() - to label %invoke.cont unwind label %lpad -; CHECK: entry: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %invoke.cont unwind label %lpad + to label %invoke.cont unwind label %lpad invoke.cont: invoke void @g() - to label %exit unwind label %lpad -; CHECK: invoke.cont: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label %exit unwind label %lpad + to label %exit unwind label %lpad lpad: %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] @@ -458,8 +499,6 @@ lpad: %sum = add i32 %arg, %p store i32 %sum, i32* %ptr cleanupret from %cp unwind to caller -; CHECK: lpad: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] exit: ret i32 0 @@ -469,21 +508,28 @@ exit: ; for us to handle but in an unreachable region and with non-PHI use-def ; cycles. define i32 @test_unreachable_non_phi_cycles(i1 %flag, i32 %arg) { -; CHECK-LABEL: define i32 @test_unreachable_non_phi_cycles( +; CHECK-LABEL: @test_unreachable_non_phi_cycles( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 42 +; CHECK: a: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A:%.*]] ], [ 11, [[B:%.*]] ] +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SUM:%.*]] to i64 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ZEXT]] to i32 +; CHECK-NEXT: [[SUM]] = add i32 [[TRUNC]], [[P]] +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A]], label [[B]] +; entry: ret i32 42 -; CHECK: entry: -; CHECK-NEXT: ret i32 42 a: br label %exit -; CHECK: a: -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: br label %exit exit: %p = phi i32 [ 7, %a ], [ 11, %b ] @@ -491,12 +537,6 @@ exit: %trunc = trunc i64 %zext to i32 %sum = add i32 %trunc, %p br i1 %flag, label %a, label %b -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ 7, %a ], [ 11, %b ] -; CHECK-NEXT: %[[ZEXT:.*]] = zext i32 %[[SUM:.*]] to i64 -; CHECK-NEXT: %[[TRUNC:.*]] = trunc i64 %[[ZEXT]] to i32 -; CHECK-NEXT: %[[SUM]] = add i32 %[[TRUNC]], %[[PHI]] -; CHECK-NEXT: br i1 %flag, label %a, label %b } ; Check that we don't speculate in the face of an expensive immediate. There @@ -508,88 +548,92 @@ exit: ; happen to be any way to use free-to-speculate instructions here so that it ; would be the only interesting property. 
define i64 @test_expensive_imm(i32 %flag, i64 %arg) { -; CHECK-LABEL: define i64 @test_expensive_imm( +; CHECK-LABEL: @test_expensive_imm( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[FLAG:%.*]], label [[A:%.*]] [ +; CHECK-NEXT: i32 1, label [[B:%.*]] +; CHECK-NEXT: i32 2, label [[C:%.*]] +; CHECK-NEXT: i32 3, label [[D:%.*]] +; CHECK-NEXT: ] +; CHECK: a: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: b: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: c: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: d: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[P:%.*]] = phi i64 [ 4294967296, [[A]] ], [ 1, [[B]] ], [ 1, [[C]] ], [ 1, [[D]] ] +; CHECK-NEXT: [[SUM1:%.*]] = add i64 [[ARG:%.*]], [[P]] +; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], [[P]] +; CHECK-NEXT: ret i64 [[SUM2]] +; entry: switch i32 %flag, label %a [ - i32 1, label %b - i32 2, label %c - i32 3, label %d + i32 1, label %b + i32 2, label %c + i32 3, label %d ] -; CHECK: switch i32 %flag, label %a [ -; CHECK-NEXT: i32 1, label %b -; CHECK-NEXT: i32 2, label %c -; CHECK-NEXT: i32 3, label %d -; CHECK-NEXT: ] a: br label %exit -; CHECK: a: -; CHECK-NEXT: br label %exit b: br label %exit -; CHECK: b: -; CHECK-NEXT: br label %exit c: br label %exit -; CHECK: c: -; CHECK-NEXT: br label %exit d: br label %exit -; CHECK: d: -; CHECK-NEXT: br label %exit exit: %p = phi i64 [ 4294967296, %a ], [ 1, %b ], [ 1, %c ], [ 1, %d ] %sum1 = add i64 %arg, %p %sum2 = add i64 %sum1, %p ret i64 %sum2 -; CHECK: exit: -; CHECK-NEXT: %[[PHI:.*]] = phi i64 [ {{[0-9]+}}, %a ], [ 1, %b ], [ 1, %c ], [ 1, %d ] -; CHECK-NEXT: %[[SUM1:.*]] = add i64 %arg, %[[PHI]] -; CHECK-NEXT: %[[SUM2:.*]] = add i64 %[[SUM1]], %[[PHI]] -; CHECK-NEXT: ret i64 %[[SUM2]] } define i32 @test_no_spec_non_postdominating_uses(i1 %flag1, i1 %flag2, i32 %arg) { -; CHECK-LABEL: define i32 @test_no_spec_non_postdominating_uses( +; CHECK-LABEL: @test_no_spec_non_postdominating_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG1:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 7 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b: +; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 11 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[SUM1_PHI:%.*]] = phi i32 [ [[SUM1_0]], [[A]] ], [ [[SUM1_1]], [[B]] ] +; CHECK-NEXT: [[P2:%.*]] = phi i32 [ 13, [[A]] ], [ 42, [[B]] ] +; CHECK-NEXT: br i1 [[FLAG2:%.*]], label [[EXIT1:%.*]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: ret i32 [[SUM1_PHI]] +; CHECK: exit2: +; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[ARG]], [[P2]] +; CHECK-NEXT: ret i32 [[SUM2]] +; entry: br i1 %flag1, label %a, label %b -; CHECK: br i1 %flag1, label %a, label %b a: br label %merge -; CHECK: a: -; CHECK-NEXT: %[[SUM_A:.*]] = add i32 %arg, 7 -; CHECK-NEXT: br label %merge b: br label %merge -; CHECK: b: -; CHECK-NEXT: %[[SUM_B:.*]] = add i32 %arg, 11 -; CHECK-NEXT: br label %merge merge: %p1 = phi i32 [ 7, %a ], [ 11, %b ] %p2 = phi i32 [ 13, %a ], [ 42, %b ] %sum1 = add i32 %arg, %p1 br i1 %flag2, label %exit1, label %exit2 -; CHECK: merge: -; CHECK-NEXT: %[[PHI1:.*]] = phi i32 [ %[[SUM_A]], %a ], [ %[[SUM_B]], %b ] -; CHECK-NEXT: %[[PHI2:.*]] = phi i32 [ 13, %a ], [ 42, %b ] -; CHECK-NEXT: br i1 %flag2, label %exit1, label %exit2 exit1: ret i32 %sum1 -; CHECK: exit1: -; CHECK-NEXT: ret i32 %[[PHI1]] exit2: %sum2 = add i32 %arg, %p2 ret i32 %sum2 -; CHECK: exit2: -; CHECK-NEXT: %[[SUM2:.*]] = add i32 %arg, %[[PHI2]] -; CHECK-NEXT: ret i32 %[[SUM2]] } diff --git 
a/llvm/test/Transforms/Util/cg-updater-dead-function-callees.ll b/llvm/test/Transforms/Util/cg-updater-dead-function-callees.ll new file mode 100644 index 0000000000000..8ae0b8e3c5d4a --- /dev/null +++ b/llvm/test/Transforms/Util/cg-updater-dead-function-callees.ll @@ -0,0 +1,31 @@ +; RUN: opt -inline -attributor-cgscc -tailcallelim -S %s | FileCheck %s +; +; CHECK: define void @foo() +; CHECK: declare i32 @baz() +; CHECK-NOT: void @goo() +; CHECK-NOT: void @bar() + +define void @foo() { + call fastcc void @bar() + ret void +} + +define internal fastcc void @goo() { + call fastcc void @bar() + ret void +} + +define internal fastcc void @bar() { + %call = call i32 @baz() + %cond = icmp eq i32 %call, 0 + br i1 %cond, label %if.then, label %if.end + +if.then: + call fastcc void @goo() + br label %if.end + +if.end: + ret void +} + +declare i32 @baz() diff --git a/llvm/test/Verifier/dilocation-in-wrong-place.ll b/llvm/test/Verifier/dilocation-in-wrong-place.ll new file mode 100644 index 0000000000000..a63af77227e49 --- /dev/null +++ b/llvm/test/Verifier/dilocation-in-wrong-place.ll @@ -0,0 +1,26 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: DILocation not allowed within this metadata node +; CHECK-NEXT: [[unknownMD:![0-9]+]] = distinct !{[[unknownMD]], [[dbgMD:![0-9]+]]} +; CHECK-NEXT: [[dbgMD]] = !DILocation + +define void @f() !dbg !5 { + ret void, !dbg !10, !unknown_md !11 +} + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !3} +!llvm.module.flags = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop.ll", directory: "/") +!2 = !{} +!3 = !{i32 1} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "f", linkageName: "f", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7) +!6 = !DISubroutineType(types: !2) +!7 = !{!8} +!8 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !9) +!9 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!10 = !DILocation(line: 1, column: 1, scope: !5) +!11 = !{!11, !10} diff --git a/llvm/test/Verifier/preallocated-invalid.ll b/llvm/test/Verifier/preallocated-invalid.ll new file mode 100644 index 0000000000000..770199d653a2e --- /dev/null +++ b/llvm/test/Verifier/preallocated-invalid.ll @@ -0,0 +1,118 @@ +; RUN: not opt -S %s -verify 2>&1 | FileCheck %s + +declare token @llvm.call.preallocated.setup(i32) +declare i8* @llvm.call.preallocated.arg(token, i32) + +; Fake LLVM intrinsic to return a token +declare token @llvm.what() + +declare void @foo0() +declare void @foo1(i32* preallocated(i32)) +declare void @foo2(i32* preallocated(i32), i32*, i32* preallocated(i32)) +declare i32 @blackbox() + +; CHECK: llvm.call.preallocated.arg must be called with a "preallocated" call site attribute +define void @preallocated_arg_missing_preallocated_attribute() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) + %y = bitcast i8* %x to i32* + call void @foo1(i32* preallocated(i32) %y) ["preallocated"(token %cs)] + ret void +} + +; CHECK: preallocated as a call site attribute can only be on llvm.call.preallocated.arg +define void @preallocated_call_site_attribute_not_on_arg() { + call void @foo0() preallocated(i32) + ret void +} + +; CHECK: "preallocated" argument must be a token from llvm.call.preallocated.setup 
+define void @preallocated_bundle_token() { + %i = call i32 @blackbox() + call void @foo0() ["preallocated"(i32 %i)] + ret void +} + +; CHECK: "preallocated" argument must be a token from llvm.call.preallocated.setup +define void @preallocated_bundle_token_from_setup() { + %cs = call token @llvm.what() + call void @foo0() ["preallocated"(token %cs)] + ret void +} + +; CHECK: Expected exactly one preallocated bundle operand +define void @preallocated_bundle_one_token() { + %cs0 = call token @llvm.call.preallocated.setup(i32 0) + %cs1 = call token @llvm.call.preallocated.setup(i32 0) + call void @foo0() ["preallocated"(token %cs0, token %cs1)] + ret void +} + +; CHECK: Multiple preallocated operand bundles +define void @preallocated_multiple_bundles() { + %cs0 = call token @llvm.call.preallocated.setup(i32 0) + %cs1 = call token @llvm.call.preallocated.setup(i32 0) + call void @foo0() ["preallocated"(token %cs0), "preallocated"(token %cs1)] + ret void +} + +; CHECK: Can have at most one call +define void @preallocated_one_call() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %y = bitcast i8* %x to i32* + call void @foo1(i32* preallocated(i32) %y) ["preallocated"(token %cs)] + call void @foo1(i32* preallocated(i32) %y) ["preallocated"(token %cs)] + ret void +} + +; CHECK: must be a constant +define void @preallocated_setup_constant() { + %ac = call i32 @blackbox() + %cs = call token @llvm.call.preallocated.setup(i32 %ac) + ret void +} + +; CHECK: must be between 0 and corresponding +define void @preallocated_setup_arg_index_in_bounds() { + %cs = call token @llvm.call.preallocated.setup(i32 2) + %a0 = call i8* @llvm.call.preallocated.arg(token %cs, i32 2) preallocated(i32) + ret void +} + +; CHECK: Attribute 'preallocated' type does not match parameter +define void @preallocated_attribute_type_mismatch() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %y = bitcast i8* %x to i32* + call void @foo1(i32* preallocated(i8) %y) ["preallocated"(token %cs)] + ret void +} + +; CHECK: preallocated operand requires a preallocated bundle +define void @preallocated_require_bundle() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %y = bitcast i8* %x to i32* + call void @foo1(i32* preallocated(i32) %y) + ret void +} + +; CHECK: arg size must be equal to number of preallocated arguments +define void @preallocated_num_args() { + %cs = call token @llvm.call.preallocated.setup(i32 3) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %x1 = bitcast i8* %x to i32* + %y = call i8* @llvm.call.preallocated.arg(token %cs, i32 1) preallocated(i32) + %y1 = bitcast i8* %y to i32* + %a = inttoptr i32 0 to i32* + call void @foo2(i32* preallocated(i32) %x1, i32* %a, i32* preallocated(i32) %y1) ["preallocated"(token %cs)] + ret void +} + +; CHECK: token argument must be a llvm.call.preallocated.setup +define void @preallocated_arg_token() { + %t = call token @llvm.what() + %x = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(i32) + ret void +} diff --git a/llvm/test/Verifier/preallocated-valid.ll b/llvm/test/Verifier/preallocated-valid.ll new file mode 100644 index 0000000000000..07f748ca8678b --- /dev/null +++ b/llvm/test/Verifier/preallocated-valid.ll @@ -0,0 +1,40 @@ +; RUN: opt -S %s -verify + 
+declare token @llvm.call.preallocated.setup(i32) +declare i8* @llvm.call.preallocated.arg(token, i32) + +declare void @foo1(i32* preallocated(i32)) +declare void @foo2(i32* preallocated(i32), i32*, i32* preallocated(i32)) + +define void @preallocated() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %y = bitcast i8* %x to i32* + call void @foo1(i32* preallocated(i32) %y) ["preallocated"(token %cs)] + ret void +} + +define void @preallocated_indirect(void (i32*)* %f) { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %y = bitcast i8* %x to i32* + call void %f(i32* preallocated(i32) %y) ["preallocated"(token %cs)] + ret void +} + +define void @preallocated_setup_without_call() { + %cs = call token @llvm.call.preallocated.setup(i32 1) + %a0 = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + ret void +} + +define void @preallocated_num_args() { + %cs = call token @llvm.call.preallocated.setup(i32 2) + %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32) + %x1 = bitcast i8* %x to i32* + %y = call i8* @llvm.call.preallocated.arg(token %cs, i32 1) preallocated(i32) + %y1 = bitcast i8* %y to i32* + %a = inttoptr i32 0 to i32* + call void @foo2(i32* preallocated(i32) %x1, i32* %a, i32* preallocated(i32) %y1) ["preallocated"(token %cs)] + ret void +} diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index ee9e5941a39ea..3797830862f76 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -12,7 +12,7 @@ config.llvm_shlib_dir = path(r"@SHLIBDIR@") config.llvm_shlib_ext = "@SHLIBEXT@" config.llvm_exe_ext = "@EXEEXT@" config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@") -config.python_executable = "@PYTHON_EXECUTABLE@" +config.python_executable = "@Python3_EXECUTABLE@" config.gold_executable = "@GOLD_EXECUTABLE@" config.ld64_executable = "@LD64_EXECUTABLE@" config.ocamlfind_executable = "@OCAMLFIND@" diff --git a/llvm/test/tools/dsymutil/cmdline.test b/llvm/test/tools/dsymutil/cmdline.test index 701de29637dd8..e66f4a589fba3 100644 --- a/llvm/test/tools/dsymutil/cmdline.test +++ b/llvm/test/tools/dsymutil/cmdline.test @@ -14,17 +14,18 @@ HELP: -no-swiftmodule-timestamp HELP: -num-threads HELP: -object-prefix-map HELP: -oso-prepend-path -HELP: -o +HELP: {{ -o }} HELP: -papertrail HELP: -remarks-output-format HELP: -remarks-prepend-path HELP: -symbol-map HELP: -symtab +HELP: {{ -S }} HELP: -toolchain HELP: -update HELP: -verbose HELP: -verify -HELP: -y +HELP: {{ -y }} HELP-NOT: -reverse-iterate RUN: dsymutil --version 2>&1 | FileCheck --check-prefix=VERSION %s diff --git a/llvm/test/tools/gold/X86/thinlto.ll b/llvm/test/tools/gold/X86/thinlto.ll index ebe9b56a2f85c..51609ebb7918c 100644 --- a/llvm/test/tools/gold/X86/thinlto.ll +++ b/llvm/test/tools/gold/X86/thinlto.ll @@ -32,6 +32,7 @@ ; Ensure gold generates an index as well as a binary with save-temps in ThinLTO mode. ; First force single-threaded mode +; RUN: rm -f %t4* ; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \ ; RUN: -m elf_x86_64 \ ; RUN: --plugin-opt=save-temps \ @@ -40,6 +41,8 @@ ; RUN: -shared %t.o %t2.o -o %t4 ; RUN: llvm-bcanalyzer -dump %t4.index.bc | FileCheck %s --check-prefix=COMBINED ; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM +; Ensure ld does not emit an empty combined module by default.
+; RUN: ls %t4.o* | count 2 ; Check with --no-map-whole-files ; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \ ; RUN: -m elf_x86_64 \ ; RUN: --plugin-opt=save-temps \ @@ -72,6 +75,8 @@ ; RUN: -shared %t.o %t2.o -o %t4 ; RUN: llvm-nm %t5.o1 | FileCheck %s --check-prefix=NM2 ; RUN: llvm-nm %t5.o2 | FileCheck %s --check-prefix=NM2 +; Ensure ld emits an empty combined module when obj-path is specified. +; RUN: ls %t5.o* | count 3 ; Test to ensure that thinlto-index-only with obj-path creates the file. ; RUN: rm -f %t5.o %t5.o1 diff --git a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll index 7521806c624ff..832ecbe219cac 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll @@ -1,89 +1,89 @@ ; RUN: llc -debug-entry-values %s -o - -filetype=obj \ ; RUN: | llvm-dwarfdump -statistics - | FileCheck %s -; -; CHECK: "entry value scope bytes covered":5 -; CHECK: "formal params scope bytes total":20 -; CHECK: "formal params scope bytes covered":20 -; CHECK: "formal params entry value scope bytes covered":5 -; CHECK: "vars scope bytes total":90 -; CHECK: "vars scope bytes covered":60 -; CHECK: "vars entry value scope bytes covered":0 -; CHECK: "total variables procesed by location statistics":6 -; CHECK: "variables with 0% of its scope covered":1 -; CHECK: "variables with (0%,10%) of its scope covered":0 -; CHECK: "variables with [10%,20%) of its scope covered":0 -; CHECK: "variables with [20%,30%) of its scope covered":0 -; CHECK: "variables with [30%,40%) of its scope covered":0 -; CHECK: "variables with [40%,50%) of its scope covered":0 -; CHECK: "variables with [50%,60%) of its scope covered":1 -; CHECK: "variables with [60%,70%) of its scope covered":0 -; CHECK: "variables with [70%,80%) of its scope covered":0 -; CHECK: "variables with [80%,90%) of its scope covered":1 -; CHECK: "variables with [90%,100%) of its scope covered":0 -; CHECK: "variables with 100% of its scope covered":3 -; CHECK: "variables (excluding the debug entry values) with 0% of its scope covered":1 -; CHECK: "variables (excluding the debug entry values) with (0%,10%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [10%,20%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [20%,30%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [30%,40%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [40%,50%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [50%,60%) of its scope covered":2 -; CHECK: "variables (excluding the debug entry values) with [60%,70%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [70%,80%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with [80%,90%) of its scope covered":1 -; CHECK: "variables (excluding the debug entry values) with [90%,100%) of its scope covered":0 -; CHECK: "variables (excluding the debug entry values) with 100% of its scope covered":2 -; CHECK: "total params procesed by location statistics":2 -; CHECK: "params with 0% of its scope covered":0 -; CHECK: "params with (0%,10%) of its scope covered":0 -; CHECK: "params with [10%,20%) of its scope covered":0 -; CHECK: "params with [20%,30%) of its scope covered":0 -; CHECK: "params with [30%,40%) of its scope covered":0 -; CHECK: "params with [40%,50%) of its scope covered":0 -; CHECK: "params with [50%,60%) of its scope covered":0 -; CHECK: "params 
with [60%,70%) of its scope covered":0 -; CHECK: "params with [70%,80%) of its scope covered":0 -; CHECK: "params with [80%,90%) of its scope covered":0 -; CHECK: "params with [90%,100%) of its scope covered":0 -; CHECK: "params with 100% of its scope covered":2 -; CHECK: "params (excluding the debug entry values) with 0% of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with (0%,10%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [10%,20%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [20%,30%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [30%,40%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [40%,50%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [50%,60%) of its scope covered":1 -; CHECK: "params (excluding the debug entry values) with [60%,70%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [70%,80%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [80%,90%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with [90%,100%) of its scope covered":0 -; CHECK: "params (excluding the debug entry values) with 100% of its scope covered":1 -; CHECK: "total vars procesed by location statistics":4 -; CHECK: "vars with 0% of its scope covered":1 -; CHECK: "vars with (0%,10%) of its scope covered":0 -; CHECK: "vars with [10%,20%) of its scope covered":0 -; CHECK: "vars with [20%,30%) of its scope covered":0 -; CHECK: "vars with [30%,40%) of its scope covered":0 -; CHECK: "vars with [40%,50%) of its scope covered":0 -; CHECK: "vars with [50%,60%) of its scope covered":1 -; CHECK: "vars with [60%,70%) of its scope covered":0 -; CHECK: "vars with [70%,80%) of its scope covered":0 -; CHECK: "vars with [80%,90%) of its scope covered":1 -; CHECK: "vars with [90%,100%) of its scope covered":0 -; CHECK: "vars with 100% of its scope covered":1 -; CHECK: "vars (excluding the debug entry values) with 0% of its scope covered":1 -; CHECK: "vars (excluding the debug entry values) with (0%,10%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [10%,20%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [20%,30%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [30%,40%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [40%,50%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [50%,60%) of its scope covered":1 -; CHECK: "vars (excluding the debug entry values) with [60%,70%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [70%,80%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with [80%,90%) of its scope covered":1 -; CHECK: "vars (excluding the debug entry values) with [90%,100%) of its scope covered":0 -; CHECK: "vars (excluding the debug entry values) with 100% of its scope covered":1 -; + +; CHECK: "sum_all_variables(#bytes in parent scope covered by DW_OP_entry_value)":5 +; CHECK: "sum_all_params(#bytes in parent scope)":20 +; CHECK: "sum_all_params(#bytes in parent scope covered by DW_AT_location)":20 +; CHECK: "sum_all_params(#bytes in parent scope covered by DW_OP_entry_value)":5 +; CHECK: "sum_all_local_vars(#bytes in parent scope)":90 +; CHECK: "sum_all_local_vars(#bytes in parent scope covered 
by DW_AT_location)":60 +; CHECK: "sum_all_local_vars(#bytes in parent scope covered by DW_OP_entry_value)":0 +; CHECK: "#variables processed by location statistics":6 +; CHECK: "#variables with 0% of parent scope covered by DW_AT_location":1 +; CHECK: "#variables with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [10%,20%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [50%,60%) of parent scope covered by DW_AT_location":1 +; CHECK: "#variables with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with [80%,90%) of parent scope covered by DW_AT_location":1 +; CHECK: "#variables with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables with 100% of parent scope covered by DW_AT_location":3 +; CHECK: "#variables - entry values with 0% of parent scope covered by DW_AT_location":1 +; CHECK: "#variables - entry values with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [10%,20%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [50%,60%) of parent scope covered by DW_AT_location":2 +; CHECK: "#variables - entry values with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with [80%,90%) of parent scope covered by DW_AT_location":1 +; CHECK: "#variables - entry values with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#variables - entry values with 100% of parent scope covered by DW_AT_location":2 +; CHECK: "#params processed by location statistics":2 +; CHECK: "#params with 0% of parent scope covered by DW_AT_location":0 +; CHECK: "#params with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [10%,20%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [50%,60%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [80%,90%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params with 100% of parent scope covered by DW_AT_location":2 +; CHECK: "#params - entry values with 0% of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [10%,20%) of parent scope covered by 
DW_AT_location":0 +; CHECK: "#params - entry values with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [50%,60%) of parent scope covered by DW_AT_location":1 +; CHECK: "#params - entry values with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [80%,90%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#params - entry values with 100% of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars processed by location statistics":4 +; CHECK: "#local vars with 0% of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [10%,20%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [50%,60%) of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with [80%,90%) of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars with 100% of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars - entry values with 0% of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars - entry values with (0%,10%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [10%,20%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [20%,30%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [30%,40%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [40%,50%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [50%,60%) of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars - entry values with [60%,70%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [70%,80%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with [80%,90%) of parent scope covered by DW_AT_location":1 +; CHECK: "#local vars - entry values with [90%,100%) of parent scope covered by DW_AT_location":0 +; CHECK: "#local vars - entry values with 100% of parent scope covered by DW_AT_location":1 + ; The source code of the test case: ; extern void fn3(int *); ; extern void fn2 (int); @@ -102,7 +102,7 @@ ; fn2 (a); ; u --; ; } -; + ; __attribute__((noinline)) ; int f() ; { @@ -112,7 +112,7 @@ ; fn1 (l, k); ; return 0; ; } -; + ; ModuleID = 'test.c' source_filename = "test.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics-base-address.s b/llvm/test/tools/llvm-dwarfdump/X86/statistics-base-address.s index 
1a933f5a0994f..ca9b7c9781e1a 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics-base-address.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics-base-address.s @@ -5,8 +5,8 @@ # RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj -o %t # RUN: llvm-dwarfdump --statistics %t | FileCheck %s -# CHECK: "vars scope bytes total":12 -# CHECK: "vars scope bytes covered":8 +# CHECK: "sum_all_local_vars(#bytes in parent scope)":12 +# CHECK: "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)":8 .text diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test b/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test index 1ca1503f4e70b..1b8e63986758d 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics-dwo.test @@ -69,28 +69,28 @@ RUN: llvm-dwarfdump --statistics statistics-fib.split-dwarf.o | FileCheck %s # } # -CHECK: "version":4 -CHECK: "source functions":3 -CHECK: "source functions with location":3 -CHECK: "inlined functions":7 -CHECK: "inlined funcs with abstract origins":7 -CHECK: "unique source variables":9 -CHECK: "source variables":30 +CHECK: "version":5 +CHECK: "#functions":3 +CHECK: "#functions with location":3 +CHECK: "#inlined functions":7 +CHECK: "#inlined functions with abstract origins":7 +CHECK: "#unique source variables":9 +CHECK: "#source variables":30 # Ideally the value below would be 33 but currently it's not. -CHECK: "variables with location":22 -CHECK: "call site entries":7 -CHECK: "scope bytes total":2817 -CHECK: "scope bytes covered":1160 -CHECK: "total function size":594 -CHECK: "total inlined function size":345 -CHECK: "total formal params":12 -CHECK: "formal params with source location":12 -CHECK: "formal params with type":12 -CHECK: "formal params with binary location":12 -CHECK: "total vars":18 -CHECK: "vars with source location":18 -CHECK: "vars with type":18 +CHECK: "#source variables with location":22 +CHECK: "#call site entries":7 +CHECK: "sum_all_variables(#bytes in parent scope)":2817 +CHECK: "sum_all_variables(#bytes in parent scope covered by DW_AT_location)":1160 +CHECK: "#bytes witin functions":594 +CHECK: "#bytes witin inlined functions":345 +CHECK: "#params":12 +CHECK: "#params with source location":12 +CHECK: "#params with type":12 +CHECK: "#params with binary location":12 +CHECK: "#local vars":18 +CHECK: "#local vars with source location":18 +CHECK: "#local vars with type":18 # Ideally the value below would be 18, but currently it's not. -CHECK: "vars with binary location":10 +CHECK: "#local vars with binary location":10 diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics-v3.test b/llvm/test/tools/llvm-dwarfdump/X86/statistics-v3.test index a16ae8c30ced0..257849b46f3c1 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics-v3.test +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics-v3.test @@ -64,28 +64,28 @@ RUN: llvm-dwarfdump --statistics %t-statistics-fib.o | FileCheck %s # } # -CHECK: "version":4 -CHECK: "source functions":3 -CHECK: "source functions with location":3 -CHECK: "inlined functions":8 -CHECK: "inlined funcs with abstract origins":8 -CHECK: "unique source variables":9 -CHECK: "source variables":33 +CHECK: "version":5 +CHECK: "#functions":3 +CHECK: "#functions with location":3 +CHECK: "#inlined functions":8 +CHECK: "#inlined functions with abstract origins":8 +CHECK: "#unique source variables":9 +CHECK: "#source variables":33 # Ideally the value below would be 33 but currently it's not. 
-CHECK: "variables with location":24 -CHECK: "call site entries":8 -CHECK: "scope bytes total":3072 -CHECK: "scope bytes covered":1188 -CHECK: "total function size":636 -CHECK: "total inlined function size":388 -CHECK: "total formal params":13 -CHECK: "formal params with source location":13 -CHECK: "formal params with type":13 -CHECK: "formal params with binary location":13 -CHECK: "total vars":20 -CHECK: "vars with source location":20 -CHECK: "vars with type":20 +CHECK: "#source variables with location":24 +CHECK: "#call site entries":8 +CHECK: "sum_all_variables(#bytes in parent scope)":3072 +CHECK: "sum_all_variables(#bytes in parent scope covered by DW_AT_location)":1188 +CHECK: "#bytes witin functions":636 +CHECK: "#bytes witin inlined functions":388 +CHECK: "#params":13 +CHECK: "#params with source location":13 +CHECK: "#params with type":13 +CHECK: "#params with binary location":13 +CHECK: "#local vars":20 +CHECK: "#local vars with source location":20 +CHECK: "#local vars with type":20 # Ideally the value below would be 20, but currently it's not. -CHECK: "vars with binary location":11 +CHECK: "#local vars with binary location":11 diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll index 33a091ca811c6..2683db3efbaba 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll @@ -1,6 +1,6 @@ ; RUN: llc -O0 %s -o - -filetype=obj \ ; RUN: | llvm-dwarfdump -statistics - | FileCheck %s -; CHECK: "version":4 +; CHECK: "version":5 ; namespace test { ; extern int a; @@ -35,24 +35,24 @@ ; - non-constant member S:fn, ; - arguments of S:fn. -; CHECK: "unique source variables":9 +; CHECK: "#unique source variables":9 ; +1 extra inline i. -; CHECK: "source variables":10 +; CHECK: "#source variables":10 ; -1 square::i -; CHECK: "variables with location":9 -; CHECK: "scope bytes total":[[BYTES:[0-9]+]] +; CHECK: "#source variables with location":9 +; CHECK: "sum_all_local_vars(#bytes in parent scope)":[[BYTES:[0-9]+]] ; Because of the dbg.value in the middle of the function, the pc range coverage ; must be below 100%. 
-; CHECK-NOT: "scope bytes covered":0 -; CHECK-NOT: "scope bytes covered":[[BYTES]] -; CHECK: "scope bytes covered": -; CHECK: "total function size":[[FUNCSIZE:[0-9]+]] -; CHECK: "total inlined function size":[[INLINESIZE:[0-9]+]] -; CHECK: "size of __debug_info":380 -; CHECK: "size of __debug_loc":35 -; CHECK: "size of __debug_abbrev":303 -; CHECK: "size of __debug_line":117 -; CHECK: "size of __debug_str":204 +; CHECK-NOT: "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)":0 +; CHECK-NOT "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)":[[BYTES]] +; CHECK: "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)": +; CHECK: "#bytes witin functions":[[FUNCSIZE:[0-9]+]] +; CHECK: "#bytes witin inlined functions":[[INLINESIZE:[0-9]+]] +; CHECK: "#bytes in __debug_info":380 +; CHECK: "#bytes in __debug_loc":35 +; CHECK: "#bytes in __debug_abbrev":303 +; CHECK: "#bytes in __debug_line":117 +; CHECK: "#bytes in __debug_str":204 ; ModuleID = '/tmp/quality.cpp' source_filename = "/tmp/quality.cpp" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll index d126757398ffe..7f7e3438c14f5 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll @@ -12,8 +12,8 @@ ; foo(&local1, arg2, 10, 15, arg3 + 3, arg1 + arg2); ; } ; -; CHECK: "call site DIEs":2 -; CHECK-SAME: "call site parameter DIEs":6 +; CHECK: "#call site DIEs":2 +; CHECK-SAME: "#call site parameter DIEs":6 ; ; ModuleID = 'test.c' source_filename = "test.c" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-multi-cu.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-multi-cu.ll index e6e193c2004da..0615e867fca18 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-multi-cu.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-multi-cu.ll @@ -3,11 +3,11 @@ ; Test that abstract origins in multiple CUs are uniqued. -; CHECK: "source functions":4, -; CHECK-SAME: "inlined functions":2, -; CHECK-SAME: "unique source variables":4 -; CHECK-SAME: "source variables":6 -; CHECK-SAME: "variables with location":6 +; CHECK: "#functions":4, +; CHECK-SAME: "#inlined functions":2, +; CHECK-SAME: "#unique source variables":4 +; CHECK-SAME: "#source variables":6 +; CHECK-SAME: "#source variables with location":6 ;header.h: ;extern "C" int getchar(); diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-single-cu.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-single-cu.ll index 5a4c3541a5c44..c56e6a55049a3 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-single-cu.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-inlining-single-cu.ll @@ -4,11 +4,11 @@ ; This test serves as a baseline / sanity-check for stats-inlining-multi-cu.ll ; The results for both tests should be identical. 
-; CHECK: "source functions":4, -; CHECK-SAME: "inlined functions":2, -; CHECK-SAME: "unique source variables":4 -; CHECK-SAME: "source variables":6 -; CHECK-SAME: "variables with location":6 +; CHECK: "#functions":4, +; CHECK-SAME: "#inlined functions":2, +; CHECK-SAME: "#unique source variables":4 +; CHECK-SAME: "#source variables":6 +; CHECK-SAME: "#source variables with location":6 ;header.h: ;extern "C" int getchar(); diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-mulitple-cu-out-of-line.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-mulitple-cu-out-of-line.ll index 53c202babb231..cc644d1bdb5c6 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-mulitple-cu-out-of-line.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-mulitple-cu-out-of-line.ll @@ -18,12 +18,12 @@ ; #include "test.h" ; int far() { return foo(42); } -; CHECK: "source functions":3 -; CHECK-SAME: "source functions with location":3 -; CHECK-SAME: "inlined functions":0 -; CHECK-SAME: "unique source variables":1 -; CHECK-SAME: "source variables":2 -; CHECK-SAME: "variables with location":2 +; CHECK: "#functions":3 +; CHECK-SAME: "#functions with location":3 +; CHECK-SAME: "#inlined functions":0 +; CHECK-SAME: "#unique source variables":1 +; CHECK-SAME: "#source variables":2 +; CHECK-SAME: "#source variables with location":2 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-members.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-members.ll index 5cc529e9dc0d4..c5ea22c48b45f 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-members.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-members.ll @@ -16,8 +16,8 @@ ; #include "test.h" ; s S2; -; CHECK: "source variables":4 -; CHECK-SAME: "variables with location":4 +; CHECK: "#source variables":4 +; CHECK-SAME: "#source variables with location":4 source_filename = "llvm-link" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-same-name.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-same-name.ll index bfcc1b8bdfe78..6edda0512212e 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-same-name.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-multiple-cu-same-name.ll @@ -3,9 +3,9 @@ ; Test that statistics distinguish functions with the same name. -; CHECK: "source functions":4, -; CHECK-SAME: "unique source variables":2 -; CHECK-SAME: "source variables":2 +; CHECK: "#functions":4, +; CHECK-SAME: "#unique source variables":2 +; CHECK-SAME: "#source variables":2 ; $ cat test1.cpp ; static int foo(int a) { diff --git a/llvm/test/tools/llvm-gsymutil/X86/macho-invalid-section-offset.yaml b/llvm/test/tools/llvm-gsymutil/X86/macho-invalid-section-offset.yaml new file mode 100644 index 0000000000000..e9afbb673fde2 --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/X86/macho-invalid-section-offset.yaml @@ -0,0 +1,36 @@ +## This test ensures that LLVM will not crash when converting a Mach-O object +## file with a malformed symbol whose n_sect points to an invalid offset. + +# RUN: yaml2obj %s -o %t +# RUN: not llvm-gsymutil --convert %t -o %t.o 2>&1 | FileCheck %s + +# CHECK: Loaded 0 functions from symbol table. 
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 100 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 4096 + nsyms: 1 + stroff: 4144 + strsize: 6 +LinkEditData: + NameList: + - n_strx: 2 # _foo + n_type: 0x0e + n_sect: 3 # Points to an invalid offset. + n_desc: 0 + n_value: 1234 + StringTable: + - '' + - '' + - _foo diff --git a/llvm/test/tools/llvm-lib/no-inputs.test b/llvm/test/tools/llvm-lib/no-inputs.test index 95d6555d58c6c..22e096f78ce07 100644 --- a/llvm/test/tools/llvm-lib/no-inputs.test +++ b/llvm/test/tools/llvm-lib/no-inputs.test @@ -1,2 +1,7 @@ -RUN: llvm-lib -out:%t.a -RUN: test ! -e %t.a +RUN: rm -f %t.lib +RUN: llvm-lib -out:%t.lib +RUN: test ! -e %t.lib + +RUN: llvm-lib /llvmlibempty -out:%t.lib +RUN: FileCheck %s < %t.lib +CHECK: ! diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s index 0cd5b6ef9b7d9..cf0e1bff34f77 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st1.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st1 {v0.s}[0], [sp] st1 {v0.2s}, [sp] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s index 94ac16da6d849..b4d2b582e1435 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st2.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st2 {v0.s, v1.s}[0], [sp] st2 {v0.2s, v1.2s}, [sp] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s index 564e408c4d144..29f8079acd8cb 100644 --- 
a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st3.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st3 {v0.s, v1.s, v2.s}[0], [sp] st3 {v0.2s, v1.2s, v2.2s}, [sp] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s index 37283f973bc4f..7aa69b0f34d2f 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/asimd-st4.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -mtriple=aarch64-linux-gnu -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 st4 {v0.s, v1.s, v2.s, v3.s}[0], [sp] st4 {v0.2s, v1.2s, v2.2s, v3.2s}, [sp] diff --git a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s index 55d1d60252b76..5b7004b817ff7 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/float-store.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 stur d0, [sp, #2] stur q0, [sp, #16] diff --git 
a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s index b86cdac50e6e2..3c7d412995be1 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s +++ b/llvm/test/tools/llvm-mca/AArch64/Exynos/store.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M3 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M4 -# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,M5 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M3 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m4 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M4 +# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m5 -resource-pressure=false -noalias=false < %s | FileCheck %s -check-prefixes=ALL,M5 stur x0, [sp, #8] strb w0, [sp], #1 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s index adf6c10d74936..b600e387459fb 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s @@ -47,12 +47,12 @@ movaps %xmm3, (%rbx) # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -72,22 +72,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -116,16 +115,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movb %spl, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movb (%rcx), %bpl -# CHECK-NEXT: - - - - - - 0.95 0.05 movb (%rdx), %sil -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movb %dil, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movb (%rdx), %sil +# CHECK-NEXT: - - - - 1.00 - 1.00 - movb %dil, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movb %spl, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D=eeeeeER. 
movb (%rdx), %sil -# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx) +# CHECK: [0,0] DeER . . movb %spl, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl +# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil +# CHECK-NEXT: [0,3] D=eE----R movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -137,19 +136,19 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 4.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [1] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -169,22 +168,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -213,16 +211,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movw %sp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movw (%rcx), %bp -# CHECK-NEXT: - - - - - - 0.95 0.05 movw (%rdx), %si -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movw %di, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movw (%rdx), %si +# CHECK-NEXT: - - - - 1.00 - 1.00 - movw %di, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movw %sp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si -# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx) +# CHECK: [0,0] DeER . . movw %sp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp +# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si +# CHECK-NEXT: [0,3] D=eE----R movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -234,19 +232,19 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movw %di, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [2] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -266,22 +264,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -310,16 +307,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movl %esp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movl (%rcx), %ebp -# CHECK-NEXT: - - - - - - 0.95 0.05 movl (%rdx), %esi -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movl %edi, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movl (%rdx), %esi +# CHECK-NEXT: - - - - 1.00 - 1.00 - movl %edi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movl %esp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi -# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx) +# CHECK: [0,0] DeER . . movl %esp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp +# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi +# CHECK-NEXT: [0,3] D=eE----R movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -331,19 +328,19 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [3] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -363,22 +360,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -407,16 +403,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movq %rsp, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movq (%rcx), %rbp -# CHECK-NEXT: - - - - - - 0.95 0.05 movq (%rdx), %rsi -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movq %rdi, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movq (%rdx), %rsi +# CHECK-NEXT: - - - - 1.00 - 1.00 - movq %rdi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movq %rsp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx) +# CHECK: [0,0] DeER . . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp +# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi +# CHECK-NEXT: [0,3] D=eE----R movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -428,19 +424,19 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [4] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 208 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.92 -# CHECK-NEXT: IPC: 1.92 +# CHECK-NEXT: uOps Per Cycle: 1.93 +# CHECK-NEXT: IPC: 1.93 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -460,22 +456,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (71.0%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (16.3%) -# CHECK-NEXT: 2, 148 (71.2%) -# CHECK-NEXT: 4, 26 (12.5%) +# CHECK-NEXT: 0, 33 (15.9%) +# CHECK-NEXT: 2, 148 (71.5%) +# CHECK-NEXT: 4, 26 (12.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 10 (4.8%) -# CHECK-NEXT: 2, 195 (93.8%) +# CHECK-NEXT: 0, 7 (3.4%) +# CHECK-NEXT: 2, 200 (96.6%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -504,16 +499,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movd %mm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movd (%rcx), %mm1 -# CHECK-NEXT: - - - - - - 0.95 0.05 movd (%rdx), %mm2 -# CHECK-NEXT: - - - - 1.00 - 0.05 0.95 movd %mm3, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movd (%rdx), %mm2 +# CHECK-NEXT: - - - - 1.00 - 1.00 - movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movd %mm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D=eeeeeER. movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D======eER movd %mm3, (%rbx) +# CHECK: [0,0] DeER . . movd %mm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] D=eE----R movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -525,19 +520,19 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 2.0 0.0 4.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.0 # CHECK: [5] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 209 +# CHECK-NEXT: Total Cycles: 208 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.91 -# CHECK-NEXT: IPC: 1.91 +# CHECK-NEXT: uOps Per Cycle: 1.92 +# CHECK-NEXT: IPC: 1.92 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -557,22 +552,21 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.3%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 147 (70.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 35 (16.7%) -# CHECK-NEXT: 2, 148 (70.8%) -# CHECK-NEXT: 4, 26 (12.4%) +# CHECK-NEXT: 0, 34 (16.3%) +# CHECK-NEXT: 2, 148 (71.2%) +# CHECK-NEXT: 4, 26 (12.5%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (1.4%) -# CHECK-NEXT: 1, 12 (5.7%) -# CHECK-NEXT: 2, 194 (92.8%) +# CHECK-NEXT: 0, 8 (3.8%) +# CHECK-NEXT: 2, 200 (96.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -601,17 +595,16 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - - - 1.00 - - 1.00 movaps %xmm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - movaps (%rcx), %xmm1 -# CHECK-NEXT: - - - - - - 0.94 0.06 movaps (%rdx), %xmm2 -# CHECK-NEXT: - - - - 1.00 - 0.06 0.94 movaps %xmm3, (%rbx) +# CHECK-NEXT: - - - - - - - 1.00 movaps (%rdx), %xmm2 +# CHECK-NEXT: - - - - 1.00 - 1.00 - movaps %xmm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeeER. movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D=======eER movaps %xmm3, (%rbx) +# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeeER. movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] D=eeeeeeER movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] D=eE-----R movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -623,5 +616,5 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 3.0 1.0 0.0 +# CHECK-NEXT: 3. 1 2.0 0.0 5.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 1.5 1.0 1.3 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s b/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s index 08a9c47302267..7d1fb6c246309 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s @@ -135,10 +135,10 @@ movaps %xmm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movb %bpl, (%rcx) +# CHECK-NEXT: 2. 
1 3.0 1.0 0.0 movb %sil, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [1] Code Region @@ -232,10 +232,10 @@ movaps %xmm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [2] Code Region @@ -329,10 +329,10 @@ movaps %xmm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [3] Code Region @@ -426,10 +426,10 @@ movaps %xmm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [4] Code Region @@ -620,7 +620,7 @@ movaps %xmm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movaps %xmm2, (%rdx) +# CHECK-NEXT: 3. 
1 4.0 1.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s index f326028e12ab5..4b8f9e7e06cda 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -72,23 +72,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -99,8 +100,8 @@ movaps %xmm3, (%rbx) # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -133,18 +134,18 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movb %spl, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movb (%rcx), %bpl # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movb (%rdx), %sil -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movb %spl, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil -# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx) +# CHECK: [0,0] DeER . . movb %spl, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl +# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil +# CHECK-NEXT: [0,3] D==eE---R movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -156,8 +157,8 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [1] Code Region @@ -188,23 +189,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -215,8 +217,8 @@ movaps %xmm3, (%rbx) # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -249,18 +251,18 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movw %sp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movw (%rcx), %bp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movw (%rdx), %si -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movw %sp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D=eeeeeER. movw (%rdx), %si -# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx) +# CHECK: [0,0] DeER . . movw %sp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp +# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si +# CHECK-NEXT: [0,3] D==eE---R movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -272,8 +274,8 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movw %di, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [2] Code Region @@ -304,23 +306,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -331,8 +334,8 @@ movaps %xmm3, (%rbx) # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -365,18 +368,18 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movl %esp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movl (%rcx), %ebp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movl (%rdx), %esi -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movl %esp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi -# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx) +# CHECK: [0,0] DeER . . movl %esp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp +# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi +# CHECK-NEXT: [0,3] D==eE---R movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -388,8 +391,8 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [3] Code Region @@ -420,23 +423,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 256 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 171 (55.9%) +# CHECK-NEXT: 2, 85 (27.8%) +# CHECK-NEXT: 3, 1 (0.3%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 200 (65.4%) +# CHECK-NEXT: 2, 100 (32.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -447,8 +451,8 @@ movaps %xmm3, (%rbx) # CHECK: [1] [2] [3] [4] # CHECK-NEXT: PdEX 36 40 40 # CHECK-NEXT: PdFPU 0 0 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 23 24 +# CHECK-NEXT: PdLoad 21 24 40 +# CHECK-NEXT: PdStore 18 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -481,18 +485,18 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: 0.96 0.04 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax) +# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rsp, (%rax) # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - - 2.00 - - movq (%rcx), %rbp # CHECK-NEXT: - 2.00 - - - - - - - - - - - - - - - - - 2.00 - - - movq (%rdx), %rsi -# CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) +# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movq %rsp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx) +# CHECK: [0,0] DeER . . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp +# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi +# CHECK-NEXT: [0,3] D==eE---R movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -504,14 +508,14 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 3.0 1.0 3.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 1.8 1.3 0.8 # CHECK: [4] Code Region # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 554 +# CHECK-NEXT: Total Cycles: 553 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -536,24 +540,24 @@ movaps %xmm3, (%rbx) # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 57 (10.3%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 437 (78.9%) +# CHECK-NEXT: SQ - Store queue full: 432 (78.1%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 365 (65.9%) +# CHECK-NEXT: 0, 364 (65.8%) # CHECK-NEXT: 1, 88 (15.9%) -# CHECK-NEXT: 2, 3 (0.5%) -# CHECK-NEXT: 3, 86 (15.5%) -# CHECK-NEXT: 4, 12 (2.2%) +# CHECK-NEXT: 2, 4 (0.7%) +# CHECK-NEXT: 3, 84 (15.2%) +# CHECK-NEXT: 4, 13 (2.4%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 253 (45.7%) -# CHECK-NEXT: 1, 202 (36.5%) -# CHECK-NEXT: 2, 99 (17.9%) +# CHECK-NEXT: 0, 253 (45.8%) +# CHECK-NEXT: 1, 200 (36.2%) +# CHECK-NEXT: 2, 100 (18.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -599,18 +603,17 @@ movaps %xmm3, (%rbx) # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - - - 3.00 - - - - 1.00 movd %mm0, (%rax) -# CHECK-NEXT: 1.53 1.47 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 -# CHECK-NEXT: 1.47 1.53 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 +# CHECK-NEXT: 1.50 1.50 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - movd (%rcx), %mm1 +# CHECK-NEXT: 1.50 1.50 - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rdx), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . movd %mm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D=eeeeeER . movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx) +# CHECK: [0,0] DeeER. . movd %mm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] D===eeE-R movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -622,8 +625,8 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 
1 4.0 1.0 1.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 2.0 1.3 0.3 # CHECK: [5] Code Region @@ -668,9 +671,9 @@ movaps %xmm3, (%rbx) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 104 (25.7%) -# CHECK-NEXT: 1, 202 (49.9%) -# CHECK-NEXT: 2, 99 (24.4%) +# CHECK-NEXT: 0, 105 (25.9%) +# CHECK-NEXT: 1, 200 (49.4%) +# CHECK-NEXT: 2, 100 (24.7%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -679,10 +682,10 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: [4] Total number of buffer entries. # CHECK: [1] [2] [3] [4] -# CHECK-NEXT: PdEX 37 40 40 -# CHECK-NEXT: PdFPU 37 40 64 -# CHECK-NEXT: PdLoad 19 22 40 -# CHECK-NEXT: PdStore 20 22 24 +# CHECK-NEXT: PdEX 36 40 40 +# CHECK-NEXT: PdFPU 36 40 64 +# CHECK-NEXT: PdLoad 20 23 40 +# CHECK-NEXT: PdStore 19 21 24 # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -721,12 +724,12 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movaps %xmm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeER. movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D======eER movaps %xmm3, (%rbx) +# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] DeeeeeER. movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] D=eeeeeER movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] D===eE--R movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -738,5 +741,5 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 2.8 1.0 0.0 +# CHECK-NEXT: 3. 1 4.0 2.0 2.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.0 1.5 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s index fb96ce5d75610..e1753784c7e64 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s @@ -101,9 +101,9 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 # CHECK-NEXT: 1. 1 7.0 1.0 0.0 vmovaps %xmm0, (%rdi) # CHECK-NEXT: 2. 1 1.0 1.0 2.0 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 3. 1 8.0 1.0 0.0 vmovaps %xmm0, 16(%rdi) # CHECK-NEXT: 4. 1 3.0 3.0 0.0 vmovaps 32(%rsi), %xmm0 # CHECK-NEXT: 5. 1 9.0 1.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 3.0 3.0 2.0 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 7. 1 10.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) -# CHECK-NEXT: 1 5.3 1.3 0.5 +# CHECK-NEXT: 7. 1 10.0 1.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 5.3 1.5 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s index 067301b06a513..c00b1c92a544b 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -159,10 +159,10 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 
1 2.0 1.0 0.0 movb %bpl, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movb %sil, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [1] Code Region @@ -273,10 +273,10 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movw %bp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movw %si, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [2] Code Region @@ -387,10 +387,10 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movl %ebp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movl %esi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [3] Code Region @@ -501,10 +501,10 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) -# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) -# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) -# CHECK-NEXT: 1 2.5 0.3 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movq %rbp, (%rcx) +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 movq %rsi, (%rdx) +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 1.0 0.0 # CHECK: [4] Code Region @@ -732,10 +732,10 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) -# CHECK-NEXT: 2. 1 4.0 1.0 0.0 movaps %xmm2, (%rdx) -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 movaps %xmm3, (%rbx) -# CHECK-NEXT: 1 3.0 0.5 0.0 +# CHECK-NEXT: 1. 1 2.0 1.0 0.0 movaps %xmm1, (%rcx) +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 movaps %xmm2, (%rdx) +# CHECK-NEXT: 3. 1 5.0 1.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 3.0 1.3 0.0 # CHECK: [6] Code Region @@ -846,7 +846,7 @@ vmovaps %ymm3, (%rbx) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 1.0 0.0 vmovaps %ymm1, (%rcx) -# CHECK-NEXT: 2. 1 35.0 33.0 0.0 vmovaps %ymm2, (%rdx) -# CHECK-NEXT: 3. 1 36.0 1.0 0.0 vmovaps %ymm3, (%rbx) -# CHECK-NEXT: 1 18.5 9.0 0.0 +# CHECK-NEXT: 1. 1 2.0 2.0 0.0 vmovaps %ymm1, (%rcx) +# CHECK-NEXT: 2. 1 35.0 34.0 0.0 vmovaps %ymm2, (%rdx) +# CHECK-NEXT: 3. 
1 36.0 2.0 0.0 vmovaps %ymm3, (%rbx) +# CHECK-NEXT: 1 18.5 9.8 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/BtVer2/independent-load-stores.s new file mode 100644 index 0000000000000..bd202b604458f --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/independent-load-stores.s @@ -0,0 +1,146 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1008 +# YESALIAS-NEXT: Total Cycles: 6003 + +# ALL-NEXT: Total uOps: 1000 + +# ALL: Dispatch Width: 2 + +# NOALIAS-NEXT: uOps Per Cycle: 0.99 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.17 +# YESALIAS-NEXT: IPC: 0.17 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 6 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 1 6 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - JALU0 +# ALL-NEXT: [1] - JALU1 +# ALL-NEXT: [2] - JDiv +# ALL-NEXT: [3] - JFPA +# ALL-NEXT: [4] - JFPM +# ALL-NEXT: [5] - JFPU0 +# ALL-NEXT: [6] - JFPU1 +# ALL-NEXT: [7] - JLAGU +# ALL-NEXT: [8] - JMul +# ALL-NEXT: [9] - JSAGU +# ALL-NEXT: [10] - JSTC +# ALL-NEXT: [11] - JVALU0 +# ALL-NEXT: [12] - JVALU1 +# ALL-NEXT: [13] - JVIMUL + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# ALL-NEXT: 5.00 5.00 - - - - - 10.00 - 10.00 - - - - + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 64(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 128(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 192(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 256(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 320(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 384(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 448(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 - - - - - 1.00 - 1.00 - - - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - - - - - - 1.00 - 1.00 - - - - addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 01234567 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 
0123456789 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 012 + +# NOALIAS: [0,0] DeeeeeeER . . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeER. . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeER . . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeER. . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeER . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D======eeeeeeER. . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D===========eeeeeeER . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D=================eeeeeeER . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D======================eeeeeeER . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D============================eeeeeeER . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=================================eeeeeeER. . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D=======================================eeeeeeER . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D============================================eeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . D==================================================eeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 12.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 18.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 23.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 29.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 34.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 40.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 45.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 
1 51.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 26.0 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s index 64b6490861c2a..691f530be7b07 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s @@ -21,12 +21,12 @@ imul %ecx, %ecx # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total Cycles: 24 # CHECK-NEXT: Total uOps: 16 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.59 -# CHECK-NEXT: IPC: 0.37 +# CHECK-NEXT: uOps Per Cycle: 0.67 +# CHECK-NEXT: IPC: 0.42 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -74,18 +74,18 @@ imul %ecx, %ecx # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 - -# CHECK: [0,0] DeeeeeeeeeeeER . . .. xaddl %ecx, (%rsp) -# CHECK-NEXT: [0,1] . D=eE-------R . . .. addl %ecx, %ecx -# CHECK-NEXT: [0,2] . D==eE-------R. . .. addl %ecx, %ecx -# CHECK-NEXT: [0,3] . D==eeeE----R. . .. imull %ecx, %ecx -# CHECK-NEXT: [0,4] . D=====eeeE--R . .. imull %ecx, %ecx -# CHECK-NEXT: [1,0] . D=======eeeeeeeeeeeER.. xaddl %ecx, (%rsp) -# CHECK-NEXT: [1,1] . .D========eE-------R.. addl %ecx, %ecx -# CHECK-NEXT: [1,2] . .D=========eE-------R. addl %ecx, %ecx -# CHECK-NEXT: [1,3] . . D=========eeeE----R. imull %ecx, %ecx -# CHECK-NEXT: [1,4] . . D============eeeE--R imull %ecx, %ecx +# CHECK-NEXT: Index 0123456789 0123 + +# CHECK: [0,0] DeeeeeeeeeeeER . . . xaddl %ecx, (%rsp) +# CHECK-NEXT: [0,1] . D=eE-------R . . . addl %ecx, %ecx +# CHECK-NEXT: [0,2] . D==eE-------R. . . addl %ecx, %ecx +# CHECK-NEXT: [0,3] . D==eeeE----R. . . imull %ecx, %ecx +# CHECK-NEXT: [0,4] . D=====eeeE--R . . imull %ecx, %ecx +# CHECK-NEXT: [1,0] . D====eeeeeeeeeeeER . xaddl %ecx, (%rsp) +# CHECK-NEXT: [1,1] . .D=====eE-------R . addl %ecx, %ecx +# CHECK-NEXT: [1,2] . .D======eE-------R. addl %ecx, %ecx +# CHECK-NEXT: [1,3] . . D======eeeE----R. imull %ecx, %ecx +# CHECK-NEXT: [1,4] . . D=========eeeE--R imull %ecx, %ecx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -94,12 +94,12 @@ imul %ecx, %ecx # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 4.5 0.5 0.0 xaddl %ecx, (%rsp) -# CHECK-NEXT: 1. 2 5.5 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx -# CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx -# CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx -# CHECK-NEXT: 2 6.5 0.1 4.0 +# CHECK-NEXT: 0. 2 3.0 0.5 0.0 xaddl %ecx, (%rsp) +# CHECK-NEXT: 1. 2 4.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 2. 2 5.0 0.0 7.0 addl %ecx, %ecx +# CHECK-NEXT: 3. 2 5.0 0.0 4.0 imull %ecx, %ecx +# CHECK-NEXT: 4. 
2 8.0 0.0 2.0 imull %ecx, %ecx +# CHECK-NEXT: 2 5.0 0.1 4.0 # CHECK: [1] Code Region diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/Haswell/independent-load-stores.s new file mode 100644 index 0000000000000..a5ad56b6a7496 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Haswell/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 4 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - HWDivider +# ALL-NEXT: [1] - HWFPDivider +# ALL-NEXT: [2] - HWPort0 +# ALL-NEXT: [3] - HWPort1 +# ALL-NEXT: [4] - HWPort2 +# ALL-NEXT: [5] - HWPort3 +# ALL-NEXT: [6] - HWPort4 +# ALL-NEXT: [7] - HWPort5 +# ALL-NEXT: [8] - HWPort6 +# ALL-NEXT: [9] - HWPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + 
+# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] .DeeeeeeeER . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] . DeeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] . DeeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . DeeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . DeeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . .DeeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . . DeeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . . DeeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . . DeeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] .D======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] . D============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] . D==================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D==============================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . .D====================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . . D==========================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . . D================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . . D======================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 1.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 1.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 1.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 1.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 1.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 1.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 1.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 1.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 1.0 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 7.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 13.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 19.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 25.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 31.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 37.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 43.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 49.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 
1 55.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 28.0 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/independent-load-stores.s new file mode 100644 index 0000000000000..03d7bcd079a33 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 6 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - SKLDivider +# ALL-NEXT: [1] - SKLFPDivider +# ALL-NEXT: [2] - SKLPort0 +# ALL-NEXT: [3] - SKLPort1 +# ALL-NEXT: [4] - SKLPort2 +# ALL-NEXT: [5] - SKLPort3 +# ALL-NEXT: [6] - SKLPort4 +# ALL-NEXT: [7] - SKLPort5 +# ALL-NEXT: [8] - SKLPort6 +# ALL-NEXT: [9] - SKLPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 
0123456789 + +# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . D===========================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 
1 60.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 30.5 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/independent-load-stores.s new file mode 100644 index 0000000000000..4ebdee99ad6b3 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/independent-load-stores.s @@ -0,0 +1,142 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS + + addq $44, 64(%r14) + addq $44, 128(%r14) + addq $44, 192(%r14) + addq $44, 256(%r14) + addq $44, 320(%r14) + addq $44, 384(%r14) + addq $44, 448(%r14) + addq $44, 512(%r14) + addq $44, 576(%r14) + addq $44, 640(%r14) + +# ALL: Iterations: 100 +# ALL-NEXT: Instructions: 1000 + +# NOALIAS-NEXT: Total Cycles: 1009 +# YESALIAS-NEXT: Total Cycles: 7003 + +# ALL-NEXT: Total uOps: 3000 + +# ALL: Dispatch Width: 6 + +# NOALIAS-NEXT: uOps Per Cycle: 2.97 +# NOALIAS-NEXT: IPC: 0.99 + +# YESALIAS-NEXT: uOps Per Cycle: 0.43 +# YESALIAS-NEXT: IPC: 0.14 + +# ALL-NEXT: Block RThroughput: 10.0 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 3 7 1.00 * * addq $44, 64(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 128(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 192(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 256(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 320(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 384(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 448(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 512(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 576(%r14) +# ALL-NEXT: 3 7 1.00 * * addq $44, 640(%r14) + +# ALL: Resources: +# ALL-NEXT: [0] - SKXDivider +# ALL-NEXT: [1] - SKXFPDivider +# ALL-NEXT: [2] - SKXPort0 +# ALL-NEXT: [3] - SKXPort1 +# ALL-NEXT: [4] - SKXPort2 +# ALL-NEXT: [5] - SKXPort3 +# ALL-NEXT: [6] - SKXPort4 +# ALL-NEXT: [7] - SKXPort5 +# ALL-NEXT: [8] - SKXPort6 +# ALL-NEXT: [9] - SKXPort7 + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# ALL-NEXT: - - 2.50 2.50 6.66 6.67 10.00 2.50 2.50 6.67 + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 64(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 128(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 192(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 256(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.66 1.00 - 0.50 0.67 addq $44, 320(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.67 1.00 0.50 - 0.66 addq $44, 384(%r14) +# ALL-NEXT: - - - 0.50 0.66 0.67 1.00 - 0.50 0.67 addq $44, 448(%r14) +# ALL-NEXT: - - 0.50 - 0.67 0.66 1.00 0.50 - 0.67 addq $44, 512(%r14) +# ALL-NEXT: - - - 0.50 0.67 0.67 1.00 - 0.50 0.66 addq $44, 576(%r14) +# ALL-NEXT: - - 0.50 - 0.66 0.67 1.00 0.50 - 0.67 addq $44, 640(%r14) + +# ALL: Timeline view: + +# NOALIAS-NEXT: 012345678 +# NOALIAS-NEXT: Index 0123456789 + +# YESALIAS-NEXT: 0123456789 0123456789 0123456789 012 +# YESALIAS-NEXT: Index 0123456789 0123456789 
0123456789 0123456789 + +# NOALIAS: [0,0] DeeeeeeeER. . . addq $44, 64(%r14) +# NOALIAS-NEXT: [0,1] D=eeeeeeeER . . addq $44, 128(%r14) +# NOALIAS-NEXT: [0,2] .D=eeeeeeeER . . addq $44, 192(%r14) +# NOALIAS-NEXT: [0,3] .D==eeeeeeeER . . addq $44, 256(%r14) +# NOALIAS-NEXT: [0,4] . D==eeeeeeeER . . addq $44, 320(%r14) +# NOALIAS-NEXT: [0,5] . D===eeeeeeeER. . addq $44, 384(%r14) +# NOALIAS-NEXT: [0,6] . D===eeeeeeeER . addq $44, 448(%r14) +# NOALIAS-NEXT: [0,7] . D====eeeeeeeER . addq $44, 512(%r14) +# NOALIAS-NEXT: [0,8] . D====eeeeeeeER. addq $44, 576(%r14) +# NOALIAS-NEXT: [0,9] . D=====eeeeeeeER addq $44, 640(%r14) + +# YESALIAS: [0,0] DeeeeeeeER. . . . . . . . . . . . . . addq $44, 64(%r14) +# YESALIAS-NEXT: [0,1] D=======eeeeeeeER . . . . . . . . . . . . addq $44, 128(%r14) +# YESALIAS-NEXT: [0,2] .D=============eeeeeeeER . . . . . . . . . . . addq $44, 192(%r14) +# YESALIAS-NEXT: [0,3] .D====================eeeeeeeER . . . . . . . . . addq $44, 256(%r14) +# YESALIAS-NEXT: [0,4] . D==========================eeeeeeeER . . . . . . . . addq $44, 320(%r14) +# YESALIAS-NEXT: [0,5] . D=================================eeeeeeeER. . . . . . . addq $44, 384(%r14) +# YESALIAS-NEXT: [0,6] . D=======================================eeeeeeeER . . . . . addq $44, 448(%r14) +# YESALIAS-NEXT: [0,7] . D==============================================eeeeeeeER . . . . addq $44, 512(%r14) +# YESALIAS-NEXT: [0,8] . D====================================================eeeeeeeER . . addq $44, 576(%r14) +# YESALIAS-NEXT: [0,9] . D===========================================================eeeeeeeER addq $44, 640(%r14) + +# ALL: Average Wait times (based on the timeline view): +# ALL-NEXT: [0]: Executions +# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue +# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# ALL-NEXT: [3]: Average time elapsed from WB until retire stage + +# ALL: [0] [1] [2] [3] +# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14) + +# NOALIAS-NEXT: 1. 1 2.0 1.0 0.0 addq $44, 128(%r14) +# NOALIAS-NEXT: 2. 1 2.0 1.0 0.0 addq $44, 192(%r14) +# NOALIAS-NEXT: 3. 1 3.0 1.0 0.0 addq $44, 256(%r14) +# NOALIAS-NEXT: 4. 1 3.0 1.0 0.0 addq $44, 320(%r14) +# NOALIAS-NEXT: 5. 1 4.0 1.0 0.0 addq $44, 384(%r14) +# NOALIAS-NEXT: 6. 1 4.0 1.0 0.0 addq $44, 448(%r14) +# NOALIAS-NEXT: 7. 1 5.0 1.0 0.0 addq $44, 512(%r14) +# NOALIAS-NEXT: 8. 1 5.0 1.0 0.0 addq $44, 576(%r14) +# NOALIAS-NEXT: 9. 1 6.0 1.0 0.0 addq $44, 640(%r14) +# NOALIAS-NEXT: 1 3.5 1.0 0.0 + +# YESALIAS-NEXT: 1. 1 8.0 0.0 0.0 addq $44, 128(%r14) +# YESALIAS-NEXT: 2. 1 14.0 0.0 0.0 addq $44, 192(%r14) +# YESALIAS-NEXT: 3. 1 21.0 0.0 0.0 addq $44, 256(%r14) +# YESALIAS-NEXT: 4. 1 27.0 0.0 0.0 addq $44, 320(%r14) +# YESALIAS-NEXT: 5. 1 34.0 0.0 0.0 addq $44, 384(%r14) +# YESALIAS-NEXT: 6. 1 40.0 0.0 0.0 addq $44, 448(%r14) +# YESALIAS-NEXT: 7. 1 47.0 0.0 0.0 addq $44, 512(%r14) +# YESALIAS-NEXT: 8. 1 53.0 0.0 0.0 addq $44, 576(%r14) +# YESALIAS-NEXT: 9. 
1 60.0 0.0 0.0 addq $44, 640(%r14) +# YESALIAS-NEXT: 1 30.5 0.1 0.0 diff --git a/llvm/test/tools/llvm-objcopy/COFF/debug-dir-unmapped.test b/llvm/test/tools/llvm-objcopy/COFF/debug-dir-unmapped.test new file mode 100644 index 0000000000000..b7966c14d7d9e --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/COFF/debug-dir-unmapped.test @@ -0,0 +1,52 @@ +## Check that we error out when trying to patch up debug directories that +## point to data outside of the runtime mapped sections (as we don't try to +## locate and copy such payloads from the padding areas of the input file). + +# RUN: yaml2obj %s -o %t.in.exe + +# RUN: not llvm-objcopy --remove-section .rdata %t.in.exe %t.out.exe 2>&1 | FileCheck %s + +# CHECK: error: '{{.*}}{{/|\\}}debug-dir-unmapped.test.tmp.out.exe': debug directory payload outside of mapped sections not supported + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 1073741824 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + Debug: + RelativeVirtualAddress: 12288 + Size: 28 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ ] + VirtualAddress: 4096 + VirtualSize: 16 + SectionData: C3909090909090909090909090909090 + - Name: .rdata + Characteristics: [ ] + VirtualAddress: 8192 + VirtualSize: 32 + SectionData: FFFFFFFF00000000FFFFFFFF00000000 + - Name: .buildid + Characteristics: [ ] + VirtualAddress: 12288 + VirtualSize: 28 + SectionData: 0000000042EE405C00000000020000001900000000000000E4070000 +symbols: +... diff --git a/llvm/test/tools/llvm-objcopy/COFF/patch-debug-dir2.test b/llvm/test/tools/llvm-objcopy/COFF/patch-debug-dir2.test new file mode 100644 index 0000000000000..c18a73a4af3b8 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/COFF/patch-debug-dir2.test @@ -0,0 +1,68 @@ +## Check that we successfully patch the PointerToRawData field in more than +## one debug directory entry. 
+ +# RUN: yaml2obj %s -o %t.in.exe + +# RUN: llvm-readobj --coff-debug-directory %t.in.exe | FileCheck %s --check-prefixes=DEBUG-DIRS,DEBUG-DIRS-PRE +# RUN: llvm-readobj --sections %t.in.exe | FileCheck %s --check-prefixes=SECTIONS,SECTIONS-PRE +# RUN: llvm-objcopy --remove-section .rdata %t.in.exe %t.out.exe +# RUN: llvm-readobj --coff-debug-directory %t.out.exe | FileCheck %s --check-prefixes=DEBUG-DIRS,DEBUG-DIRS-POST +# RUN: llvm-readobj --sections %t.out.exe | FileCheck %s --check-prefixes=SECTIONS,SECTIONS-POST + +# DEBUG-DIRS: AddressOfRawData: 0x3038 +# DEBUG-DIRS-PRE-NEXT: PointerToRawData: 0x638 +# DEBUG-DIRS-POST-NEXT: PointerToRawData: 0x438 + +# DEBUG-DIRS: AddressOfRawData: 0x3051 +# DEBUG-DIRS-PRE-NEXT: PointerToRawData: 0x651 +# DEBUG-DIRS-POST-NEXT: PointerToRawData: 0x451 + +# SECTIONS: Name: .buildid +# SECTIONS-NEXT: VirtualSize: +# SECTIONS-NEXT: VirtualAddress: +# SECTIONS-NEXT: RawDataSize: +# SECTIONS-PRE-NEXT: PointerToRawData: 0x600 +# SECTIONS-POST-NEXT: PointerToRawData: 0x400 + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 5368709120 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + Debug: + RelativeVirtualAddress: 12288 + Size: 56 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ ] + VirtualAddress: 4096 + VirtualSize: 1 + SectionData: C3 + - Name: .rdata + Characteristics: [ ] + VirtualAddress: 8192 + VirtualSize: 32 + SectionData: FFFFFFFFFFFFFFFF0000000000000000FFFFFFFFFFFFFFFF0000000000000000 + - Name: .buildid + Characteristics: [ ] + VirtualAddress: 12288 + VirtualSize: 85 + SectionData: 0000000046C7A65E00000000020000001900000038300000380600000000000046C7A65E000000001400000004000000513000005106000052534453B3411F5F27A80D2A4C4C44205044422E010000000001000000 +symbols: +... diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-no-paddr.test b/llvm/test/tools/llvm-objcopy/ELF/binary-no-paddr.test index c492785a8b405..99cf19fad9c4b 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/binary-no-paddr.test +++ b/llvm/test/tools/llvm-objcopy/ELF/binary-no-paddr.test @@ -1,7 +1,16 @@ -# RUN: yaml2obj %s -o %t -# RUN: llvm-objcopy -O binary %t %t2 -# RUN: od -t x2 -v %t2 | FileCheck %s --ignore-case -# RUN: wc -c < %t2 | FileCheck %s --check-prefix=SIZE +# RUN: yaml2obj -D PADDR=1 %s -o %t1 +# RUN: llvm-objcopy -O binary %t1 %t1.out +# RUN: od -t x2 -v %t1.out | FileCheck %s --ignore-case +# RUN: wc -c < %t1.out | FileCheck %s --check-prefix=SIZE + +## When all p_paddr fields are 0, GNU objcopy resets LMA to VMA +## and gives a different output. +## https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=6ffd79000b45e77b3625143932ffbf781b6aecab +## We don't implement this special rule. The p_paddr=0 output is the same as +## the p_paddr=1 case. 
+# RUN: yaml2obj -D PADDR=0 %s -o %t0 +# RUN: llvm-objcopy -O binary %t0 %t0.out +# RUN: cmp %t1.out %t0.out !ELF FileHeader: @@ -26,17 +35,15 @@ ProgramHeaders: - Type: PT_LOAD Flags: [ PF_X, PF_R ] VAddr: 0x1000 - PAddr: 0x0000 - Align: 0x1000 + PAddr: [[PADDR]] Sections: - Section: .text - Type: PT_LOAD Flags: [ PF_R, PF_W ] VAddr: 0x1004 - PAddr: 0x0000 - Align: 0x1000 + PAddr: [[PADDR]] Sections: - Section: .data -# CHECK: 0000000 c3c3 c3c3 3232 -# SIZE: 6 +# CHECK: 0000000 3232 c3c3 +# SIZE: 4 diff --git a/llvm/test/tools/llvm-objcopy/ELF/binary-paddr.test b/llvm/test/tools/llvm-objcopy/ELF/binary-paddr.test index fd78d436930d1..f7974a60ffd67 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/binary-paddr.test +++ b/llvm/test/tools/llvm-objcopy/ELF/binary-paddr.test @@ -48,7 +48,7 @@ ProgramHeaders: # RUN: od -A x -t x2 %t2.out | FileCheck %s --check-prefix=CHECK2 --ignore-case # RUN: wc -c %t2.out | FileCheck %s --check-prefix=SIZE2 -## The computed LMA of .data is 0x4000. The minimum LMA of all sections is 0x1000. +## The computed LMA of .data is 0x4000. The minimum LMA of all non-empty sections is 0x1000. ## The content of .data will be written at 0x4000-0x1000 = 0x3000. # CHECK2: 000000 c3c3 c3c3 0000 0000 0000 0000 0000 0000 # CHECK2-NEXT: 000010 0000 0000 0000 0000 0000 0000 0000 0000 @@ -93,7 +93,7 @@ ProgramHeaders: # RUN: od -A x -t x2 %t3.out | FileCheck %s --check-prefix=CHECK3 --ignore-case # RUN: wc -c %t3.out | FileCheck %s --check-prefix=SIZE3 -## The minimum LMA of all sections is 0x1000. +## The minimum LMA of all non-empty sections is 0x1000. ## The content of .data will be written at 0x3000-0x1000 = 0x2000. # CHECK3: 000000 c3c3 c3c3 0000 0000 0000 0000 0000 0000 # CHECK3-NEXT: 000010 0000 0000 0000 0000 0000 0000 0000 0000 @@ -129,3 +129,58 @@ ProgramHeaders: VAddr: 0x3000 Sections: - Section: .data + +## The first section (.text) is empty. Test that we skip its LMA until the first +## non-empty section, otherwise we would leave a large number of leading zeroes. +# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: llvm-objcopy -O binary %t4 %t4.out +# RUN: od -A x -t x2 %t4.out | FileCheck %s --check-prefix=SKIPEMPTY + +# SKIPEMPTY: 000000 3232 +# SKIPEMPTY-NEXT: 000002 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x1000 + - Name: gap + Type: Fill + Size: 0x1000 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] + Content: "3232" + +## The last section (.data) is empty. Test that we stop dumping after the last +## non-empty section, otherwise we would leave a large number of trailing zeroes. 
+# RUN: yaml2obj --docnum=5 %s -o %t5 +# RUN: llvm-objcopy -O binary %t5 %t5.out +# RUN: od -A x -t x2 %t5.out | FileCheck %s --check-prefix=SKIPEMPTY + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x1000 + Content: "3232" + - Name: gap + Type: Fill + Size: 0xffd + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] diff --git a/llvm/test/tools/llvm-objcopy/ELF/dump-section.test b/llvm/test/tools/llvm-objcopy/ELF/dump-section.test index 176cc791801aa..fadd955a0f938 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/dump-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/dump-section.test @@ -4,9 +4,11 @@ # RUN: llvm-objcopy --dump-section .text=%t4 %t %t5 # RUN: llvm-objcopy --dump-section .foo=%t6 %t %t7 # RUN: not llvm-objcopy --dump-section .bar=%t8 %t %t9 2>&1 | FileCheck %s --check-prefix=NOBITS -DINPUT=%t +# RUN: llvm-objcopy --dump-section .empty=%t.empty %t /dev/null # RUN: od -t x1 %t2 | FileCheck %s --ignore-case # RUN: od -t x1 %t6 | FileCheck %s --ignore-case --check-prefix=NON-ALLOC # RUN: wc -c %t2 | FileCheck %s --check-prefix=SIZE +# RUN: wc -c %t.empty | FileCheck %s --check-prefix=EMPTY # RUN: diff %t2 %t3 # RUN: diff %t4 %t3 @@ -26,6 +28,9 @@ Sections: Type: SHT_PROGBITS Flags: [ SHF_WRITE ] Content: "CAFE" + - Name: .empty + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] - Name: .bar Type: SHT_NOBITS Flags: [ SHF_WRITE ] @@ -35,10 +40,12 @@ ProgramHeaders: Sections: - Section: .text -#CHECK: 0000000 de ad be ef +# CHECK: 0000000 de ad be ef + +# NON-ALLOC: 0000000 ca fe -#NON-ALLOC: 0000000 ca fe +# SIZE: 4 -#SIZE: 4 +# NOBITS: error: '[[INPUT]]': cannot dump section '.bar': it has no contents -#NOBITS: error: '[[INPUT]]': cannot dump section '.bar': it has no contents +# EMPTY: 0 diff --git a/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-all-with-dwarf.yaml b/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-all-with-dwarf.yaml index 268cc2549e728..213d66fd43714 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-all-with-dwarf.yaml +++ b/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-all-with-dwarf.yaml @@ -33,6 +33,23 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + relocations: + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 + - address: 0x0 + symbolnum: 1 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 - sectname: __data segname: __DATA addr: 0x0000000000000024 @@ -81,6 +98,39 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + relocations: + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 - sectname: __debug_macinfo segname: __DWARF addr: 0x0000000000000166 @@ -153,6 +203,15 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + relocations: + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 - sectname: __eh_frame 
segname: __TEXT addr: 0x0000000000000290 @@ -177,6 +236,15 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + relocations: + - address: 0x0 + symbolnum: 0 + pcrel: false + length: 0 + extern: true + type: 0 + scattered: false + value: 0 - cmd: LC_BUILD_VERSION cmdsize: 24 platform: 1 diff --git a/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test b/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test index 083011badc3ba..67e2be00eb925 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test +++ b/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-32-copy.test @@ -31,8 +31,8 @@ LoadCommands: size: 61 offset: 0x00000198 align: 4 - reloff: 0x00000250 - nreloc: 1 + reloff: 0x0 + nreloc: 0 flags: 0x80000400 reserved1: 0x00000000 reserved2: 0x00000000 @@ -43,8 +43,8 @@ LoadCommands: size: 40 offset: 0x000001D8 align: 2 - reloff: 0x00000258 - nreloc: 2 + reloff: 0x00000 + nreloc: 0 flags: 0x02000000 reserved1: 0x00000000 reserved2: 0x00000000 @@ -67,9 +67,9 @@ LoadCommands: sdk: 0 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 616 + symoff: 592 nsyms: 2 - stroff: 640 + stroff: 616 strsize: 16 - cmd: LC_DYSYMTAB cmdsize: 80 diff --git a/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test b/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test index ff8e4a0096bb8..5963e57d1ce63 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test +++ b/llvm/test/tools/llvm-objcopy/MachO/basic-big-endian-64-copy.test @@ -32,8 +32,8 @@ LoadCommands: size: 93 offset: 0x00000220 align: 4 - reloff: 0x00000310 - nreloc: 3 + reloff: 0x0 + nreloc: 0 flags: 0x80000400 reserved1: 0x00000000 reserved2: 0x00000000 @@ -56,8 +56,8 @@ LoadCommands: size: 32 offset: 0x000002B0 align: 3 - reloff: 0x00000328 - nreloc: 1 + reloff: 0x0 + nreloc: 0 flags: 0x02000000 reserved1: 0x00000000 reserved2: 0x00000000 @@ -80,9 +80,9 @@ LoadCommands: sdk: 0 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 816 + symoff: 784 nsyms: 2 - stroff: 848 + stroff: 816 strsize: 36 - cmd: LC_DYSYMTAB cmdsize: 80 diff --git a/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test b/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test index 7e6663945179a..54cb3193f714e 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test +++ b/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-32-copy.test @@ -31,8 +31,8 @@ LoadCommands: size: 61 offset: 0x00000198 align: 4 - reloff: 0x00000250 - nreloc: 1 + reloff: 0x0 + nreloc: 0 flags: 0x80000400 reserved1: 0x00000000 reserved2: 0x00000000 @@ -43,8 +43,8 @@ LoadCommands: size: 40 offset: 0x000001D8 align: 2 - reloff: 0x00000258 - nreloc: 2 + reloff: 0x0 + nreloc: 0 flags: 0x02000000 reserved1: 0x00000000 reserved2: 0x00000000 @@ -67,9 +67,9 @@ LoadCommands: sdk: 0 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 616 + symoff: 592 nsyms: 2 - stroff: 640 + stroff: 616 strsize: 16 - cmd: LC_DYSYMTAB cmdsize: 80 diff --git a/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test b/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test index 18e3bbb997ea5..8795602e51ada 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test +++ b/llvm/test/tools/llvm-objcopy/MachO/basic-little-endian-64-copy.test @@ -32,8 +32,8 @@ LoadCommands: size: 93 offset: 0x00000220 align: 4 - reloff: 0x00000310 - nreloc: 3 + reloff: 0x0 + nreloc: 0 flags: 0x80000400 reserved1: 0x00000000 reserved2: 0x00000000 @@ -56,8 +56,8 @@ LoadCommands: 
size: 32 offset: 0x000002B0 align: 3 - reloff: 0x00000328 - nreloc: 1 + reloff: 0x0 + nreloc: 0 flags: 0x02000000 reserved1: 0x00000000 reserved2: 0x00000000 @@ -80,9 +80,9 @@ LoadCommands: sdk: 0 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 816 + symoff: 784 nsyms: 2 - stroff: 848 + stroff: 816 strsize: 36 - cmd: LC_DYSYMTAB cmdsize: 80 diff --git a/llvm/test/tools/llvm-objcopy/MachO/relocations.test b/llvm/test/tools/llvm-objcopy/MachO/relocations.test new file mode 100644 index 0000000000000..8859b9e4ac144 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/relocations.test @@ -0,0 +1,95 @@ +# RUN: yaml2obj %s -o %t + +## Show that llvm-objcopy copies relocation entries where r_extern = 0. +# RUN: llvm-objcopy %t %t2 +# RUN: cmp %t %t2 + +## Show that llvm-objcopy updates section indices properly. +# RUN: llvm-objcopy --remove-section=__DATA,__foo %t %t3 +# RUN: llvm-objdump --macho --reloc %t3 | FileCheck %s + +# CHECK: Relocation information (__DATA,__bar) 2 entries +# CHECK-NEXT: address pcrel length extern type scattered symbolnum/value +# CHECK-NEXT: 00000000 False quad False SUB False 2 (__DATA,__bar) +# CHECK-NEXT: 00000000 False quad False UNSIGND False 1 (__TEXT,__text) + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 1 + sizeofcmds: 312 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: '' + vmaddr: 0 + vmsize: 24 + fileoff: 344 + filesize: 24 + maxprot: 7 + initprot: 7 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 8 + offset: 0x00000158 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000000000000' + - sectname: __foo + segname: __DATA + addr: 0x0000000000000008 + size: 8 + offset: 0x00000160 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000000000000' + - sectname: __bar + segname: __DATA + addr: 0x0000000000000010 + size: 8 + offset: 0x00000168 + align: 0 + reloff: 0x00000170 + nreloc: 2 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: F0FFFFFFFFFFFFFF + relocations: + - address: 0x00000000 + symbolnum: 3 + pcrel: false + length: 3 + extern: false + type: 5 + scattered: false + value: 0 + - address: 0x00000000 + symbolnum: 1 + pcrel: false + length: 3 + extern: false + type: 0 + scattered: false + value: 0 +... 
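A note on what the --remove-section run above verifies: with r_extern == 0, r_symbolnum holds a 1-based section ordinal rather than a symbol-table index, so deleting a section has to renumber every ordinal above the removed one. A minimal C++ sketch of that invariant (illustrative only — hypothetical names, not the actual llvm-objcopy implementation):

  #include <cstdint>
  #include <vector>

  struct Reloc {
    uint32_t Symbolnum; // 1-based section ordinal when Extern is false
    bool Extern;        // r_extern: symbol index vs. section ordinal
  };

  // After dropping the section with ordinal `Removed`, shift the ordinals of
  // all later sections down by one, exactly as the CHECK lines above expect
  // (__bar goes from 3 to 2, while __text stays 1).
  void renumberOrdinals(std::vector<Reloc> &Relocs, uint32_t Removed) {
    for (Reloc &R : Relocs)
      if (!R.Extern && R.Symbolnum > Removed)
        --R.Symbolnum;
  }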
diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test b/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test index 817ca0ecb561e..604f011d60353 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test +++ b/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test @@ -23,19 +23,19 @@ # RELOC: Relocations [ # RELOC-NEXT: Section __text { -# RELOC-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - -# RELOC-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - +# RELOC-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar +# RELOC-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _main # RELOC-NEXT: } # DEBUG: Section __debug_info { -# DEBUG-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - -# DEBUG-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - -# DEBUG-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - -# DEBUG-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - +# DEBUG-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar +# DEBUG-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar +# DEBUG-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar +# DEBUG-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar # DEBUG-NEXT: } # RELOC-NEXT: Section __compact_unwind { -# RELOC-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - +# RELOC-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar # RELOC-NEXT: } # DEBUG-NEXT: Section __debug_line { -# DEBUG-NEXT: 0x0 0 0 0 X86_64_RELOC_UNSIGNED 0 - +# DEBUG-NEXT: 0x0 0 0 1 X86_64_RELOC_UNSIGNED 0 _bar # DEBUG-NEXT: } # RELOC-NEXT: ] diff --git a/llvm/test/tools/llvm-objcopy/MachO/symbol-table.test b/llvm/test/tools/llvm-objcopy/MachO/symbol-table.test new file mode 100644 index 0000000000000..7fec35fde1df0 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/symbol-table.test @@ -0,0 +1,437 @@ +## This test is based on a trimmed down version of the binary built as follows: +## a.c: +## __attribute__((used)) static int PrivateSymbol; +## __attribute__((visibility("hidden"))) int PrivateExternalSymbol; +## __attribute__((used)) int CommonSymbol; +## extern int UndefinedExternalSymbol; +## // Defined external symbol +## int main() { +## return PrivateSymbol + PrivateExternalSymbol + CommonSymbol + +## UndefinedExternalSymbol; +## } +## build command: +## clang -g -fno-exceptions -fno-unwind-tables -undefined dynamic_lookup \ +## a.c -o a.exe +## All the load commands except the symbol table and its transitive dependencies +## have been removed. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-objcopy %t %t.copy +# RUN: cmp %t %t.copy + +## Verify that the binary is valid and check its symbol table. 
+# RUN: llvm-readobj --symbols %t.copy | FileCheck %s + +# CHECK: Symbols [ +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _PrivateSymbol (169) +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __bss (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100001008 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _PrivateExternalSymbol (121) +# CHECK-NEXT: PrivateExtern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __common (0x5) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100001010 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: /Users/aaaaaaaa/ (191) +# CHECK-NEXT: Type: SymDebugTable (0x64) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: main.c (184) +# CHECK-NEXT: Type: SymDebugTable (0x64) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: /var/folders/1d/zyfdpp7j2995h5hqspjy28bc0000gn/T/main-c5ac21.o (38) +# CHECK-NEXT: Type: SymDebugTable (0x66) +# CHECK-NEXT: Section: __got (0x3) +# CHECK-NEXT: RefType: ReferenceFlagUndefinedLazy (0x1) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x5EA74C81 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (207) +# CHECK-NEXT: Type: SymDebugTable (0x2E) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100000F80 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main (101) +# CHECK-NEXT: Type: SymDebugTable (0x24) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100000F80 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (207) +# CHECK-NEXT: Type: SymDebugTable (0x24) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x2D +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (207) +# CHECK-NEXT: Type: SymDebugTable (0x4E) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x2D +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _PrivateSymbol (169) +# CHECK-NEXT: Type: SymDebugTable (0x26) +# CHECK-NEXT: Section: __bss (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100001008 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _CommonSymbol (107) +# CHECK-NEXT: Type: SymDebugTable (0x20) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _PrivateExternalSymbol (121) +# CHECK-NEXT: Type: SymDebugTable (0x20) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: (207) +# CHECK-NEXT: 
Type: SymDebugTable (0x64) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _CommonSymbol (107) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __common (0x5) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x10000100C +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: __mh_execute_header (18) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x10) +# CHECK-NEXT: ReferencedDynamically (0x10) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100000000 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main (101) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x100000F80 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _UndefinedExternalSymbol (144) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0xFE00) +# CHECK-NEXT: AltEntry (0x200) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: dyld_stub_binder (1) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x100) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: ] + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 5 + sizeofcmds: 720 + flags: 0x00200085 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000100000F80 + size: 45 + offset: 0x00000F80 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 554889E5488B0575000000488D0D7A000000C745FC000000008B156900000003156B0000000311031089D05DC3 + - sectname: __unwind_info + segname: __TEXT + addr: 0x0000000100000FB0 + size: 72 + offset: 0x00000FB0 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 010000001C000000000000001C000000000000001C00000002000000800F00003400000034000000AE0F00000000000034000000030000000C000100100001000000000000000000 + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 3 + flags: 0 + Sections: + - sectname: __got + segname: __DATA + addr: 0x0000000100001000 + size: 8 + offset: 0x00001000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000006 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000000000000' + - sectname: __bss + segname: __DATA + addr: 0x0000000100001008 + size: 4 + offset: 0x00000000 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000001 + reserved1: 0x00000000 + 
reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __common + segname: __DATA + addr: 0x000000010000100C + size: 8 + offset: 0x00000000 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000001 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 500 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 8192 + nsyms: 18 + stroff: 8484 + strsize: 208 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 13 + iextdefsym: 13 + nextdefsym: 3 + iundefsym: 16 + nundefsym: 2 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 8480 + nindirectsyms: 1 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 169 + n_type: 0x0E + n_sect: 4 + n_desc: 0 + n_value: 4294971400 + - n_strx: 121 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294971408 + - n_strx: 191 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 184 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 38 + n_type: 0x66 + n_sect: 3 + n_desc: 1 + n_value: 1588022401 + - n_strx: 207 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + - n_strx: 101 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + - n_strx: 207 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 45 + - n_strx: 207 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 45 + - n_strx: 169 + n_type: 0x26 + n_sect: 4 + n_desc: 0 + n_value: 4294971400 + - n_strx: 107 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 121 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 207 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 107 + n_type: 0x0F + n_sect: 5 + n_desc: 0 + n_value: 4294971404 + - n_strx: 18 + n_type: 0x0F + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 101 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + - n_strx: 144 + n_type: 0x01 + n_sect: 0 + n_desc: 65024 + n_value: 0 + - n_strx: 1 + n_type: 0x01 + n_sect: 0 + n_desc: 256 + n_value: 0 + StringTable: + - '' + - dyld_stub_binder + - __mh_execute_header + - '/var/folders/1d/zyfdpp7j2995h5hqspjy28bc0000gn/T/main-c5ac21.o' + - _main + - _CommonSymbol + - _PrivateExternalSymbol + - _UndefinedExternalSymbol + - _PrivateSymbol + - main.c + - '/Users/aaaaaaaa/' +... 
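For reading the NameList above: n_type is a packed byte, and the values used here (0x0E, 0x1E, 0x0F, 0x64, 0x66, 0x24, ...) decode via the standard Mach-O nlist masks. A small reference sketch (reference code, not part of this patch):

  #include <cstdint>
  #include <cstdio>

  // Standard <mach-o/nlist.h> masks.
  constexpr uint8_t N_STAB = 0xE0; // any of these bits set => debug (stab) entry
  constexpr uint8_t N_PEXT = 0x10; // private external
  constexpr uint8_t N_TYPE = 0x0E; // type mask (0x0E itself is N_SECT)
  constexpr uint8_t N_EXT = 0x01;  // external

  void describe(uint8_t NType) {
    if (NType & N_STAB) { // e.g. 0x64 (N_SO), 0x66 (N_OSO), 0x24 (N_FUN)
      std::printf("stab 0x%02X\n", NType);
      return;
    }
    // e.g. 0x0E -> section symbol, 0x1E -> N_SECT|N_PEXT, 0x0F -> N_SECT|N_EXT.
    std::printf("type 0x%02X%s%s\n", NType & N_TYPE,
                (NType & N_PEXT) ? " pext" : "", (NType & N_EXT) ? " ext" : "");
  }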
diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-code-data-mix.s b/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-code-data-mix.s
index b0d5d7a53eae9..52cb0677c7804 100644
--- a/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-code-data-mix.s
+++ b/llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-code-data-mix.s
@@ -1,4 +1,6 @@
-@RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+@ RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s -o %t
+@ RUN: llvm-objdump -d %t | FileCheck %s
+@ RUN: llvm-objdump -d -r %t | FileCheck --check-prefixes=CHECK,RELOC %s
 
 .cpu arm7tdmi
 .global myInt
@@ -32,5 +34,8 @@ myStr:
 .string "test string"
 
-@CHECK: .word 0x00000000
-@CHECK-DAG: 74 65 73 74 20 73 74 72 test str
+@ CHECK: .word 0x00000000
+@ RELOC-NEXT: R_ARM_ABS32 myInt
+@ CHECK-EMPTY:
+@ CHECK-NEXT: <myStr>:
+@ CHECK-NEXT: 74 65 73 74 20 73 74 72 test str
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
index f1470c8479781..a7ec7a2997e5f 100644
--- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
@@ -2,10 +2,12 @@
 # RUN: yaml2obj %s --docnum=1 -o %t
 # RUN: llvm-objdump %t -d | FileCheck %s --check-prefix=EXEC
 
-# EXEC: Disassembly of section .text1:
-# EXEC: 4000: e8 00 00 00 00 callq 0x4005
-# EXEC: Disassembly of section .text2:
-# EXEC: 4005: e8 12 34 56 78 callq 0x7856741c
+# EXEC-LABEL: <first>:
+# EXEC-NEXT: 4000: e8 00 00 00 00 callq 0x4005
+# EXEC-LABEL: <third>:
+# EXEC-NEXT: 4005: e8 12 34 56 78 callq 0x7856741c
+# EXEC-LABEL: <fourth>:
+# EXEC-NEXT: 400a: 8b 05 f0 0f 00 00 movl 4080(%rip), %eax # 5000 <data1>
 
 --- !ELF
 FileHeader:
@@ -28,6 +30,11 @@ Sections:
     Type: SHT_PROGBITS
     Flags: [SHF_ALLOC, SHF_EXECINSTR]
    Address: 0x400A
+    Content: '8b05f00f0000' # Case 3: Memory operands
+  - Name: .data
+    Type: SHT_PROGBITS
+    Flags: [SHF_ALLOC, SHF_WRITE]
+    Address: 0x5000
 Symbols:
   - Name: first
     Section: .text1
@@ -41,6 +48,9 @@ Symbols:
   - Name: fourth
     Section: .text3
     Value: 0x400A
+  - Name: data1
+    Section: .data
+    Value: 0x5000
 
 # RUN: yaml2obj %s --docnum=2 -o %t.o
 # RUN: llvm-objdump %t.o -d | FileCheck %s --check-prefix=REL
diff --git a/llvm/test/tools/llvm-readobj/COFF/debug-directory-unmapped.test b/llvm/test/tools/llvm-readobj/COFF/debug-directory-unmapped.test
new file mode 100644
index 0000000000000..789d21e89b6d2
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/COFF/debug-directory-unmapped.test
@@ -0,0 +1,70 @@
+## Test that printing debug directories that aren't part of the runtime
+## mapped sections doesn't fail. Currently llvm-readobj only prints the
+## entry itself and not the payload. Note that this test input contains no
+## meaningful data at the location where it claims the debug entry payload
+## resides.
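+## (With AddressOfRawData == 0 the payload has no virtual address, so it can
+## only be reached through PointerToRawData as a raw file offset; that is the
+## situation this input exercises.)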
+ +# RUN: yaml2obj %s -o %t.exe + +# RUN: llvm-readobj --coff-debug-directory %t.exe | FileCheck %s + +# CHECK: DebugDirectory [ +# CHECK-NEXT: DebugEntry { +# CHECK-NEXT: Characteristics: 0x0 +# CHECK-NEXT: TimeDateStamp: 2019-01-17 21:06:10 (0x5C40EE42) +# CHECK-NEXT: MajorVersion: 0x0 +# CHECK-NEXT: MinorVersion: 0x0 +# CHECK-NEXT: Type: CodeView (0x2) +# CHECK-NEXT: SizeOfData: 0x19 +# CHECK-NEXT: AddressOfRawData: 0x0 +# CHECK-NEXT: PointerToRawData: 0x3E4 +# CHECK-NEXT: } +# CHECK-NEXT: DebugEntry { +# CHECK-NEXT: Characteristics: 0x0 +# CHECK-NEXT: TimeDateStamp: 2019-01-17 21:06:10 (0x5C40EE42) +# CHECK-NEXT: MajorVersion: 0x0 +# CHECK-NEXT: MinorVersion: 0x0 +# CHECK-NEXT: Type: ExtendedDLLCharacteristics (0x14) +# CHECK-NEXT: SizeOfData: 0x4 +# CHECK-NEXT: AddressOfRawData: 0x0 +# CHECK-NEXT: PointerToRawData: 0x3E0 +# CHECK-NEXT: } +# CHECK-NEXT: ] + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 1073741824 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + Debug: + RelativeVirtualAddress: 8192 + Size: 56 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ ] + VirtualAddress: 4096 + VirtualSize: 16 + SectionData: C3909090909090909090909090909090 + - Name: .buildid + Characteristics: [ ] + VirtualAddress: 8192 + VirtualSize: 56 + SectionData: 0000000042EE405C00000000020000001900000000000000E40300000000000042EE405C00000000140000000400000000000000E0030000 +symbols: +... diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-i386 b/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-i386 deleted file mode 100644 index a5a9541aec5fd..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-i386 and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-x86_64 b/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-x86_64 deleted file mode 100644 index 6b6520809d719..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/ELF/Inputs/phdrs-elf.exe-x86_64 and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test b/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test index cf6ba17259abc..cc3a38fb167b8 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test @@ -1,97 +1,356 @@ -#Source : -#__thread int a = 1; -#__thread int b; -# -#int main () { -# b = 2; -# throw (a + b) ; -# return 0; -#} -# compiled as clang++ source.cpp -# and clang++ -m32 source.cpp - -RUN: llvm-readelf -l %p/Inputs/phdrs-elf.exe-i386 \ -RUN: | FileCheck %s -check-prefix ELF32 -RUN: llvm-readelf -l %p/Inputs/phdrs-elf.exe-x86_64 \ -RUN: | FileCheck %s -check-prefixes ELF64-PHDRS,ELF64-MAPPING -RUN: llvm-readelf -program-headers %p/Inputs/phdrs-elf.exe-x86_64 \ -RUN: | FileCheck %s -check-prefixes ELF64-PHDRS,ELF64-MAPPING - -# Check that -section-mapping produces a mapping and not the program headers. -RUN: llvm-readelf -section-mapping %p/Inputs/phdrs-elf.exe-x86_64 \ -RUN: | FileCheck %s -check-prefix ELF64-MAPPING -implicit-check-not="Program Headers:" - -# Check that -section-mapping=false -program-headers produces just program headers. 
-RUN: llvm-readelf -section-mapping=false -program-headers %p/Inputs/phdrs-elf.exe-x86_64 \ -RUN: | FileCheck %s -check-prefix ELF64-PHDRS -implicit-check-not="Section to Segment mapping:" - -# Check that only one copy of the section/segment mapping table is produced. -RUN: llvm-readelf -section-mapping -program-headers %p/Inputs/phdrs-elf.exe-x86_64 \ -RUN: | FileCheck %s -check-prefix ELF64-ONEMAPPING - -ELF32: Elf file type is EXEC (Executable file) -ELF32-NEXT: Entry point 0x8048460 -ELF32-NEXT: There are 10 program headers, starting at offset 52 - -ELF32: Program Headers: -ELF32-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -ELF32-NEXT: PHDR 0x000034 0x08048034 0x08048034 0x00140 0x00140 R E 0x4 -ELF32-NEXT: INTERP 0x000174 0x08048174 0x08048174 0x00013 0x00013 R 0x1 -ELF32-NEXT: [Requesting program interpreter: /lib/ld-linux.so.2] -ELF32-NEXT: LOAD 0x000000 0x08048000 0x08048000 0x006d0 0x006d0 R E 0x1000 -ELF32-NEXT: LOAD 0x000ef0 0x08049ef0 0x08049ef0 0x00128 0x00140 RW 0x1000 -ELF32-NEXT: DYNAMIC 0x000f08 0x08049f08 0x08049f08 0x000e8 0x000e8 RW 0x4 -ELF32-NEXT: NOTE 0x000188 0x08048188 0x08048188 0x00044 0x00044 R 0x4 -ELF32-NEXT: TLS 0x000ef0 0x08049ef0 0x08049ef0 0x00004 0x00008 R 0x4 -ELF32-NEXT: GNU_EH_FRAME 0x000640 0x08048640 0x08048640 0x0001c 0x0001c R 0x4 -ELF32-NEXT: GNU_STACK 0x000000 0x00000000 0x00000000 0x00000 0x00000 RW 0x4 -ELF32-NEXT: GNU_RELRO 0x000ef0 0x08049ef0 0x08049ef0 0x00110 0x00110 R 0x1 - -ELF32: Section to Segment mapping: -ELF32-NEXT: Segment Sections... -ELF32-NEXT: 00 -ELF32-NEXT: 01 .interp -ELF32-NEXT: 02 .interp .note.ABI-tag .note.gnu.build-id .hash .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame -ELF32-NEXT: 03 .tdata .ctors .dtors .jcr .dynamic .got .got.plt .data .bss -ELF32-NEXT: 04 .dynamic -ELF32-NEXT: 05 .note.ABI-tag .note.gnu.build-id -ELF32-NEXT: 06 .tdata .tbss -ELF32-NEXT: 07 .eh_frame_hdr -ELF32-NEXT: 08 -ELF32-NEXT: 09 .tdata .ctors .dtors .jcr .dynamic .got -ELF32-NEXT: None .comment .shstrtab .symtab .strtab - -ELF64-PHDRS: Elf file type is EXEC (Executable file) -ELF64-PHDRS-NEXT: Entry point 0x400610 -ELF64-PHDRS-NEXT: There are 10 program headers, starting at offset 64 - -ELF64-PHDRS: Program Headers: -ELF64-PHDRS-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -ELF64-PHDRS-NEXT: PHDR 0x000040 0x0000000000400040 0x0000000000400040 0x000230 0x000230 R E 0x8 -ELF64-PHDRS-NEXT: INTERP 0x000270 0x0000000000400270 0x0000000000400270 0x00001c 0x00001c R 0x1 -ELF64-PHDRS-NEXT: [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2] -ELF64-PHDRS-NEXT: LOAD 0x000000 0x0000000000400000 0x0000000000400000 0x000924 0x000924 R E 0x200000 -ELF64-PHDRS-NEXT: LOAD 0x000db4 0x0000000000600db4 0x0000000000600db4 0x000274 0x0002a4 RW 0x200000 -ELF64-PHDRS-NEXT: DYNAMIC 0x000dd0 0x0000000000600dd0 0x0000000000600dd0 0x000210 0x000210 RW 0x8 -ELF64-PHDRS-NEXT: NOTE 0x00028c 0x000000000040028c 0x000000000040028c 0x000044 0x000044 R 0x4 -ELF64-PHDRS-NEXT: TLS 0x000db4 0x0000000000600db4 0x0000000000600db4 0x000004 0x000008 R 0x4 -ELF64-PHDRS-NEXT: GNU_EH_FRAME 0x00083c 0x000000000040083c 0x000000000040083c 0x00002c 0x00002c R 0x4 -ELF64-PHDRS-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x8 -ELF64-PHDRS-NEXT: GNU_RELRO 0x000db4 0x0000000000600db4 0x0000000000600db4 0x00024c 0x00024c R 0x1 - -ELF64-MAPPING: Section to Segment mapping: -ELF64-MAPPING-NEXT: Segment Sections... 
-ELF64-MAPPING-NEXT: 00
-ELF64-MAPPING-NEXT: 01 .interp
-ELF64-MAPPING-NEXT: 02 .interp .note.ABI-tag .note.gnu.build-id .hash .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame
-ELF64-MAPPING-NEXT: 03 .tdata .init_array .fini_array .jcr .dynamic .got .got.plt .data .bss
-ELF64-MAPPING-NEXT: 04 .dynamic
-ELF64-MAPPING-NEXT: 05 .note.ABI-tag .note.gnu.build-id
-ELF64-MAPPING-NEXT: 06 .tdata .tbss
-ELF64-MAPPING-NEXT: 07 .eh_frame_hdr
-ELF64-MAPPING-NEXT: 08
-ELF64-MAPPING-NEXT: 09 .tdata .init_array .fini_array .jcr .dynamic .got
-ELF64-MAPPING-NEXT: None .comment .shstrtab .symtab .strtab
-
-ELF64-ONEMAPPING: Section to Segment mapping:
-ELF64-ONEMAPPING-NOT: Section to Segment mapping:
+## Check how llvm-readelf dumps program headers and prints sections to segments mapping.
+
+## Check that -l, --program-headers and --segments are the same option.
+# RUN: yaml2obj --docnum=1 -DBITS=32 -DMACHINE=EM_386 %s -o %t32.elf
+# RUN: llvm-readelf -l %t32.elf 2>&1 > %t.readelf-l.txt
+# RUN: llvm-readelf --program-headers %t32.elf 2>&1 > %t.readelf-pheaders.txt
+# RUN: cmp %t.readelf-l.txt %t.readelf-pheaders.txt
+# RUN: llvm-readelf --segments %t32.elf 2>&1 > %t.readelf-segments.txt
+# RUN: cmp %t.readelf-l.txt %t.readelf-segments.txt
+
+## Check we are able to print program headers of different types.
+# RUN: llvm-readelf -l %t32.elf | \
+# RUN:   FileCheck %s --check-prefixes=ELF32,MAPPING --strict-whitespace --match-full-lines
+
+# RUN: yaml2obj --docnum=1 -DBITS=64 -DMACHINE=EM_X86_64 %s -o %t64.elf
+# RUN: llvm-readelf -l %t64.elf | \
+# RUN:   FileCheck %s --check-prefixes=ELF64,MAPPING --strict-whitespace --match-full-lines
+
+# ELF32:There are 23 program headers, starting at offset 52
+# ELF32-EMPTY:
+# ELF32-NEXT:Program Headers:
+# ELF32-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+# ELF32-NEXT: PHDR 0x000314 0x00001000 0x00001000 0x00003 0x00003 W 0x1
+# ELF32-NEXT: PHDR 0x000317 0x00002000 0x00002000 0x00007 0x00007 E 0x1
+# ELF32-NEXT: NULL 0x000317 0x00002000 0x00002000 0x00007 0x00007 E 0x1
+# ELF32-NEXT: DYNAMIC 0x000314 0x00001000 0x00001000 0x00003 0x00003 RWE 0x1
+# ELF32-NEXT: INTERP 0x00031e 0x00003000 0x00003000 0x00004 0x00004 RW 0x1
+# ELF32-NEXT: [Requesting program interpreter: ABC]
+# ELF32-NEXT: NOTE 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: SHLIB 0x000314 0x00001000 0x00001000 0x00001 0x00001 0x1
+# ELF32-NEXT: TLS 0x000322 0x00004000 0x00004000 0x00001 0x00001 0x1
+# ELF32-NEXT: <unknown>: 0x60000000 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: GNU_EH_FRAME 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: SUNW_UNWIND 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: GNU_STACK 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: GNU_RELRO 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: GNU_PROPERTY 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x65a3dbe6 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x65a3dbe7 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x65a41be6 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x6fffffff 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x70000000 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x70000001 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x70000002 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x70000003 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-NEXT: <unknown>: 0x7fffffff 0x000314 0x00001000 0x00001000 0x00003 0x00003 0x1
+# ELF32-EMPTY:
+
+# ELF64:There are 23 program headers, starting at offset 64
+# ELF64-EMPTY:
+# ELF64-NEXT:Program Headers:
+# ELF64-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+# ELF64-NEXT: PHDR 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 W 0x1
+# ELF64-NEXT: PHDR 0x00054b 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1
+# ELF64-NEXT: NULL 0x00054b 0x0000000000002000 0x0000000000002000 0x000007 0x000007 E 0x1
+# ELF64-NEXT: DYNAMIC 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 RWE 0x1
+# ELF64-NEXT: INTERP 0x000552 0x0000000000003000 0x0000000000003000 0x000004 0x000004 RW 0x1
+# ELF64-NEXT: [Requesting program interpreter: ABC]
+# ELF64-NEXT: NOTE 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: SHLIB 0x000548 0x0000000000001000 0x0000000000001000 0x000001 0x000001 0x1
+# ELF64-NEXT: TLS 0x000556 0x0000000000004000 0x0000000000004000 0x000001 0x000001 0x1
+# ELF64-NEXT: <unknown>: 0x60000000 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: GNU_EH_FRAME 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: SUNW_UNWIND 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: GNU_STACK 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: GNU_RELRO 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: GNU_PROPERTY 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x65a3dbe6 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x65a3dbe7 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x65a41be6 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x6fffffff 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x70000000 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x70000001 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x70000002 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x70000003 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-NEXT: <unknown>: 0x7fffffff 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ELF64-EMPTY:
+
+# MAPPING: Section to Segment mapping:
+# MAPPING-NEXT: Segment Sections...
+# MAPPING-NEXT: 00 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 01 .bar.begin .bar.end {{$}} +# MAPPING-NEXT: 02 .bar.begin .bar.end {{$}} +# MAPPING-NEXT: 03 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 04 .interp {{$}} +# MAPPING-NEXT: 05 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 06 .foo.begin {{$}} +# MAPPING-NEXT: 07 .tls {{$}} +# MAPPING-NEXT: 08 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 09 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 10 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 11 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 12 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 13 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 14 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 15 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 16 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 17 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 18 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 19 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 20 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 21 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: 22 .foo.begin .foo.end {{$}} +# MAPPING-NEXT: None .unused .strtab .shstrtab {{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: [[MACHINE]] + Entry: 0x12345678 +Sections: + - Name: .foo.begin + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x1000 + Size: 0x1 + - Name: .foo.end + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Size: 0x2 + - Name: .bar.begin + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x2000 + Size: 0x3 + - Name: .bar.end + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Size: 0x4 + - Name: .interp + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x3000 + Content: "41424300" ## "ABC" + - Name: .tls + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_TLS ] + Address: 0x4000 + Size: 0x1 + - Name: .unused + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_TLS ] + Address: 0x5000 + Size: 0x1 +ProgramHeaders: +## Case 1: an arbitrary segment with sections. + - Type: PT_PHDR + Flags: [ PF_W ] + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 2: another segment with different sections. + - Type: PT_PHDR + Flags: [ PF_X ] + VAddr: 0x2000 + Sections: + - Section: .bar.begin + - Section: .bar.end +## Case 3: the PT_NULL segment. + - Type: PT_NULL + Flags: [ PF_X ] + VAddr: 0x2000 + Sections: + - Section: .bar.begin + - Section: .bar.end +## Case 4: the PT_DYNAMIC segment. + - Type: PT_DYNAMIC + Flags: [ PF_R, PF_W, PF_X ] + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 5: the PT_INTERP segment. + - Type: PT_INTERP + Flags: [ PF_R, PF_W ] + VAddr: 0x3000 + Sections: + - Section: .interp +## Case 6: the PT_NOTE segment. + - Type: PT_NOTE + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 7: the PT_SHLIB segment. + - Type: PT_SHLIB + VAddr: 0x1000 + Sections: + - Section: .foo.begin +## Case 8: the PT_TLS segment. + - Type: PT_TLS + VAddr: 0x4000 + Sections: + - Section: .tls +## Case 9: the PT_LOOS segment. + - Type: 0x60000000 ## PT_LOOS + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 10: the PT_GNU_EH_FRAME segment. + - Type: PT_GNU_EH_FRAME + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 11: the PT_SUNW_UNWIND segment. + - Type: 0x6464e550 ## PT_SUNW_UNWIND + VAddr: 0x1000 + Sections: + - Section: .foo.begin + - Section: .foo.end +## Case 12: the PT_GNU_STACK segment. 
+  - Type: PT_GNU_STACK
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 13: the PT_GNU_RELRO segment.
+  - Type: PT_GNU_RELRO
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 14: the PT_GNU_PROPERTY segment.
+  - Type: PT_GNU_PROPERTY
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 15: the PT_OPENBSD_RANDOMIZE segment.
+  - Type: 0x65a3dbe6 ## PT_OPENBSD_RANDOMIZE
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 16: the PT_OPENBSD_WXNEEDED segment.
+  - Type: 0x65a3dbe7 ## PT_OPENBSD_WXNEEDED
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 17: the PT_OPENBSD_BOOTDATA segment.
+  - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 18: the PT_HIOS segment.
+  - Type: 0x6fffffff ## PT_HIOS
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 19: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment.
+  - Type: 0x70000000 ## PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 20: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment.
+  - Type: 0x70000001 ## PT_ARM_EXIDX, PT_MIPS_RTPROC
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 21: the PT_MIPS_OPTIONS segment.
+  - Type: 0x70000002 ## PT_MIPS_OPTIONS
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 22: the PT_MIPS_ABIFLAGS segment.
+  - Type: 0x70000003 ## PT_MIPS_ABIFLAGS
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 23: the PT_HIPROC segment.
+  - Type: 0x7fffffff ## PT_HIPROC
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+
+## Check how we dump ARM specific program headers.
+# RUN: yaml2obj --docnum=1 -DBITS=64 -DMACHINE=EM_ARM %s -o %tarm.elf
+# RUN: llvm-readelf --program-headers %tarm.elf | FileCheck %s --check-prefix=ARM
+
+# ARM: <unknown>: 0x70000000 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ARM-NEXT: EXIDX 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# ARM-NEXT: <unknown>: 0x70000002 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+
+## Check how we dump MIPS specific program headers.
+# RUN: yaml2obj --docnum=1 -DBITS=64 -DMACHINE=EM_MIPS %s -o %tmips.elf
+# RUN: llvm-readelf --program-headers %tmips.elf | FileCheck %s --check-prefix=MIPS
+
+# MIPS: REGINFO 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-NEXT: RTPROC 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-NEXT: OPTIONS 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+# MIPS-NEXT: ABIFLAGS 0x000548 0x0000000000001000 0x0000000000001000 0x000003 0x000003 0x1
+
+## Check we report a warning when a program interpreter name is non-null-terminated or when
+## PT_INTERP has an offset that goes past the end of the file.
+# RUN: yaml2obj --docnum=2 %s -o %t.err
+
+## Show the size of the output produced. It is used in the YAML below.
+# RUN: wc -c < %t.err | FileCheck %s --check-prefix=SIZE
+# SIZE: 560
+
+## Write the additional 'C', '\0', 'C' bytes to the end.
+# RUN: echo -n -e "C\x00C" >> %t.err
+
+# RUN: llvm-readelf --program-headers %t.err 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err --check-prefix=ERROR-INTERP
+
+# ERROR-INTERP: Type Offset
+# ERROR-INTERP-NEXT: INTERP 0x000[[#%x,OFFSET:0x230]]
+# ERROR-INTERP-NEXT: [Requesting program interpreter: C]
+# ERROR-INTERP-NEXT: INTERP 0x000[[#OFFSET + 1]]
+# ERROR-INTERP-NEXT: [Requesting program interpreter: ]
+# ERROR-INTERP-NEXT: INTERP 0x000[[#OFFSET + 2]]
+# ERROR-INTERP-NEXT: warning: '[[FILE]]': unable to read program interpreter name at offset 0x[[#OFFSET+2]]: it is not null-terminated
+# ERROR-INTERP-NEXT: INTERP 0x000[[#OFFSET + 3]]
+# ERROR-INTERP-NEXT: warning: '[[FILE]]': unable to read program interpreter name at offset 0x[[#OFFSET+3]]: it goes past the end of the file (0x[[#OFFSET + 3]])
+# ERROR-INTERP-NEXT: INTERP 0xaabbccddeeff1122
+# ERROR-INTERP-NEXT: warning: '[[FILE]]': unable to read program interpreter name at offset 0xaabbccddeeff1122: it goes past the end of the file (0x[[#OFFSET + 3]])
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_X86_64
+ProgramHeaders:
+## Case 1: the offset points to the first additional byte.
+  - Type: PT_INTERP
+    Offset: 560
+## Case 2: the offset points to the second additional byte,
+## which is a null byte.
+  - Type: PT_INTERP
+    Offset: 561
+## Case 3: the offset points to the third additional
+## byte, which is the last byte in the file.
+  - Type: PT_INTERP
+    Offset: 562
+## Case 4: the offset goes 1 byte past the end of the file.
+  - Type: PT_INTERP
+    Offset: 563
+## Case 5: an arbitrary large offset that goes past the end of the file.
+  - Type: PT_INTERP
+    Offset: 0xAABBCCDDEEFF1122
diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-shdrs.test b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-shdrs.test
deleted file mode 100644
index 94f827485a583..0000000000000
--- a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-shdrs.test
+++ /dev/null
@@ -1,15 +0,0 @@
-RUN: llvm-objcopy --strip-sections %p/Inputs/phdrs-elf.exe-x86_64 %t.o
-RUN: llvm-readelf --section-mapping %t.o | FileCheck %s
-CHECK: Section to Segment mapping:
-CHECK-NEXT: Segment Sections...
-CHECK-NEXT: 00 {{$}}
-CHECK-NEXT: 01 {{$}}
-CHECK-NEXT: 02 {{$}}
-CHECK-NEXT: 03 {{$}}
-CHECK-NEXT: 04 {{$}}
-CHECK-NEXT: 05 {{$}}
-CHECK-NEXT: 06 {{$}}
-CHECK-NEXT: 07 {{$}}
-CHECK-NEXT: 08 {{$}}
-CHECK-NEXT: 09 {{$}}
-CHECK-NOT: {{.}}
diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test
new file mode 100644
index 0000000000000..ddbfdd5abd93f
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test
@@ -0,0 +1,76 @@
+## Check how llvm-readelf prints sections to segments mapping.
+
+## Check that --section-mapping produces a sections to segments
+## mapping and not anything else.
+# RUN: yaml2obj %s -o %t64.elf
+# RUN: llvm-readelf --section-mapping %t64.elf \
+# RUN:   | FileCheck %s --check-prefix=MAPPING --strict-whitespace --match-full-lines --implicit-check-not={{.}}
+
+# MAPPING: Section to Segment mapping:
+# MAPPING-NEXT: Segment Sections...
+# MAPPING-NEXT: 00 .foo.begin .foo.end {{$}}
+# MAPPING-NEXT: 01 .bar.begin .bar.end {{$}}
+# MAPPING-NEXT: None .strtab .shstrtab {{$}}
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_X86_64
+  Entry: 0x12345678
+Sections:
+  - Name: .foo.begin
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Address: 0x1000
+    Size: 0x1
+  - Name: .foo.end
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Size: 0x2
+  - Name: .bar.begin
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Address: 0x2000
+    Size: 0x3
+  - Name: .bar.end
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Size: 0x4
+ProgramHeaders:
+## Case 1: an arbitrary segment with sections.
+  - Type: PT_PHDR
+    Flags: [ PF_W ]
+    VAddr: 0x1000
+    Sections:
+      - Section: .foo.begin
+      - Section: .foo.end
+## Case 2: another segment with different sections.
+  - Type: PT_PHDR
+    Flags: [ PF_X ]
+    VAddr: 0x2000
+    Sections:
+      - Section: .bar.begin
+      - Section: .bar.end
+
+## Check that --section-mapping=false --program-headers produces just program headers.
+# RUN: llvm-readelf --section-mapping=false --program-headers %t64.elf \
+# RUN:   | FileCheck %s --check-prefix=SEC-MAPPING-FALSE --implicit-check-not="Section to Segment mapping:"
+
+# SEC-MAPPING-FALSE: Program Headers:
+
+## Check that only one copy of the section/segment mapping table is produced
+## when both --section-mapping and --program-headers are used.
+# RUN: llvm-readelf --section-mapping --program-headers %t64.elf \
+# RUN:   | FileCheck %s --check-prefix=MAPPING --implicit-check-not="Section to Segment mapping:"
+
+## Check the output when an object has no section headers.
+# RUN: llvm-objcopy --strip-sections %t64.elf %tno-shdrs.o
+# RUN: llvm-readelf --section-mapping %tno-shdrs.o | FileCheck %s --check-prefix=NO-SECHDRS
+
+# NO-SECHDRS: Section to Segment mapping:
+# NO-SECHDRS-NEXT: Segment Sections...
+# NO-SECHDRS-NEXT: 00 {{$}} +# NO-SECHDRS-NEXT: 01 {{$}} +# NO-SECHDRS-NOT: {{.}} diff --git a/llvm/test/tools/obj2yaml/coff-bss.s b/llvm/test/tools/obj2yaml/COFF/bss.s similarity index 100% rename from llvm/test/tools/obj2yaml/coff-bss.s rename to llvm/test/tools/obj2yaml/COFF/bss.s diff --git a/llvm/test/tools/obj2yaml/coff-long-file-symbol.test b/llvm/test/tools/obj2yaml/COFF/long-file-symbol.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/coff-long-file-symbol.test rename to llvm/test/tools/obj2yaml/COFF/long-file-symbol.yaml diff --git a/llvm/test/tools/obj2yaml/coff-long-section-name.test b/llvm/test/tools/obj2yaml/COFF/long-section-name.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/coff-long-section-name.test rename to llvm/test/tools/obj2yaml/COFF/long-section-name.yaml diff --git a/llvm/test/tools/obj2yaml/coff-section-aux-symbol.test b/llvm/test/tools/obj2yaml/COFF/section-aux-symbol.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/coff-section-aux-symbol.test rename to llvm/test/tools/obj2yaml/COFF/section-aux-symbol.yaml diff --git a/llvm/test/tools/obj2yaml/coff-weak-external.test b/llvm/test/tools/obj2yaml/COFF/weak-external.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/coff-weak-external.test rename to llvm/test/tools/obj2yaml/COFF/weak-external.yaml diff --git a/llvm/test/tools/obj2yaml/call-graph-profile-section.yaml b/llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/call-graph-profile-section.yaml rename to llvm/test/tools/obj2yaml/ELF/call-graph-profile-section.yaml diff --git a/llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test b/llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/duplicate-symbol-and-section-names.test rename to llvm/test/tools/obj2yaml/ELF/duplicate-symbol-and-section-names.yaml diff --git a/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test b/llvm/test/tools/obj2yaml/ELF/dynamic-section-arch-tags.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test rename to llvm/test/tools/obj2yaml/ELF/dynamic-section-arch-tags.yaml diff --git a/llvm/test/tools/obj2yaml/dynamic-section.test b/llvm/test/tools/obj2yaml/ELF/dynamic-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/dynamic-section.test rename to llvm/test/tools/obj2yaml/ELF/dynamic-section.yaml diff --git a/llvm/test/tools/obj2yaml/elf-emachine.yaml b/llvm/test/tools/obj2yaml/ELF/emachine.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-emachine.yaml rename to llvm/test/tools/obj2yaml/ELF/emachine.yaml diff --git a/llvm/test/tools/obj2yaml/elf-entsize.yaml b/llvm/test/tools/obj2yaml/ELF/entsize.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-entsize.yaml rename to llvm/test/tools/obj2yaml/ELF/entsize.yaml diff --git a/llvm/test/tools/obj2yaml/elf-gnu-hash-section.yaml b/llvm/test/tools/obj2yaml/ELF/gnu-hash-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-gnu-hash-section.yaml rename to llvm/test/tools/obj2yaml/ELF/gnu-hash-section.yaml diff --git a/llvm/test/tools/obj2yaml/elf-gnu-unique-symbols.yaml b/llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-gnu-unique-symbols.yaml rename to llvm/test/tools/obj2yaml/ELF/gnu-unique-symbols.yaml diff --git 
a/llvm/test/tools/obj2yaml/elf-hash-section.yaml b/llvm/test/tools/obj2yaml/ELF/hash-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-hash-section.yaml rename to llvm/test/tools/obj2yaml/ELF/hash-section.yaml diff --git a/llvm/test/tools/obj2yaml/implicit-sections-order.yaml b/llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/implicit-sections-order.yaml rename to llvm/test/tools/obj2yaml/ELF/implicit-sections-order.yaml diff --git a/llvm/test/tools/obj2yaml/invalid-section-name.yaml b/llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/invalid-section-name.yaml rename to llvm/test/tools/obj2yaml/ELF/invalid-section-name.yaml diff --git a/llvm/test/tools/obj2yaml/linker-options.yaml b/llvm/test/tools/obj2yaml/ELF/linker-options.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/linker-options.yaml rename to llvm/test/tools/obj2yaml/ELF/linker-options.yaml diff --git a/llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml b/llvm/test/tools/obj2yaml/ELF/llvm-addrsig-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml rename to llvm/test/tools/obj2yaml/ELF/llvm-addrsig-section.yaml diff --git a/llvm/test/tools/obj2yaml/llvm-deplibs-section.yaml b/llvm/test/tools/obj2yaml/ELF/llvm-deplibs-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/llvm-deplibs-section.yaml rename to llvm/test/tools/obj2yaml/ELF/llvm-deplibs-section.yaml diff --git a/llvm/test/tools/obj2yaml/elf-mips-eflags.yaml b/llvm/test/tools/obj2yaml/ELF/mips-eflags.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-mips-eflags.yaml rename to llvm/test/tools/obj2yaml/ELF/mips-eflags.yaml diff --git a/llvm/test/tools/obj2yaml/no-symbol-reloc.test b/llvm/test/tools/obj2yaml/ELF/no-symbol-reloc.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/no-symbol-reloc.test rename to llvm/test/tools/obj2yaml/ELF/no-symbol-reloc.yaml diff --git a/llvm/test/tools/obj2yaml/elf-no-symtab.yaml b/llvm/test/tools/obj2yaml/ELF/no-symtab.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-no-symtab.yaml rename to llvm/test/tools/obj2yaml/ELF/no-symtab.yaml diff --git a/llvm/test/tools/obj2yaml/elf-null-section.yaml b/llvm/test/tools/obj2yaml/ELF/null-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-null-section.yaml rename to llvm/test/tools/obj2yaml/ELF/null-section.yaml diff --git a/llvm/test/tools/obj2yaml/elf-ppc64-relocations.yaml b/llvm/test/tools/obj2yaml/ELF/ppc64-relocations.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-ppc64-relocations.yaml rename to llvm/test/tools/obj2yaml/ELF/ppc64-relocations.yaml diff --git a/llvm/test/tools/obj2yaml/program-headers.yaml b/llvm/test/tools/obj2yaml/ELF/program-headers.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/program-headers.yaml rename to llvm/test/tools/obj2yaml/ELF/program-headers.yaml diff --git a/llvm/test/tools/obj2yaml/rel-rela-section.yaml b/llvm/test/tools/obj2yaml/ELF/rel-rela-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/rel-rela-section.yaml rename to llvm/test/tools/obj2yaml/ELF/rel-rela-section.yaml diff --git a/llvm/test/tools/obj2yaml/elf-reladyn-section-shinfo.yaml b/llvm/test/tools/obj2yaml/ELF/reladyn-section-shinfo.yaml similarity index 100% rename from 
llvm/test/tools/obj2yaml/elf-reladyn-section-shinfo.yaml rename to llvm/test/tools/obj2yaml/ELF/reladyn-section-shinfo.yaml diff --git a/llvm/test/tools/obj2yaml/relocation-addend.yaml b/llvm/test/tools/obj2yaml/ELF/relocation-addend.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/relocation-addend.yaml rename to llvm/test/tools/obj2yaml/ELF/relocation-addend.yaml diff --git a/llvm/test/tools/obj2yaml/relocation-type.yaml b/llvm/test/tools/obj2yaml/ELF/relocation-type.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/relocation-type.yaml rename to llvm/test/tools/obj2yaml/ELF/relocation-type.yaml diff --git a/llvm/test/tools/obj2yaml/relr-section.yaml b/llvm/test/tools/obj2yaml/ELF/relr-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/relr-section.yaml rename to llvm/test/tools/obj2yaml/ELF/relr-section.yaml diff --git a/llvm/test/tools/obj2yaml/section-group.test b/llvm/test/tools/obj2yaml/ELF/section-group.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/section-group.test rename to llvm/test/tools/obj2yaml/ELF/section-group.yaml diff --git a/llvm/test/tools/obj2yaml/section-type.yaml b/llvm/test/tools/obj2yaml/ELF/section-type.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/section-type.yaml rename to llvm/test/tools/obj2yaml/ELF/section-type.yaml diff --git a/llvm/test/tools/obj2yaml/sections-info.yaml b/llvm/test/tools/obj2yaml/ELF/sections-info.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/sections-info.yaml rename to llvm/test/tools/obj2yaml/ELF/sections-info.yaml diff --git a/llvm/test/tools/obj2yaml/elf-shinfo.yaml b/llvm/test/tools/obj2yaml/ELF/shinfo.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-shinfo.yaml rename to llvm/test/tools/obj2yaml/ELF/shinfo.yaml diff --git a/llvm/test/tools/obj2yaml/elf-shlink.yaml b/llvm/test/tools/obj2yaml/ELF/shlink.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-shlink.yaml rename to llvm/test/tools/obj2yaml/ELF/shlink.yaml diff --git a/llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml b/llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml rename to llvm/test/tools/obj2yaml/ELF/sht-symtab-shndx.yaml diff --git a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml b/llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/special-symbol-indices.yaml rename to llvm/test/tools/obj2yaml/ELF/special-symbol-indices.yaml diff --git a/llvm/test/tools/obj2yaml/elf-stack-sizes.yaml b/llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-stack-sizes.yaml rename to llvm/test/tools/obj2yaml/ELF/stack-sizes.yaml diff --git a/llvm/test/tools/obj2yaml/symbol-type.yaml b/llvm/test/tools/obj2yaml/ELF/symbol-type.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/symbol-type.yaml rename to llvm/test/tools/obj2yaml/ELF/symbol-type.yaml diff --git a/llvm/test/tools/obj2yaml/elf-symbol-visibility.yaml b/llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/elf-symbol-visibility.yaml rename to llvm/test/tools/obj2yaml/ELF/symbol-visibility.yaml diff --git a/llvm/test/tools/obj2yaml/verdef-section.yaml b/llvm/test/tools/obj2yaml/ELF/verdef-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/verdef-section.yaml rename to 
llvm/test/tools/obj2yaml/ELF/verdef-section.yaml diff --git a/llvm/test/tools/obj2yaml/verneed-section.yaml b/llvm/test/tools/obj2yaml/ELF/verneed-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/verneed-section.yaml rename to llvm/test/tools/obj2yaml/ELF/verneed-section.yaml diff --git a/llvm/test/tools/obj2yaml/versym-section.yaml b/llvm/test/tools/obj2yaml/ELF/versym-section.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/versym-section.yaml rename to llvm/test/tools/obj2yaml/ELF/versym-section.yaml diff --git a/llvm/test/tools/obj2yaml/macho-DWARF-debug_aranges-error.yaml b/llvm/test/tools/obj2yaml/MachO/DWARF-debug_aranges-error.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/macho-DWARF-debug_aranges-error.yaml rename to llvm/test/tools/obj2yaml/MachO/DWARF-debug_aranges-error.yaml diff --git a/llvm/test/tools/obj2yaml/basic-minidump.yaml b/llvm/test/tools/obj2yaml/Minidump/basic.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/basic-minidump.yaml rename to llvm/test/tools/obj2yaml/Minidump/basic.yaml diff --git a/llvm/test/tools/obj2yaml/Inputs/aix_xcoff.o b/llvm/test/tools/obj2yaml/XCOFF/Inputs/aix_xcoff.o similarity index 100% rename from llvm/test/tools/obj2yaml/Inputs/aix_xcoff.o rename to llvm/test/tools/obj2yaml/XCOFF/Inputs/aix_xcoff.o diff --git a/llvm/test/tools/obj2yaml/Inputs/aix_xcoff_truncated_file_header.o b/llvm/test/tools/obj2yaml/XCOFF/Inputs/aix_xcoff_truncated_file_header.o similarity index 100% rename from llvm/test/tools/obj2yaml/Inputs/aix_xcoff_truncated_file_header.o rename to llvm/test/tools/obj2yaml/XCOFF/Inputs/aix_xcoff_truncated_file_header.o diff --git a/llvm/test/tools/obj2yaml/aix_xcoff.test b/llvm/test/tools/obj2yaml/XCOFF/aix.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/aix_xcoff.test rename to llvm/test/tools/obj2yaml/XCOFF/aix.yaml diff --git a/llvm/test/tools/obj2yaml/aix_xcoff_truncated_file_header.test b/llvm/test/tools/obj2yaml/XCOFF/truncated-file-header.yaml similarity index 100% rename from llvm/test/tools/obj2yaml/aix_xcoff_truncated_file_header.test rename to llvm/test/tools/obj2yaml/XCOFF/truncated-file-header.yaml diff --git a/llvm/tools/dsymutil/BinaryHolder.cpp b/llvm/tools/dsymutil/BinaryHolder.cpp index 31d32d8d8e804..df3cb7161a81d 100644 --- a/llvm/tools/dsymutil/BinaryHolder.cpp +++ b/llvm/tools/dsymutil/BinaryHolder.cpp @@ -41,12 +41,15 @@ getMachOFatMemoryBuffers(StringRef Filename, MemoryBuffer &Mem, return Buffers; } -Error BinaryHolder::ArchiveEntry::load(StringRef Filename, +Error BinaryHolder::ArchiveEntry::load(IntrusiveRefCntPtr VFS, + StringRef Filename, TimestampTy Timestamp, bool Verbose) { StringRef ArchiveFilename = getArchiveAndObjectName(Filename).first; // Try to load archive and force it to be memory mapped. - auto ErrOrBuff = MemoryBuffer::getFileOrSTDIN(ArchiveFilename, -1, false); + auto ErrOrBuff = (ArchiveFilename == "-") + ? MemoryBuffer::getSTDIN() + : VFS->getBufferForFile(ArchiveFilename, -1, false); if (auto Err = ErrOrBuff.getError()) return errorCodeToError(Err); @@ -83,9 +86,12 @@ Error BinaryHolder::ArchiveEntry::load(StringRef Filename, return Error::success(); } -Error BinaryHolder::ObjectEntry::load(StringRef Filename, bool Verbose) { +Error BinaryHolder::ObjectEntry::load(IntrusiveRefCntPtr VFS, + StringRef Filename, bool Verbose) { // Try to load regular binary and force it to be memory mapped. - auto ErrOrBuff = MemoryBuffer::getFileOrSTDIN(Filename, -1, false); + auto ErrOrBuff = (Filename == "-") + ? 
MemoryBuffer::getSTDIN() + : VFS->getBufferForFile(Filename, -1, false); if (auto Err = ErrOrBuff.getError()) return errorCodeToError(Err); @@ -223,7 +229,7 @@ BinaryHolder::getObjectEntry(StringRef Filename, TimestampTy Timestamp) { Verbose); } else { ArchiveEntry &AE = ArchiveCache[ArchiveFilename]; - auto Err = AE.load(Filename, Timestamp, Verbose); + auto Err = AE.load(VFS, Filename, Timestamp, Verbose); if (Err) { ArchiveCache.erase(ArchiveFilename); // Don't return the error here: maybe the file wasn't an archive. @@ -240,7 +246,7 @@ BinaryHolder::getObjectEntry(StringRef Filename, TimestampTy Timestamp) { std::lock_guard Lock(ObjectCacheMutex); if (!ObjectCache.count(Filename)) { ObjectEntry &OE = ObjectCache[Filename]; - auto Err = OE.load(Filename, Verbose); + auto Err = OE.load(VFS, Filename, Verbose); if (Err) { ObjectCache.erase(Filename); return std::move(Err); diff --git a/llvm/tools/dsymutil/BinaryHolder.h b/llvm/tools/dsymutil/BinaryHolder.h index 223626ac8cdc3..dee944d826a10 100644 --- a/llvm/tools/dsymutil/BinaryHolder.h +++ b/llvm/tools/dsymutil/BinaryHolder.h @@ -23,6 +23,7 @@ #include "llvm/Support/Chrono.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/VirtualFileSystem.h" #include @@ -37,7 +38,8 @@ class BinaryHolder { public: using TimestampTy = sys::TimePoint; - BinaryHolder(bool Verbose = false) : Verbose(Verbose) {} + BinaryHolder(IntrusiveRefCntPtr VFS, bool Verbose = false) + : VFS(VFS), Verbose(Verbose) {} // Forward declarations for friend declaration. class ObjectEntry; @@ -55,7 +57,8 @@ class BinaryHolder { class ObjectEntry : public EntryBase { public: /// Load the given object binary in memory. - Error load(StringRef Filename, bool Verbose = false); + Error load(IntrusiveRefCntPtr VFS, StringRef Filename, + bool Verbose = false); /// Access all owned ObjectFiles. std::vector getObjects() const; @@ -106,7 +109,8 @@ class BinaryHolder { }; /// Load the given object binary in memory. - Error load(StringRef Filename, TimestampTy Timestamp, bool Verbose = false); + Error load(IntrusiveRefCntPtr VFS, StringRef Filename, + TimestampTy Timestamp, bool Verbose = false); Expected getObjectEntry(StringRef Filename, TimestampTy Timestamp, @@ -133,6 +137,9 @@ class BinaryHolder { StringMap ObjectCache; std::mutex ObjectCacheMutex; + /// Virtual File System instance. + IntrusiveRefCntPtr VFS; + bool Verbose; }; diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp index 3cd1bb0f7b307..042804b2b762d 100644 --- a/llvm/tools/dsymutil/DebugMap.cpp +++ b/llvm/tools/dsymutil/DebugMap.cpp @@ -234,7 +234,7 @@ MappingTraits::YamlDMO::YamlDMO( dsymutil::DebugMapObject MappingTraits::YamlDMO::denormalize(IO &IO) { - BinaryHolder BinHolder(/* Verbose =*/false); + BinaryHolder BinHolder(vfs::getRealFileSystem(), /* Verbose =*/false); const auto &Ctxt = *reinterpret_cast(IO.getContext()); SmallString<80> Path(Ctxt.PrependPath); StringMap SymbolAddresses; @@ -254,7 +254,12 @@ MappingTraits::YamlDMO::denormalize(IO &IO) { << toString(std::move(Err)) << '\n'; } else { for (const auto &Sym : Object->symbols()) { - uint64_t Address = Sym.getValue(); + Expected AddressOrErr = Sym.getValue(); + if (!AddressOrErr) { + // TODO: Actually report errors helpfully. 
+ consumeError(AddressOrErr.takeError()); + continue; + } Expected Name = Sym.getName(); Expected FlagsOrErr = Sym.getFlags(); if (!Name || !FlagsOrErr || @@ -266,7 +271,7 @@ MappingTraits::YamlDMO::denormalize(IO &IO) { consumeError(Name.takeError()); continue; } - SymbolAddresses[*Name] = Address; + SymbolAddresses[*Name] = *AddressOrErr; } } } diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index 131b705f94341..0612fd57a2ddd 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -455,8 +455,8 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) { if (Map.getTriple().isOSDarwin() && !Map.getBinaryPath().empty() && Options.FileType == OutputFileType::Object) return MachOUtils::generateDsymCompanion( - Map, Options.Translator, *Streamer->getAsmPrinter().OutStreamer, - OutFile); + Options.VFS, Map, Options.Translator, + *Streamer->getAsmPrinter().OutStreamer, OutFile); Streamer->finish(); return true; diff --git a/llvm/tools/dsymutil/LinkUtils.h b/llvm/tools/dsymutil/LinkUtils.h index c630be328fa31..eef3338908bec 100644 --- a/llvm/tools/dsymutil/LinkUtils.h +++ b/llvm/tools/dsymutil/LinkUtils.h @@ -13,6 +13,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" #include "llvm/DWARFLinker/DWARFLinker.h" @@ -62,6 +63,10 @@ struct LinkOptions { /// Symbol map translator. SymbolMapTranslator Translator; + /// Virtual File System. + llvm::IntrusiveRefCntPtr VFS = + vfs::getRealFileSystem(); + /// Fields used for linking and placing remarks into the .dSYM bundle. /// @{ diff --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp index 42b3f2ecb2b7a..617b708561c42 100644 --- a/llvm/tools/dsymutil/MachODebugMapParser.cpp +++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp @@ -23,12 +23,13 @@ using namespace llvm::object; class MachODebugMapParser { public: - MachODebugMapParser(StringRef BinaryPath, ArrayRef Archs, + MachODebugMapParser(llvm::IntrusiveRefCntPtr VFS, + StringRef BinaryPath, ArrayRef Archs, StringRef PathPrefix = "", bool PaperTrailWarnings = false, bool Verbose = false) : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()), PathPrefix(std::string(PathPrefix)), - PaperTrailWarnings(PaperTrailWarnings), BinHolder(Verbose), + PaperTrailWarnings(PaperTrailWarnings), BinHolder(VFS, Verbose), CurrentDebugMapObject(nullptr) {} /// Parses and returns the DebugMaps of the input binary. The binary contains @@ -190,7 +191,8 @@ MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, StringRef BinaryPath) { loadMainBinarySymbols(MainBinary); ArrayRef UUID = MainBinary.getUuid(); - Result = std::make_unique(MainBinary.getArchTriple(), BinaryPath, UUID); + Result = + std::make_unique(MainBinary.getArchTriple(), BinaryPath, UUID); MainBinaryStrings = MainBinary.getStringTableData(); for (const SymbolRef &Symbol : MainBinary.symbols()) { const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); @@ -478,7 +480,7 @@ void MachODebugMapParser::loadCurrentObjectFileSymbols( CurrentObjectAddresses.clear(); for (auto Sym : Obj.symbols()) { - uint64_t Addr = Sym.getValue(); + uint64_t Addr = cantFail(Sym.getValue()); Expected Name = Sym.getName(); if (!Name) { // TODO: Actually report errors helpfully. 
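A recurring pattern in the hunks above: object::SymbolRef::getValue() now returns Expected<uint64_t> rather than a raw uint64_t, so every call site must consume the error state. The patch uses two idioms: llvm::cantFail where a failure would indicate a bug, and llvm::consumeError where the symbol is simply skipped. A minimal sketch of both, with a hypothetical visitSymbolAddress helper (illustrative only, not part of the patch):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"

using namespace llvm;

static void visitSymbolAddress(const object::SymbolRef &Sym) {
  // Idiom 1: failure is not expected here; cantFail aborts if it happens.
  uint64_t Addr = cantFail(Sym.getValue());
  (void)Addr;

  // Idiom 2: tolerate the failure and skip this symbol.
  Expected<uint64_t> AddrOrErr = Sym.getValue();
  if (!AddrOrErr) {
    consumeError(AddrOrErr.takeError()); // Expected errors must be consumed.
    return;
  }
  uint64_t Value = *AddrOrErr; // Safe to dereference after the check.
  (void)Value;
}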
@@ -562,7 +564,7 @@ void MachODebugMapParser::loadMainBinarySymbols( Section = *SectionOrErr; if (Section == MainBinary.section_end() || Section->isText()) continue; - uint64_t Addr = Sym.getValue(); + uint64_t Addr = cantFail(Sym.getValue()); Expected NameOrErr = Sym.getName(); if (!NameOrErr) { // TODO: Actually report errors helpfully. @@ -583,20 +585,22 @@ void MachODebugMapParser::loadMainBinarySymbols( namespace llvm { namespace dsymutil { llvm::ErrorOr>> -parseDebugMap(StringRef InputFile, ArrayRef Archs, +parseDebugMap(llvm::IntrusiveRefCntPtr VFS, + StringRef InputFile, ArrayRef Archs, StringRef PrependPath, bool PaperTrailWarnings, bool Verbose, bool InputIsYAML) { if (InputIsYAML) return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose); - MachODebugMapParser Parser(InputFile, Archs, PrependPath, PaperTrailWarnings, - Verbose); + MachODebugMapParser Parser(VFS, InputFile, Archs, PrependPath, + PaperTrailWarnings, Verbose); return Parser.parse(); } -bool dumpStab(StringRef InputFile, ArrayRef Archs, +bool dumpStab(llvm::IntrusiveRefCntPtr VFS, + StringRef InputFile, ArrayRef Archs, StringRef PrependPath) { - MachODebugMapParser Parser(InputFile, Archs, PrependPath, false); + MachODebugMapParser Parser(VFS, InputFile, Archs, PrependPath, false); return Parser.dumpStab(); } } // namespace dsymutil diff --git a/llvm/tools/dsymutil/MachOUtils.cpp b/llvm/tools/dsymutil/MachOUtils.cpp index f9219dc9cbb61..3f736115d7f57 100644 --- a/llvm/tools/dsymutil/MachOUtils.cpp +++ b/llvm/tools/dsymutil/MachOUtils.cpp @@ -332,7 +332,8 @@ static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) { // Stream a dSYM companion binary file corresponding to the binary referenced // by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to // \a OutFile and it must be using a MachObjectWriter object to do so. 
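The VFS parameter threaded through parseDebugMap, dumpStab, and MachODebugMapParser above (and through generateDsymCompanion in the hunk that follows) replaces BinaryHolder's direct MemoryBuffer::getFileOrSTDIN calls, making the file system injectable, e.g. an llvm::vfs::InMemoryFileSystem in tests. A sketch of the resulting load pattern; the loadBuffer helper name is hypothetical:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

using namespace llvm;

// "-" still means stdin; everything else is read through the injected VFS,
// mirroring the BinaryHolder::ArchiveEntry::load/ObjectEntry::load hunks.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
loadBuffer(IntrusiveRefCntPtr<vfs::FileSystem> VFS, StringRef Filename) {
  return (Filename == "-")
             ? MemoryBuffer::getSTDIN()
             : VFS->getBufferForFile(Filename, /*FileSize=*/-1,
                                     /*RequiresNullTerminator=*/false);
}

Production callers pass vfs::getRealFileSystem(), which is also the default for LinkOptions::VFS above.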
-bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator, +bool generateDsymCompanion(llvm::IntrusiveRefCntPtr VFS, + const DebugMap &DM, SymbolMapTranslator &Translator, MCStreamer &MS, raw_fd_ostream &OutFile) { auto &ObjectStreamer = static_cast(MS); MCAssembler &MCAsm = ObjectStreamer.getAssembler(); @@ -343,7 +344,7 @@ bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator, MCAsmLayout Layout(MCAsm); MCAsm.layout(Layout); - BinaryHolder InputBinaryHolder(false); + BinaryHolder InputBinaryHolder(VFS, false); auto ObjectEntry = InputBinaryHolder.getObjectEntry(DM.getBinaryPath()); if (!ObjectEntry) { diff --git a/llvm/tools/dsymutil/MachOUtils.h b/llvm/tools/dsymutil/MachOUtils.h index f64fc68e1b42e..b1cdd44d38e60 100644 --- a/llvm/tools/dsymutil/MachOUtils.h +++ b/llvm/tools/dsymutil/MachOUtils.h @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/VirtualFileSystem.h" #include @@ -40,7 +41,8 @@ bool generateUniversalBinary(SmallVectorImpl &ArchFiles, StringRef OutputFileName, const LinkOptions &, StringRef SDKPath); -bool generateDsymCompanion(const DebugMap &DM, SymbolMapTranslator &Translator, +bool generateDsymCompanion(llvm::IntrusiveRefCntPtr VFS, + const DebugMap &DM, SymbolMapTranslator &Translator, MCStreamer &MS, raw_fd_ostream &OutFile); std::string getArchName(StringRef Arch); diff --git a/llvm/tools/dsymutil/Options.td b/llvm/tools/dsymutil/Options.td index 5360bf09ac751..9292fb8021511 100644 --- a/llvm/tools/dsymutil/Options.td +++ b/llvm/tools/dsymutil/Options.td @@ -44,7 +44,7 @@ def dump_debug_map: F<"dump-debug-map">, HelpText<"Parse and dump the debug map to standard output. No DWARF link will take place.">, Group; -def yaml_input: F<"y">, +def yaml_input: Flag<["-", "--"], "y">, HelpText<"Treat the input file as a YAML debug map rather than a binary.">, Group; @@ -52,7 +52,7 @@ def papertrail: F<"papertrail">, HelpText<"Embed warnings in the linked DWARF debug info.">, Group; -def assembly: F<"S">, +def assembly: Flag<["-", "--"], "S">, HelpText<"Output textual assembly instead of a binary dSYM companion file.">, Group; @@ -93,16 +93,16 @@ def: Flag<["-"], "u">, HelpText<"Alias for --update">, Group; -def output: Separate<["--", "-"], "o">, +def output: Separate<["-", "--"], "o">, MetaVarName<"">, HelpText<"Specify the output file.
Defaults to .dwarf">, Group; def: Separate<["--", "-"], "out">, Alias, - HelpText<"Alias for --o">, + HelpText<"Alias for -o">, Group; def: Joined<["--", "-"], "out=">, Alias; -def: Joined<["--", "-"], "o=">, Alias; +def: Joined<["-", "--"], "o=">, Alias; def oso_prepend_path: Separate<["--", "-"], "oso-prepend-path">, MetaVarName<"">, diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index b0a6e7a1a6726..78503a02ec002 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -510,15 +510,16 @@ int main(int argc, char **argv) { for (auto &InputFile : Options.InputFiles) { // Dump the symbol table for each input file and requested arch if (Options.DumpStab) { - if (!dumpStab(InputFile, Options.Archs, Options.LinkOpts.PrependPath)) + if (!dumpStab(Options.LinkOpts.VFS, InputFile, Options.Archs, + Options.LinkOpts.PrependPath)) return 1; continue; } auto DebugMapPtrsOrErr = - parseDebugMap(InputFile, Options.Archs, Options.LinkOpts.PrependPath, - Options.PaperTrailWarnings, Options.LinkOpts.Verbose, - Options.InputIsYAMLDebugMap); + parseDebugMap(Options.LinkOpts.VFS, InputFile, Options.Archs, + Options.LinkOpts.PrependPath, Options.PaperTrailWarnings, + Options.LinkOpts.Verbose, Options.InputIsYAMLDebugMap); if (auto EC = DebugMapPtrsOrErr.getError()) { WithColor::error() << "cannot parse the debug map for '" << InputFile @@ -545,7 +546,7 @@ int main(int argc, char **argv) { } // Share a single binary holder for all the link steps. - BinaryHolder BinHolder; + BinaryHolder BinHolder(Options.LinkOpts.VFS); ThreadPoolStrategy S = hardware_concurrency(Options.LinkOpts.Threads); if (Options.LinkOpts.Threads == 0) { diff --git a/llvm/tools/dsymutil/dsymutil.h b/llvm/tools/dsymutil/dsymutil.h index 3c44b9564befb..f88f57bb20a55 100644 --- a/llvm/tools/dsymutil/dsymutil.h +++ b/llvm/tools/dsymutil/dsymutil.h @@ -35,12 +35,14 @@ class BinaryHolder; /// The file has to be a MachO object file. Multiple debug maps can be /// returned when the file is a universal (aka fat) binary. ErrorOr>> -parseDebugMap(StringRef InputFile, ArrayRef Archs, +parseDebugMap(llvm::IntrusiveRefCntPtr VFS, + StringRef InputFile, ArrayRef Archs, StringRef PrependPath, bool PaperTrailWarnings, bool Verbose, bool InputIsYAML); -/// Dump the symbol table +/// Dump the symbol table.
+bool dumpStab(llvm::IntrusiveRefCntPtr VFS, + StringRef InputFile, ArrayRef Archs, StringRef PrependPath = ""); /// Link the Dwarf debug info as directed by the passed DebugMap \p DM into a diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 0b249d2220634..50b017345c0c6 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -871,6 +871,7 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, Conf.OptLevel = options::OptLevel; Conf.PTO.LoopVectorization = options::OptLevel > 1; Conf.PTO.SLPVectorization = options::OptLevel > 1; + Conf.AlwaysEmitRegularLTOObj = !options::obj_path.empty(); if (options::thinlto_index_only) { std::string OldPrefix, NewPrefix; diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 4cb808baba45f..981e0812d45ee 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -942,8 +942,8 @@ int runOrcLazyJIT(const char *ProgName) { orc::JITDylib *JD = J->getJITDylibByName(*JDItr); if (!JD) { JD = &ExitOnErr(J->createJITDylib(*JDItr)); - J->getMainJITDylib().addToSearchOrder(*JD); - JD->addToSearchOrder(J->getMainJITDylib()); + J->getMainJITDylib().addToLinkOrder(*JD); + JD->addToLinkOrder(J->getMainJITDylib()); } IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] = JD; } diff --git a/llvm/tools/llvm-diff/DiffConsumer.cpp b/llvm/tools/llvm-diff/DiffConsumer.cpp index b797143bde1b4..6228ff2bae983 100644 --- a/llvm/tools/llvm-diff/DiffConsumer.cpp +++ b/llvm/tools/llvm-diff/DiffConsumer.cpp @@ -50,15 +50,15 @@ void DiffConsumer::printValue(Value *V, bool isL) { return; } if (V->getType()->isVoidTy()) { - if (isa(V)) { + if (auto *SI = dyn_cast(V)) { out << "store to "; - printValue(cast(V)->getPointerOperand(), isL); - } else if (isa(V)) { + printValue(SI->getPointerOperand(), isL); + } else if (auto *CI = dyn_cast(V)) { out << "call to "; - printValue(cast(V)->getCalledValue(), isL); - } else if (isa(V)) { + printValue(CI->getCalledOperand(), isL); + } else if (auto *II = dyn_cast(V)) { out << "invoke to "; - printValue(cast(V)->getCalledValue(), isL); + printValue(II->getCalledOperand(), isL); } else { out << *V; } diff --git a/llvm/tools/llvm-diff/DifferenceEngine.cpp b/llvm/tools/llvm-diff/DifferenceEngine.cpp index 91befdbe6419c..e54217917e5b5 100644 --- a/llvm/tools/llvm-diff/DifferenceEngine.cpp +++ b/llvm/tools/llvm-diff/DifferenceEngine.cpp @@ -224,7 +224,7 @@ class FunctionDifferenceEngine { bool diffCallSites(CallBase &L, CallBase &R, bool Complain) { // FIXME: call attributes - if (!equivalentAsOperands(L.getCalledValue(), R.getCalledValue())) { + if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) { if (Complain) Engine.log("called functions differ"); return true; } @@ -638,7 +638,8 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart, if (!isa(*I)) return; CallInst *LCall = cast(&*I); InvokeInst *RInvoke = cast(RTerm); - if (!equivalentAsOperands(LCall->getCalledValue(), RInvoke->getCalledValue())) + if (!equivalentAsOperands(LCall->getCalledOperand(), + RInvoke->getCalledOperand())) return; if (!LCall->use_empty()) Values[LCall] = RInvoke; @@ -651,7 +652,8 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart, if (!isa(*I)) return; CallInst *RCall = cast(I); InvokeInst *LInvoke = cast(LTerm); - if (!equivalentAsOperands(LInvoke->getCalledValue(), RCall->getCalledValue())) + if (!equivalentAsOperands(LInvoke->getCalledOperand(), + RCall->getCalledOperand())) return; if 
(!LInvoke->use_empty()) Values[LInvoke] = RCall; diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp index b4f4d97b4e12b..f1f37c9ec6ea6 100644 --- a/llvm/tools/llvm-dwarfdump/Statistics.cpp +++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp @@ -58,14 +58,14 @@ struct PerFunctionStats { unsigned NumParamTypes = 0; /// Number of function parameters with a DW_AT_location. unsigned NumParamLocations = 0; - /// Number of variables. - unsigned NumVars = 0; - /// Number of variables with source location. - unsigned NumVarSourceLocations = 0; - /// Number of variables with type. - unsigned NumVarTypes = 0; - /// Number of variables with DW_AT_location. - unsigned NumVarLocations = 0; + /// Number of local variables. + unsigned NumLocalVars = 0; + /// Number of local variables with source location. + unsigned NumLocalVarSourceLocations = 0; + /// Number of local variables with type. + unsigned NumLocalVarTypes = 0; + /// Number of local variables with DW_AT_location. + unsigned NumLocalVarLocations = 0; }; /// Holds accumulated global statistics about DIEs. @@ -80,21 +80,19 @@ struct GlobalStats { /// Total number of PC range bytes covered by DW_AT_locations of /// formal parameters. unsigned ParamScopeBytesCovered = 0; - /// Total number of PC range bytes in each variable's enclosing scope - /// (only for parameters). + /// Total number of PC range bytes in each parameter's enclosing scope. unsigned ParamScopeBytes = 0; /// Total number of PC range bytes covered by DW_AT_locations with /// the debug entry values (DW_OP_entry_value) (only for parameters). unsigned ParamScopeEntryValueBytesCovered = 0; /// Total number of PC range bytes covered by DW_AT_locations (only for local /// variables). - unsigned VarScopeBytesCovered = 0; - /// Total number of PC range bytes in each variable's enclosing scope - /// (only for local variables). - unsigned VarScopeBytes = 0; + unsigned LocalVarScopeBytesCovered = 0; + /// Total number of PC range bytes in each local variable's enclosing scope. + unsigned LocalVarScopeBytes = 0; /// Total number of PC range bytes covered by DW_AT_locations with /// the debug entry values (DW_OP_entry_value) (only for local variables). - unsigned VarScopeEntryValueBytesCovered = 0; + unsigned LocalVarScopeEntryValueBytesCovered = 0; /// Total number of call site entries (DW_AT_call_file & DW_AT_call_line). unsigned CallSiteEntries = 0; /// Total number of call site DIEs (DW_TAG_call_site). @@ -130,10 +128,10 @@ struct LocationStats { std::vector ParamNonEntryValLocStats{ std::vector(NumOfCoverageCategories, 0)}; /// The debug location statistics for local variables. - std::vector VarLocStats{ + std::vector LocalVarLocStats{ std::vector(NumOfCoverageCategories, 0)}; /// Map non debug entry values coverage for local variables. - std::vector VarNonEntryValLocStats{ + std::vector LocalVarNonEntryValLocStats{ std::vector(NumOfCoverageCategories, 0)}; /// Total number of local variables and function parameters processed. unsigned NumVarParam = 0; @@ -148,8 +146,8 @@ struct LocationStats { static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope, std::vector &VarParamLocStats, std::vector &ParamLocStats, - std::vector &VarLocStats, bool IsParam, - bool IsLocalVar) { + std::vector &LocalVarLocStats, + bool IsParam, bool IsLocalVar) { auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned { // No debug location at all for the variable. 
if (BytesCovered == 0) @@ -168,7 +166,7 @@ static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope, if (IsParam) ParamLocStats[CoverageBucket]++; else if (IsLocalVar) - VarLocStats[CoverageBucket]++; + LocalVarLocStats[CoverageBucket]++; } /// Construct an identifier for a given DIE from its Prefix, Name, DeclFileName /// and DeclLine. The identifier aims to be unique for any unique entities, @@ -214,7 +212,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, uint64_t BytesEntryValuesCovered = 0; auto &FnStats = FnStatMap[FnPrefix]; bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter; - bool IsVariable = Die.getTag() == dwarf::DW_TAG_variable; + bool IsLocalVar = Die.getTag() == dwarf::DW_TAG_variable; bool IsConstantMember = Die.getTag() == dwarf::DW_TAG_member && Die.find(dwarf::DW_AT_const_value); @@ -230,13 +228,13 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, return; } - if (!IsParam && !IsVariable && !IsConstantMember) { + if (!IsParam && !IsLocalVar && !IsConstantMember) { // Not a variable or constant member. return; } // Ignore declarations of global variables. - if (IsVariable && Die.find(dwarf::DW_AT_declaration)) + if (IsLocalVar && Die.find(dwarf::DW_AT_declaration)) return; if (Die.findRecursively(dwarf::DW_AT_decl_file) && @@ -293,17 +291,17 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, LocStats.NumVarParam++; if (IsParam) LocStats.NumParam++; - else if (IsVariable) + else if (IsLocalVar) LocStats.NumVar++; collectLocStats(BytesCovered, BytesInScope, LocStats.VarParamLocStats, - LocStats.ParamLocStats, LocStats.VarLocStats, IsParam, - IsVariable); + LocStats.ParamLocStats, LocStats.LocalVarLocStats, IsParam, + IsLocalVar); // Non debug entry values coverage statistics. collectLocStats(BytesCovered - BytesEntryValuesCovered, BytesInScope, LocStats.VarParamNonEntryValLocStats, LocStats.ParamNonEntryValLocStats, - LocStats.VarNonEntryValLocStats, IsParam, IsVariable); + LocStats.LocalVarNonEntryValLocStats, IsParam, IsLocalVar); } // Collect PC range coverage data. 
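collectLocStats above drops each variable into one of NumOfCoverageCategories buckets according to the fraction of its enclosing scope that its DW_AT_location covers, with dedicated buckets for exactly 0% and exactly 100%. The constant and the bucketing formula live outside this diff, so the following is only a plausible reconstruction, assuming twelve buckets:

#include <cstdint>

// Assumed layout: bucket 0 = no coverage, buckets 1..10 = deciles of partial
// coverage, bucket 11 = fully covered scope.
constexpr unsigned NumOfCoverageCategories = 12;

static unsigned getCoverageBucket(uint64_t BytesCovered, uint64_t BytesInScope) {
  if (BytesCovered == 0)
    return 0; // No debug location at all for the variable.
  if (BytesCovered >= BytesInScope)
    return NumOfCoverageCategories - 1; // Scope fully covered.
  // Partial coverage: map the open interval (0%,100%) onto buckets 1..10.
  return unsigned(BytesCovered * (NumOfCoverageCategories - 2) / BytesInScope) + 1;
}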
@@ -324,10 +322,12 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, std::min(BytesInScope, BytesCovered); GlobalStats.ParamScopeBytes += BytesInScope; GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered; - } else if (IsVariable) { - GlobalStats.VarScopeBytesCovered += std::min(BytesInScope, BytesCovered); - GlobalStats.VarScopeBytes += BytesInScope; - GlobalStats.VarScopeEntryValueBytesCovered += BytesEntryValuesCovered; + } else if (IsLocalVar) { + GlobalStats.LocalVarScopeBytesCovered += + std::min(BytesInScope, BytesCovered); + GlobalStats.LocalVarScopeBytes += BytesInScope; + GlobalStats.LocalVarScopeEntryValueBytesCovered += + BytesEntryValuesCovered; } assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytes); } @@ -352,14 +352,14 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, FnStats.NumParamSourceLocations++; if (HasLoc) FnStats.NumParamLocations++; - } else if (IsVariable) { - FnStats.NumVars++; + } else if (IsLocalVar) { + FnStats.NumLocalVars++; if (HasType) - FnStats.NumVarTypes++; + FnStats.NumLocalVarTypes++; if (HasSrcLoc) - FnStats.NumVarSourceLocations++; + FnStats.NumLocalVarSourceLocations++; if (HasLoc) - FnStats.NumVarLocations++; + FnStats.NumLocalVarLocations++; } } @@ -473,33 +473,41 @@ static void printDatum(raw_ostream &OS, const char *Key, json::Value Value) { LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); } -static void printLocationStats(raw_ostream &OS, - const char *Key, +static void printLocationStats(raw_ostream &OS, const char *Key, std::vector &LocationStats) { - OS << ",\"" << Key << " with 0% of its scope covered\":" + OS << ",\"" << Key << " with 0% of parent scope covered by DW_AT_location\":" << LocationStats[0]; - LLVM_DEBUG(llvm::dbgs() << Key << " with 0% of its scope covered: " - << LocationStats[0] << '\n'); - OS << ",\"" << Key << " with (0%,10%) of its scope covered\":" + LLVM_DEBUG( + llvm::dbgs() << Key + << " with 0% of parent scope covered by DW_AT_location: \\" + << LocationStats[0] << '\n'); + OS << ",\"" << Key + << " with (0%,10%) of parent scope covered by DW_AT_location\":" << LocationStats[1]; - LLVM_DEBUG(llvm::dbgs() << Key << " with (0%,10%) of its scope covered: " - << LocationStats[1] << '\n'); + LLVM_DEBUG(llvm::dbgs() + << Key + << " with (0%,10%) of parent scope covered by DW_AT_location: " + << LocationStats[1] << '\n'); for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) { OS << ",\"" << Key << " with [" << (i - 1) * 10 << "%," << i * 10 - << "%) of its scope covered\":" << LocationStats[i]; + << "%) of parent scope covered by DW_AT_location\":" << LocationStats[i]; LLVM_DEBUG(llvm::dbgs() << Key << " with [" << (i - 1) * 10 << "%," << i * 10 - << "%) of its scope covered: " << LocationStats[i]); + << "%) of parent scope covered by DW_AT_location: " + << LocationStats[i]); } - OS << ",\"" << Key << " with 100% of its scope covered\":" + OS << ",\"" << Key + << " with 100% of parent scope covered by DW_AT_location\":" << LocationStats[NumOfCoverageCategories - 1]; - LLVM_DEBUG(llvm::dbgs() << Key << " with 100% of its scope covered: " - << LocationStats[NumOfCoverageCategories - 1]); + LLVM_DEBUG( + llvm::dbgs() << Key + << " with 100% of parent scope covered by DW_AT_location: " + << LocationStats[NumOfCoverageCategories - 1]); } static void printSectionSizes(raw_ostream &OS, const SectionSizes &Sizes) { for (const auto &DebugSec : Sizes.DebugSectionSizes) - OS << ",\"size of " << DebugSec.getKey() << "\":" << DebugSec.getValue(); + OS 
<< ",\"#bytes in " << DebugSec.getKey() << "\":" << DebugSec.getValue(); } /// \} @@ -531,7 +539,7 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, /// The version number should be increased every time the algorithm is changed /// (including bug fixes). New metrics may be added without increasing the /// version. - unsigned Version = 4; + unsigned Version = 5; unsigned VarParamTotal = 0; unsigned VarParamUnique = 0; unsigned VarParamWithLoc = 0; @@ -543,17 +551,18 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, unsigned ParamWithType = 0; unsigned ParamWithLoc = 0; unsigned ParamWithSrcLoc = 0; - unsigned VarTotal = 0; - unsigned VarWithType = 0; - unsigned VarWithSrcLoc = 0; - unsigned VarWithLoc = 0; + unsigned LocalVarTotal = 0; + unsigned LocalVarWithType = 0; + unsigned LocalVarWithSrcLoc = 0; + unsigned LocalVarWithLoc = 0; for (auto &Entry : Statistics) { PerFunctionStats &Stats = Entry.getValue(); unsigned TotalVars = Stats.VarsInFunction.size() * (Stats.NumFnInlined + Stats.NumFnOutOfLine); // Count variables in global scope. if (!Stats.IsFunction) - TotalVars = Stats.NumVars + Stats.ConstantMembers + Stats.NumArtificial; + TotalVars = + Stats.NumLocalVars + Stats.ConstantMembers + Stats.NumArtificial; unsigned Constants = Stats.ConstantMembers; VarParamWithLoc += Stats.TotalVarWithLoc + Constants; VarParamTotal += TotalVars; @@ -569,10 +578,10 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, ParamWithType += Stats.NumParamTypes; ParamWithLoc += Stats.NumParamLocations; ParamWithSrcLoc += Stats.NumParamSourceLocations; - VarTotal += Stats.NumVars; - VarWithType += Stats.NumVarTypes; - VarWithLoc += Stats.NumVarLocations; - VarWithSrcLoc += Stats.NumVarSourceLocations; + LocalVarTotal += Stats.NumLocalVars; + LocalVarWithType += Stats.NumLocalVarTypes; + LocalVarWithLoc += Stats.NumLocalVarLocations; + LocalVarWithSrcLoc += Stats.NumLocalVarSourceLocations; } // Print summary. @@ -580,57 +589,97 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, OS << "{\"version\":" << Version; LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n"; llvm::dbgs() << "---------------------------------\n"); + printDatum(OS, "file", Filename.str()); printDatum(OS, "format", FormatName); - printDatum(OS, "source functions", NumFunctions); - printDatum(OS, "source functions with location", NumFuncsWithSrcLoc); - printDatum(OS, "inlined functions", NumInlinedFunctions); - printDatum(OS, "inlined funcs with abstract origins", NumAbstractOrigins); - printDatum(OS, "unique source variables", VarParamUnique); - printDatum(OS, "source variables", VarParamTotal); - printDatum(OS, "variables with location", VarParamWithLoc); - printDatum(OS, "call site entries", GlobalStats.CallSiteEntries); - printDatum(OS, "call site DIEs", GlobalStats.CallSiteDIEs); - printDatum(OS, "call site parameter DIEs", GlobalStats.CallSiteParamDIEs); - printDatum(OS, "scope bytes total", GlobalStats.ScopeBytes); - printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); - printDatum(OS, "entry value scope bytes covered", + + printDatum(OS, "#functions", NumFunctions); + printDatum(OS, "#functions with location", NumFuncsWithSrcLoc); + printDatum(OS, "#inlined functions", NumInlinedFunctions); + printDatum(OS, "#inlined functions with abstract origins", + NumAbstractOrigins); + + // This includes local variables and formal parameters. 
+ printDatum(OS, "#unique source variables", VarParamUnique); + printDatum(OS, "#source variables", VarParamTotal); + printDatum(OS, "#source variables with location", VarParamWithLoc); + + printDatum(OS, "#call site entries", GlobalStats.CallSiteEntries); + printDatum(OS, "#call site DIEs", GlobalStats.CallSiteDIEs); + printDatum(OS, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs); + + printDatum(OS, "sum_all_variables(#bytes in parent scope)", + GlobalStats.ScopeBytes); + printDatum(OS, + "sum_all_variables(#bytes in parent scope covered by " + "DW_AT_location)", + GlobalStats.ScopeBytesCovered); + printDatum(OS, + "sum_all_variables(#bytes in parent scope covered by " + "DW_OP_entry_value)", GlobalStats.ScopeEntryValueBytesCovered); - printDatum(OS, "formal params scope bytes total", + + printDatum(OS, "sum_all_params(#bytes in parent scope)", GlobalStats.ParamScopeBytes); - printDatum(OS, "formal params scope bytes covered", - GlobalStats.ParamScopeBytesCovered); - printDatum(OS, "formal params entry value scope bytes covered", + printDatum( + OS, + "sum_all_params(#bytes in parent scope covered by DW_AT_location)", + GlobalStats.ParamScopeBytesCovered); + printDatum(OS, + "sum_all_params(#bytes in parent scope covered by " + "DW_OP_entry_value)", GlobalStats.ParamScopeEntryValueBytesCovered); - printDatum(OS, "vars scope bytes total", GlobalStats.VarScopeBytes); - printDatum(OS, "vars scope bytes covered", GlobalStats.VarScopeBytesCovered); - printDatum(OS, "vars entry value scope bytes covered", - GlobalStats.VarScopeEntryValueBytesCovered); - printDatum(OS, "total function size", GlobalStats.FunctionSize); - printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize); - printDatum(OS, "total formal params", ParamTotal); - printDatum(OS, "formal params with source location", ParamWithSrcLoc); - printDatum(OS, "formal params with type", ParamWithType); - printDatum(OS, "formal params with binary location", ParamWithLoc); - printDatum(OS, "total vars", VarTotal); - printDatum(OS, "vars with source location", VarWithSrcLoc); - printDatum(OS, "vars with type", VarWithType); - printDatum(OS, "vars with binary location", VarWithLoc); - printDatum(OS, "total variables procesed by location statistics", - LocStats.NumVarParam); + + printDatum(OS, "sum_all_local_vars(#bytes in parent scope)", + GlobalStats.LocalVarScopeBytes); + printDatum(OS, + "sum_all_local_vars(#bytes in parent scope covered by " + "DW_AT_location)", + GlobalStats.LocalVarScopeBytesCovered); + printDatum(OS, + "sum_all_local_vars(#bytes in parent scope covered by " + "DW_OP_entry_value)", + GlobalStats.LocalVarScopeEntryValueBytesCovered); + + printDatum(OS, "#bytes witin functions", GlobalStats.FunctionSize); + printDatum(OS, "#bytes witin inlined functions", + GlobalStats.InlineFunctionSize); + + // Print the summary for formal parameters. + printDatum(OS, "#params", ParamTotal); + printDatum(OS, "#params with source location", ParamWithSrcLoc); + printDatum(OS, "#params with type", ParamWithType); + printDatum(OS, "#params with binary location", ParamWithLoc); + + // Print the summary for local variables. + printDatum(OS, "#local vars", LocalVarTotal); + printDatum(OS, "#local vars with source location", LocalVarWithSrcLoc); + printDatum(OS, "#local vars with type", LocalVarWithType); + printDatum(OS, "#local vars with binary location", LocalVarWithLoc); + + // Print the debug section sizes. 
printSectionSizes(OS, Sizes); - printLocationStats(OS, "variables", LocStats.VarParamLocStats); - printLocationStats(OS, "variables (excluding the debug entry values)", + + // Print the location statistics for variables (includes local variables + // and formal parameters). + printDatum(OS, "#variables processed by location statistics", + LocStats.NumVarParam); + printLocationStats(OS, "#variables", LocStats.VarParamLocStats); + printLocationStats(OS, "#variables - entry values", LocStats.VarParamNonEntryValLocStats); - printDatum(OS, "total params procesed by location statistics", - LocStats.NumParam); - printLocationStats(OS, "params", LocStats.ParamLocStats); - printLocationStats(OS, "params (excluding the debug entry values)", + + // Print the location statistics for formal parameters. + printDatum(OS, "#params processed by location statistics", LocStats.NumParam); + printLocationStats(OS, "#params", LocStats.ParamLocStats); + printLocationStats(OS, "#params - entry values", LocStats.ParamNonEntryValLocStats); - printDatum(OS, "total vars procesed by location statistics", LocStats.NumVar); - printLocationStats(OS, "vars", LocStats.VarLocStats); - printLocationStats(OS, "vars (excluding the debug entry values)", - LocStats.VarNonEntryValLocStats); + + // Print the location statistics for local variables. + printDatum(OS, "#local vars processed by location statistics", + LocStats.NumVar); + printLocationStats(OS, "#local vars", LocStats.LocalVarLocStats); + printLocationStats(OS, "#local vars - entry values", + LocStats.LocalVarNonEntryValLocStats); OS << "}\n"; LLVM_DEBUG( llvm::dbgs() << "Total Availability: " diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h index 19a1ed0c401e3..5f67b396ad959 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -145,13 +145,6 @@ class CombinationGenerator { assert(!Range.empty() && "The range must not be empty."); rewind(); } - - // Only allow using our custom constructor. - WrappingIterator() = delete; - WrappingIterator(const WrappingIterator &) = delete; - WrappingIterator(WrappingIterator &&) = delete; - WrappingIterator &operator=(const WrappingIterator &) = delete; - WrappingIterator &operator=(WrappingIterator &&) = delete; }; const ArrayRef VariablesChoices; diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 9058a7907b0f9..961cd77c0ecbd 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -657,13 +657,13 @@ Error loadObjects(Session &S) { // Set every dylib to link against every other, in command line order. 
for (auto *JD : S.JDSearchOrder) { auto LookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly; - JITDylibSearchOrder O; + JITDylibSearchOrder LinkOrder; for (auto *JD2 : S.JDSearchOrder) { if (JD2 == JD) continue; - O.push_back(std::make_pair(JD2, LookupFlags)); + LinkOrder.push_back(std::make_pair(JD2, LookupFlags)); } - JD->setSearchOrder(std::move(O)); + JD->setLinkOrder(std::move(LinkOrder)); } } diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp index e35e0474a36d5..6b560890a4c16 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp +++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp @@ -383,6 +383,16 @@ Error COFFWriter::write(bool IsBigObj) { return Buf.commit(); } +Expected COFFWriter::virtualAddressToFileAddress(uint32_t RVA) { + for (const auto &S : Obj.getSections()) { + if (RVA >= S.Header.VirtualAddress && + RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData) + return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress; + } + return createStringError(object_error::parse_failed, + "debug directory payload not found"); +} + // Locate which sections contain the debug directories, iterate over all // the debug_directory structs in there, and set the PointerToRawData field // in all of them, according to their new physical location in the file. @@ -406,10 +416,17 @@ Error COFFWriter::patchDebugDirectory() { uint8_t *End = Ptr + Dir->Size; while (Ptr < End) { debug_directory *Debug = reinterpret_cast(Ptr); - Debug->PointerToRawData = - S.Header.PointerToRawData + Offset + sizeof(debug_directory); - Ptr += sizeof(debug_directory) + Debug->SizeOfData; - Offset += sizeof(debug_directory) + Debug->SizeOfData; + if (!Debug->AddressOfRawData) + return createStringError(object_error::parse_failed, + "debug directory payload outside of " + "mapped sections not supported"); + if (Expected FilePosOrErr = + virtualAddressToFileAddress(Debug->AddressOfRawData)) + Debug->PointerToRawData = *FilePosOrErr; + else + return FilePosOrErr.takeError(); + Ptr += sizeof(debug_directory); + Offset += sizeof(debug_directory); } // Debug directory found and patched, all done. 
return Error::success(); diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h index 681a8d5e4a66c..3c0bdcbd5d6f6 100644 --- a/llvm/tools/llvm-objcopy/COFF/Writer.h +++ b/llvm/tools/llvm-objcopy/COFF/Writer.h @@ -45,6 +45,7 @@ class COFFWriter { Error write(bool IsBigObj); Error patchDebugDirectory(); + Expected virtualAddressToFileAddress(uint32_t RVA); public: virtual ~COFFWriter() {} diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index c1d8c1fa979cd..f7332b7f66fe0 100644 --- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -288,7 +288,7 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename, Object &Obj) { for (auto &Sec : Obj.sections()) { if (Sec.Name == SecName) { - if (Sec.OriginalData.empty()) + if (Sec.Type == SHT_NOBITS) return createStringError(object_error::parse_failed, "cannot dump section '%s': it has no contents", SecName.str().c_str()); diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp index bc590fad4bed6..8c3ae25967221 100644 --- a/llvm/tools/llvm-objcopy/ELF/Object.cpp +++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp @@ -1101,14 +1101,6 @@ static bool compareSegmentsByOffset(const Segment *A, const Segment *B) { return A->Index < B->Index; } -static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) { - if (A->PAddr < B->PAddr) - return true; - if (A->PAddr > B->PAddr) - return false; - return A->Index < B->Index; -} - void BasicELFBuilder::initFileHeader() { Obj->Flags = 0x0; Obj->Type = ET_REL; @@ -2224,56 +2216,29 @@ Error BinaryWriter::write() { } Error BinaryWriter::finalize() { - // We need a temporary list of segments that has a special order to it - // so that we know that anytime ->ParentSegment is set that segment has - // already had it's offset properly set. We only want to consider the segments - // that will affect layout of allocated sections so we only add those. - std::vector OrderedSegments; - for (const SectionBase &Sec : Obj.allocSections()) - if (Sec.ParentSegment != nullptr) - OrderedSegments.push_back(Sec.ParentSegment); - - // For binary output, we're going to use physical addresses instead of - // virtual addresses, since a binary output is used for cases like ROM - // loading and physical addresses are intended for ROM loading. - // However, if no segment has a physical address, we'll fallback to using - // virtual addresses for all. - if (all_of(OrderedSegments, - [](const Segment *Seg) { return Seg->PAddr == 0; })) - for (Segment *Seg : OrderedSegments) - Seg->PAddr = Seg->VAddr; - - llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr); - - // Because we add a ParentSegment for each section we might have duplicate - // segments in OrderedSegments. If there were duplicates then layoutSegments - // would do very strange things. - auto End = - std::unique(std::begin(OrderedSegments), std::end(OrderedSegments)); - OrderedSegments.erase(End, std::end(OrderedSegments)); - // Compute the section LMA based on its sh_offset and the containing segment's - // p_offset and p_paddr. Also compute the minimum LMA of all sections as - // MinAddr. In the output, the contents between address 0 and MinAddr will be - // skipped. + // p_offset and p_paddr. Also compute the minimum LMA of all non-empty + // sections as MinAddr. In the output, the contents between address 0 and + // MinAddr will be skipped. 
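The BinaryWriter::finalize code below implements the comment just quoted: each allocated section's LMA is derived from its parent segment (sh_offset - p_offset + p_paddr), and only non-empty sections feed the minimum-address computation, so zero-sized sections can no longer drag MinAddr down. Reduced to stand-in types (Segment/Section here are trimmed sketches, not the real llvm-objcopy classes):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Segment { uint64_t Offset, PAddr; };
struct Section { uint64_t Offset, Addr, Size; Segment *ParentSegment; };

static uint64_t computeMinLMA(std::vector<Section> &AllocSections) {
  uint64_t MinAddr = UINT64_MAX;
  for (Section &Sec : AllocSections) {
    if (Sec.ParentSegment) // LMA = sh_offset - p_offset + p_paddr.
      Sec.Addr = Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr;
    if (Sec.Size > 0) // Empty sections do not contribute to MinAddr.
      MinAddr = std::min(MinAddr, Sec.Addr);
  }
  return MinAddr; // Content below MinAddr is skipped in the raw output.
}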
uint64_t MinAddr = UINT64_MAX; for (SectionBase &Sec : Obj.allocSections()) { if (Sec.ParentSegment != nullptr) Sec.Addr = Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr; - MinAddr = std::min(MinAddr, Sec.Addr); + if (Sec.Size > 0) + MinAddr = std::min(MinAddr, Sec.Addr); } // Now that every section has been laid out we just need to compute the total // file size. This might not be the same as the offset returned by // layoutSections, because we want to truncate the last segment to the end of - // its last section, to match GNU objcopy's behaviour. + // its last non-empty section, to match GNU objcopy's behaviour. TotalSize = 0; - for (SectionBase &Sec : Obj.allocSections()) { - Sec.Offset = Sec.Addr - MinAddr; - if (Sec.Type != SHT_NOBITS) + for (SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS && Sec.Size > 0) { + Sec.Offset = Sec.Addr - MinAddr; TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); - } + } if (Error E = Buf.allocate(TotalSize)) return E; diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index b4662ec5b5cd6..cf32f00f36153 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -103,6 +103,7 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd, R.Symbol = nullptr; // We'll fill this field later. R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); R.Scattered = MachOObj.isRelocationScattered(R.Info); + R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); S.Relocations.push_back(R); } @@ -203,12 +204,27 @@ void MachOReader::readSymbolTable(Object &O) const { } void MachOReader::setSymbolInRelocationInfo(Object &O) const { + std::vector Sections; + for (auto &LC : O.LoadCommands) + for (std::unique_ptr
&Sec : LC.Sections) + Sections.push_back(Sec.get()); + for (LoadCommand &LC : O.LoadCommands) for (std::unique_ptr
&Sec : LC.Sections) for (auto &Reloc : Sec->Relocations) - if (!Reloc.Scattered) - Reloc.Symbol = O.SymTable.getSymbolByIndex( - Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian())); + if (!Reloc.Scattered) { + const uint32_t SymbolNum = + Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); + if (Reloc.Extern) { + Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); + } else { + // FIXME: Refactor error handling in MachOReader and report an error + // if we encounter an invalid relocation. + assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && + "Invalid section index."); + Reloc.Sec = Sections[SymbolNum - 1]; + } + } } void MachOReader::readRebaseInfo(Object &O) const { diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index db31ef518aa89..1cb67d0647f7a 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -239,11 +239,13 @@ void MachOWriter::writeSections() { memcpy(B.getBufferStart() + Sec->Offset, Sec->Content.data(), Sec->Content.size()); for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { - auto RelocInfo = Sec->Relocations[Index]; - if (!RelocInfo.Scattered) - RelocInfo.setPlainRelocationSymbolNum(RelocInfo.Symbol->Index, - IsLittleEndian); - + RelocationInfo RelocInfo = Sec->Relocations[Index]; + if (!RelocInfo.Scattered) { + const uint32_t SymbolNum = RelocInfo.Extern + ? (*RelocInfo.Symbol)->Index + : (*RelocInfo.Sec)->Index; + RelocInfo.setPlainRelocationSymbolNum(SymbolNum, IsLittleEndian); + } if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct( reinterpret_cast(RelocInfo.Info)); diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp index 89fe9946c73ff..0f28cb054a960 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -60,13 +60,13 @@ Error Object::removeSections( for (const LoadCommand &LC : LoadCommands) for (const std::unique_ptr
&Sec : LC.Sections) for (const RelocationInfo &R : Sec->Relocations) - if (R.Symbol && DeadSymbols.count(R.Symbol)) + if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) return createStringError(std::errc::invalid_argument, "symbol '%s' defined in section with index " "'%u' cannot be removed because it is " "referenced by a relocation in section '%s'", - R.Symbol->Name.c_str(), - *(R.Symbol->section()), + (*R.Symbol)->Name.c_str(), + *((*R.Symbol)->section()), Sec->CanonicalName.c_str()); SymTable.removeSymbols(IsDead); for (std::unique_ptr &S : SymTable.Symbols) diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index dc828e66fcbd7..b0123732f80ae 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -107,9 +107,7 @@ struct SymbolEntry { uint16_t n_desc; uint64_t n_value; - bool isExternalSymbol() const { - return n_type & ((MachO::N_EXT | MachO::N_PEXT)); - } + bool isExternalSymbol() const { return n_type & MachO::N_EXT; } bool isLocalSymbol() const { return !isExternalSymbol(); } @@ -162,9 +160,14 @@ struct StringTable { }; struct RelocationInfo { - const SymbolEntry *Symbol; + // The referenced symbol entry. Set if !Scattered && Extern. + Optional Symbol; + // The referenced section. Set if !Scattered && !Extern. + Optional Sec; // True if Info is a scattered_relocation_info. bool Scattered; + // True if the r_symbolnum points to a section number (i.e. r_extern=0). + bool Extern; MachO::any_relocation_info Info; unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 6a66a16b7004a..6d46496ecd4ea 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -230,8 +230,10 @@ struct SymbolSorter { if (!BTypeOrErr) reportError(BTypeOrErr.takeError(), B.getObject()->getFileName()); SymbolRef::Type BType = *BTypeOrErr; - uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue(); - uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue(); + uint64_t AAddr = + (AType != SymbolRef::ST_Function) ? 0 : cantFail(A.getValue()); + uint64_t BAddr = + (BType != SymbolRef::ST_Function) ? 0 : cantFail(B.getValue()); return AAddr < BAddr; } }; @@ -1267,7 +1269,7 @@ static void CreateSymbolAddressMap(MachOObjectFile *O, SymbolRef::Type ST = unwrapOrError(Symbol.getType(), FileName); if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || ST == SymbolRef::ST_Other) { - uint64_t Address = Symbol.getValue(); + uint64_t Address = cantFail(Symbol.getValue()); StringRef SymName = unwrapOrError(Symbol.getName(), FileName); if (!SymName.startswith(".objc")) (*AddrMap)[Address] = SymName; @@ -3352,7 +3354,7 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S, // and return its name. 
const char *SymbolName = nullptr; if (reloc_found && isExtern) { - n_value = Symbol.getValue(); + n_value = cantFail(Symbol.getValue()); StringRef Name = unwrapOrError(Symbol.getName(), info->O->getFileName()); if (!Name.empty()) { SymbolName = Name.data(); @@ -6908,7 +6910,7 @@ static const char *GuessLiteralPointer(uint64_t ReferenceValue, if (info->O->getAnyRelocationPCRel(RE)) { unsigned Type = info->O->getAnyRelocationType(RE); if (Type == MachO::X86_64_RELOC_SIGNED) { - ReferenceValue = Symbol.getValue(); + ReferenceValue = cantFail(Symbol.getValue()); } } } @@ -7449,7 +7451,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, unwrapOrError(Symbol.getType(), MachOOF->getFileName()); if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || ST == SymbolRef::ST_Other) { - uint64_t Address = Symbol.getValue(); + uint64_t Address = cantFail(Symbol.getValue()); StringRef SymName = unwrapOrError(Symbol.getName(), MachOOF->getFileName()); AddrMap[Address] = SymName; @@ -7528,7 +7530,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, // Start at the address of the symbol relative to the section's address. uint64_t SectSize = Sections[SectIdx].getSize(); - uint64_t Start = Symbols[SymIdx].getValue(); + uint64_t Start = cantFail(Symbols[SymIdx].getValue()); uint64_t SectionAddress = Sections[SectIdx].getAddress(); Start -= SectionAddress; @@ -7549,7 +7551,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, if (NextSymType == SymbolRef::ST_Function) { containsNextSym = Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]); - NextSym = Symbols[NextSymIdx].getValue(); + NextSym = cantFail(Symbols[NextSymIdx].getValue()); NextSym -= SectionAddress; break; } @@ -8208,7 +8210,7 @@ void objdump::printMachOUnwindInfo(const MachOObjectFile *Obj) { if (Section == Obj->section_end()) continue; - uint64_t Addr = SymRef.getValue(); + uint64_t Addr = cantFail(SymRef.getValue()); Symbols.insert(std::make_pair(Addr, SymRef)); } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index a68a8c562c1f9..6eebf98744aae 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1091,37 +1091,30 @@ static char getMappingSymbolKind(ArrayRef MappingSymbols, return (It - 1)->second; } -static uint64_t -dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, - const ObjectFile *Obj, ArrayRef Bytes, - ArrayRef MappingSymbols) { +static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, + uint64_t End, const ObjectFile *Obj, + ArrayRef Bytes, + ArrayRef MappingSymbols) { support::endianness Endian = Obj->isLittleEndian() ? 
support::little : support::big; - while (Index < End) { - outs() << format("%8" PRIx64 ":", SectionAddr + Index); - outs() << "\t"; - if (Index + 4 <= End) { - dumpBytes(Bytes.slice(Index, 4), outs()); - outs() << "\t.word\t" - << format_hex( - support::endian::read32(Bytes.data() + Index, Endian), 10); - Index += 4; - } else if (Index + 2 <= End) { - dumpBytes(Bytes.slice(Index, 2), outs()); - outs() << "\t\t.short\t" - << format_hex( - support::endian::read16(Bytes.data() + Index, Endian), 6); - Index += 2; - } else { - dumpBytes(Bytes.slice(Index, 1), outs()); - outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4); - ++Index; - } - outs() << "\n"; - if (getMappingSymbolKind(MappingSymbols, Index) != 'd') - break; + outs() << format("%8" PRIx64 ":\t", SectionAddr + Index); + if (Index + 4 <= End) { + dumpBytes(Bytes.slice(Index, 4), outs()); + outs() << "\t.word\t" + << format_hex(support::endian::read32(Bytes.data() + Index, Endian), + 10); + return 4; + } + if (Index + 2 <= End) { + dumpBytes(Bytes.slice(Index, 2), outs()); + outs() << "\t\t.short\t" + << format_hex(support::endian::read16(Bytes.data() + Index, Endian), + 6); + return 2; } - return Index; + dumpBytes(Bytes.slice(Index, 1), outs()); + outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4); + return 1; } static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, @@ -1458,125 +1451,139 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, bool CheckARMELFData = hasMappingSymbols(Obj) && Symbols[SI].Type != ELF::STT_OBJECT && !DisassembleAll; + bool DumpARMELFData = false; while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the // same section. We rely on the markers introduced to understand what // we need to dump. If the data marker is within a function, it is // denoted as a word/short etc. - if (CheckARMELFData && - getMappingSymbolKind(MappingSymbols, Index) == 'd') { - Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, - MappingSymbols); - continue; - } - - // When -z or --disassemble-zeroes are given we always dissasemble - // them. Otherwise we might want to skip zero bytes we see. - if (!DisassembleZeroes) { - uint64_t MaxOffset = End - Index; - // For --reloc: print zero blocks patched by relocations, so that - // relocations can be shown in the dump. - if (RelCur != RelEnd) - MaxOffset = RelCur->getOffset() - Index; - - if (size_t N = - countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { - outs() << "\t\t..." << '\n'; - Index += N; - continue; + if (CheckARMELFData) { + char Kind = getMappingSymbolKind(MappingSymbols, Index); + DumpARMELFData = Kind == 'd'; + if (SecondarySTI) { + if (Kind == 'a') { + STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI; + DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm; + } else if (Kind == 't') { + STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI; + DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm; + } } } - if (SecondarySTI) { - if (getMappingSymbolKind(MappingSymbols, Index) == 'a') { - STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI; - DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm; - } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') { - STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI; - DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm; + if (DumpARMELFData) { + Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, + MappingSymbols); + } else { + // When -z or --disassemble-zeroes are given we always disassemble + // them.
Otherwise we might want to skip zero bytes we see. + if (!DisassembleZeroes) { + uint64_t MaxOffset = End - Index; + // For --reloc: print zero blocks patched by relocations, so that + // relocations can be shown in the dump. + if (RelCur != RelEnd) + MaxOffset = RelCur->getOffset() - Index; + + if (size_t N = + countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { + outs() << "\t\t..." << '\n'; + Index += N; + continue; + } } - } - // Disassemble a real instruction or a data when disassemble all is - // provided - MCInst Inst; - bool Disassembled = DisAsm->getInstruction( - Inst, Size, Bytes.slice(Index), SectionAddr + Index, CommentStream); - if (Size == 0) - Size = 1; - - PIP.printInst(*IP, Disassembled ? &Inst : nullptr, - Bytes.slice(Index, Size), - {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, - outs(), "", *STI, &SP, Obj->getFileName(), &Rels); - outs() << CommentStream.str(); - Comments.clear(); - - // If disassembly has failed, avoid analysing invalid/incomplete - // instruction information. Otherwise, try to resolve the target of a - // call, tail call, etc. to a specific symbol. - if (Disassembled && MIA && - (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || - MIA->isConditionalBranch(Inst))) { - uint64_t Target; - if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) { - // In a relocatable object, the target's section must reside in - // the same section as the call instruction or it is accessed - // through a relocation. - // - // In a non-relocatable object, the target may be in any section. - // In that case, locate the section(s) containing the target address - // and find the symbol in one of those, if possible. - // - // N.B. We don't walk the relocations in the relocatable case yet. - std::vector TargetSectionSymbols; - if (!Obj->isRelocatableObject()) { - auto It = llvm::partition_point( - SectionAddresses, - [=](const std::pair &O) { - return O.first <= Target; - }); - uint64_t TargetSecAddr = 0; - while (It != SectionAddresses.begin()) { - --It; - if (TargetSecAddr == 0) - TargetSecAddr = It->first; - if (It->first != TargetSecAddr) - break; - TargetSectionSymbols.push_back(&AllSymbols[It->second]); + // Disassemble a real instruction or a data when disassemble all is + // provided + MCInst Inst; + bool Disassembled = + DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), + SectionAddr + Index, CommentStream); + if (Size == 0) + Size = 1; + + PIP.printInst( + *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), + {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(), + "", *STI, &SP, Obj->getFileName(), &Rels); + outs() << CommentStream.str(); + Comments.clear(); + + // If disassembly has failed, avoid analysing invalid/incomplete + // instruction information. Otherwise, try to resolve the target + // address (jump target or memory operand address) and print it on the + // right of the instruction. + if (Disassembled && MIA) { + uint64_t Target; + bool PrintTarget = + MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target); + if (!PrintTarget) + if (Optional MaybeTarget = + MIA->evaluateMemoryOperandAddress( + Inst, SectionAddr + Index, Size)) { + Target = *MaybeTarget; + PrintTarget = true; + outs() << " # " << Twine::utohexstr(Target); } - } else { - TargetSectionSymbols.push_back(&Symbols); - } - TargetSectionSymbols.push_back(&AbsoluteSymbols); - - // Find the last symbol in the first candidate section whose offset - // is less than or equal to the target. 
If there are no such - // symbols, try in the next section and so on, before finally using - // the nearest preceding absolute symbol (if any), if there are no - // other valid symbols. - const SymbolInfoTy *TargetSym = nullptr; - for (const SectionSymbolsTy *TargetSymbols : TargetSectionSymbols) { - auto It = llvm::partition_point( - *TargetSymbols, - [=](const SymbolInfoTy &O) { return O.Addr <= Target; }); - if (It != TargetSymbols->begin()) { - TargetSym = &*(It - 1); - break; + if (PrintTarget) { + // In a relocatable object, the target's section must reside in + // the same section as the call instruction or it is accessed + // through a relocation. + // + // In a non-relocatable object, the target may be in any section. + // In that case, locate the section(s) containing the target + // address and find the symbol in one of those, if possible. + // + // N.B. We don't walk the relocations in the relocatable case yet. + std::vector TargetSectionSymbols; + if (!Obj->isRelocatableObject()) { + auto It = llvm::partition_point( + SectionAddresses, + [=](const std::pair &O) { + return O.first <= Target; + }); + uint64_t TargetSecAddr = 0; + while (It != SectionAddresses.begin()) { + --It; + if (TargetSecAddr == 0) + TargetSecAddr = It->first; + if (It->first != TargetSecAddr) + break; + TargetSectionSymbols.push_back(&AllSymbols[It->second]); + } + } else { + TargetSectionSymbols.push_back(&Symbols); + } + TargetSectionSymbols.push_back(&AbsoluteSymbols); + + // Find the last symbol in the first candidate section whose + // offset is less than or equal to the target. If there are no + // such symbols, try in the next section and so on, before finally + // using the nearest preceding absolute symbol (if any), if there + // are no other valid symbols. 
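As a standalone illustration of the lookup idiom described in the comment above (the real loop over TargetSectionSymbols follows in the patch): the nearest preceding symbol is found by binary-searching an address-sorted table. This sketch uses only the standard library; Sym, findPrecedingSymbol, and std::partition_point in place of llvm::partition_point are assumptions made for self-containment, not code from this patch.

```cpp
#include <algorithm>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct Sym {
  uint64_t Addr;
  std::string Name;
};

// Find the last symbol whose address is <= Target in a vector sorted by
// address. partition_point returns the first element for which the
// predicate is false, so the element just before it (if any) is the
// nearest preceding symbol.
std::optional<Sym> findPrecedingSymbol(const std::vector<Sym> &Syms,
                                       uint64_t Target) {
  auto It = std::partition_point(
      Syms.begin(), Syms.end(),
      [=](const Sym &S) { return S.Addr <= Target; });
  if (It == Syms.begin())
    return std::nullopt; // Every symbol lies above Target.
  return *(It - 1);
}
```

The displacement that the dump prints as <name+0xNN> is then just Target minus the address of the symbol this helper returns.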
+ const SymbolInfoTy *TargetSym = nullptr; + for (const SectionSymbolsTy *TargetSymbols : + TargetSectionSymbols) { + auto It = llvm::partition_point( + *TargetSymbols, + [=](const SymbolInfoTy &O) { return O.Addr <= Target; }); + if (It != TargetSymbols->begin()) { + TargetSym = &*(It - 1); + break; + } } - } - if (TargetSym != nullptr) { - uint64_t TargetAddress = TargetSym->Addr; - std::string TargetName = TargetSym->Name.str(); - if (Demangle) - TargetName = demangle(TargetName); - - outs() << " <" << TargetName; - uint64_t Disp = Target - TargetAddress; - if (Disp) - outs() << "+0x" << Twine::utohexstr(Disp); - outs() << '>'; + if (TargetSym != nullptr) { + uint64_t TargetAddress = TargetSym->Addr; + std::string TargetName = TargetSym->Name.str(); + if (Demangle) + TargetName = demangle(TargetName); + + outs() << " <" << TargetName; + uint64_t Disp = Target - TargetAddress; + if (Disp) + outs() << "+0x" << Twine::utohexstr(Disp); + outs() << '>'; + } } } } diff --git a/llvm/tools/llvm-rc/ResourceScriptToken.cpp b/llvm/tools/llvm-rc/ResourceScriptToken.cpp index 1753747887693..2e21f675b92c1 100644 --- a/llvm/tools/llvm-rc/ResourceScriptToken.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptToken.cpp @@ -12,6 +12,7 @@ //===---------------------------------------------------------------------===// #include "ResourceScriptToken.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -201,7 +202,7 @@ bool Tokenizer::advance(size_t Amount) { } bool Tokenizer::skipWhitespaces() { - while (!streamEof() && std::isspace(Data[Pos])) + while (!streamEof() && isSpace(Data[Pos])) advance(); return !streamEof(); } diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index bd4ab0e7dec97..08031ae8f49c8 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -730,6 +730,10 @@ void COFFDumper::printCOFFDebugDirectory() { W.printHex("SizeOfData", D.SizeOfData); W.printHex("AddressOfRawData", D.AddressOfRawData); W.printHex("PointerToRawData", D.PointerToRawData); + // Ideally, if D.AddressOfRawData == 0, we should try to load the payload + // using D.PointerToRawData instead. 
+ if (D.AddressOfRawData == 0) + continue; if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) { const codeview::DebugInfo *DebugInfo; StringRef PDBFileName; diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 9930c03c6d972..b5d9a274e31e4 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -882,10 +882,6 @@ template class GNUStyle : public DumpStyle { std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym); void printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela); - bool checkTLSSections(const Elf_Phdr &Phdr, const Elf_Shdr &Sec); - bool checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec); - bool checkVMA(const Elf_Phdr &Phdr, const Elf_Shdr &Sec); - bool checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec); void printProgramHeaders(const ELFO *Obj); void printSectionMapping(const ELFO *Obj); void printGNUVersionSectionProlog(const ELFFile *Obj, @@ -4022,63 +4018,76 @@ static inline std::string printPhdrFlags(unsigned Flag) { return Str; } -// SHF_TLS sections are only in PT_TLS, PT_LOAD or PT_GNU_RELRO -// PT_TLS must only have SHF_TLS sections template -bool GNUStyle::checkTLSSections(const Elf_Phdr &Phdr, - const Elf_Shdr &Sec) { - return (((Sec.sh_flags & ELF::SHF_TLS) && - ((Phdr.p_type == ELF::PT_TLS) || (Phdr.p_type == ELF::PT_LOAD) || - (Phdr.p_type == ELF::PT_GNU_RELRO))) || - (!(Sec.sh_flags & ELF::SHF_TLS) && Phdr.p_type != ELF::PT_TLS)); +static bool checkTLSSections(const typename ELFT::Phdr &Phdr, + const typename ELFT::Shdr &Sec) { + if (Sec.sh_flags & ELF::SHF_TLS) { + // .tbss must only be shown in the PT_TLS segment. + if (Sec.sh_type == ELF::SHT_NOBITS) + return Phdr.p_type == ELF::PT_TLS; + + // SHF_TLS sections are only shown in PT_TLS, PT_LOAD or PT_GNU_RELRO + // segments. + return (Phdr.p_type == ELF::PT_TLS) || (Phdr.p_type == ELF::PT_LOAD) || + (Phdr.p_type == ELF::PT_GNU_RELRO); + } + + // PT_TLS must only have SHF_TLS sections. + return Phdr.p_type != ELF::PT_TLS; } -// Non-SHT_NOBITS must have its offset inside the segment -// Only non-zero section can be at end of segment template -bool GNUStyle::checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) { +static bool checkOffsets(const typename ELFT::Phdr &Phdr, + const typename ELFT::Shdr &Sec) { + // SHT_NOBITS sections don't need to have an offset inside the segment. if (Sec.sh_type == ELF::SHT_NOBITS) return true; - bool IsSpecial = - (Sec.sh_type == ELF::SHT_NOBITS) && ((Sec.sh_flags & ELF::SHF_TLS) != 0); - // .tbss is special, it only has memory in PT_TLS and has NOBITS properties - auto SectionSize = - (IsSpecial && Phdr.p_type != ELF::PT_TLS) ? 0 : Sec.sh_size; - if (Sec.sh_offset >= Phdr.p_offset) - return ((Sec.sh_offset + SectionSize <= Phdr.p_filesz + Phdr.p_offset) - /*only non-zero sized sections at end*/ - && (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz)); - return false; -} - -// SHF_ALLOC must have VMA inside segment -// Only non-zero section can be at end of segment + + if (Sec.sh_offset < Phdr.p_offset) + return false; + + // Only non-empty sections can be at the end of a segment. + if (Sec.sh_size == 0) + return (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz); + return Sec.sh_offset + Sec.sh_size <= Phdr.p_offset + Phdr.p_filesz; +} + +// Check that an allocatable section belongs to a virtual address +// space of a segment. 
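The decision tree that the rewritten checkTLSSections above encodes can be distilled into a standalone predicate; the checkVMA definition resumes right after this aside. The ELF constants below mirror the values in llvm/BinaryFormat/ELF.h but are declared locally so the sketch compiles on its own; treat it as an illustration of the rules, not the dumper's real types.

```cpp
#include <cstdint>

// Local stand-ins for the ELF constants used by the check (values as in
// llvm/BinaryFormat/ELF.h, assumed here for self-containment).
constexpr uint32_t PT_LOAD = 1, PT_TLS = 7, PT_GNU_RELRO = 0x6474e552;
constexpr uint32_t SHT_NOBITS = 8;
constexpr uint64_t SHF_TLS = 0x400;

struct Phdr { uint32_t p_type; };
struct Shdr { uint32_t sh_type; uint64_t sh_flags; };

bool checkTLSSections(const Phdr &P, const Shdr &S) {
  if (S.sh_flags & SHF_TLS) {
    // .tbss (SHF_TLS + SHT_NOBITS) must only be shown in PT_TLS.
    if (S.sh_type == SHT_NOBITS)
      return P.p_type == PT_TLS;
    // Other SHF_TLS sections may appear in PT_TLS, PT_LOAD or
    // PT_GNU_RELRO segments.
    return P.p_type == PT_TLS || P.p_type == PT_LOAD ||
           P.p_type == PT_GNU_RELRO;
  }
  // PT_TLS must contain only SHF_TLS sections.
  return P.p_type != PT_TLS;
}
```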
template -bool GNUStyle::checkVMA(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) { +static bool checkVMA(const typename ELFT::Phdr &Phdr, + const typename ELFT::Shdr &Sec) { if (!(Sec.sh_flags & ELF::SHF_ALLOC)) return true; - bool IsSpecial = + + if (Sec.sh_addr < Phdr.p_vaddr) + return false; + + bool IsTbss = (Sec.sh_type == ELF::SHT_NOBITS) && ((Sec.sh_flags & ELF::SHF_TLS) != 0); - // .tbss is special, it only has memory in PT_TLS and has NOBITS properties - auto SectionSize = - (IsSpecial && Phdr.p_type != ELF::PT_TLS) ? 0 : Sec.sh_size; - if (Sec.sh_addr >= Phdr.p_vaddr) - return ((Sec.sh_addr + SectionSize <= Phdr.p_vaddr + Phdr.p_memsz) && - (Sec.sh_addr + 1 <= Phdr.p_vaddr + Phdr.p_memsz)); - return false; + // .tbss is special, it only has memory in PT_TLS and has NOBITS properties. + bool IsTbssInNonTLS = IsTbss && Phdr.p_type != ELF::PT_TLS; + // Only non-empty sections can be at the end of a segment. + if (Sec.sh_size == 0 || IsTbssInNonTLS) + return Sec.sh_addr + 1 <= Phdr.p_vaddr + Phdr.p_memsz; + return Sec.sh_addr + Sec.sh_size <= Phdr.p_vaddr + Phdr.p_memsz; } -// No section with zero size must be at start or end of PT_DYNAMIC template -bool GNUStyle::checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) { - if (Phdr.p_type != ELF::PT_DYNAMIC || Sec.sh_size != 0 || Phdr.p_memsz == 0) +static bool checkPTDynamic(const typename ELFT::Phdr &Phdr, + const typename ELFT::Shdr &Sec) { + if (Phdr.p_type != ELF::PT_DYNAMIC || Phdr.p_memsz == 0 || Sec.sh_size != 0) return true; - // Is section within the phdr both based on offset and VMA ? - return ((Sec.sh_type == ELF::SHT_NOBITS) || - (Sec.sh_offset > Phdr.p_offset && - Sec.sh_offset < Phdr.p_offset + Phdr.p_filesz)) && - (!(Sec.sh_flags & ELF::SHF_ALLOC) || - (Sec.sh_addr > Phdr.p_vaddr && Sec.sh_addr < Phdr.p_memsz)); + + // We get here when we have an empty section. Only non-empty sections can be + // at the start or at the end of PT_DYNAMIC. + // Is section within the phdr both based on offset and VMA? 
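checkOffsets and checkVMA above share one underlying rule: a section must fit inside the segment's file or memory interval, and an empty section is treated as occupying one byte so that it cannot sit exactly on the segment's end. A hypothetical helper expressing that shared rule is sketched below (the patch deliberately keeps the two checks separate; the checkPTDynamic body continues after this aside).

```cpp
#include <cstdint>

// Shared containment rule behind checkOffsets (file offsets) and
// checkVMA (virtual addresses): [Start, Start + Size) must fit inside
// [SegStart, SegStart + SegSize), where an empty section is given an
// effective size of one byte so only non-empty sections may end the
// segment.
bool fitsInSegment(uint64_t SegStart, uint64_t SegSize, uint64_t Start,
                   uint64_t Size) {
  if (Start < SegStart)
    return false;
  uint64_t EffectiveSize = Size == 0 ? 1 : Size;
  return Start + EffectiveSize <= SegStart + SegSize;
}
```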
+ bool CheckOffset = (Sec.sh_type == ELF::SHT_NOBITS) || + (Sec.sh_offset > Phdr.p_offset && + Sec.sh_offset < Phdr.p_offset + Phdr.p_filesz); + bool CheckVA = !(Sec.sh_flags & ELF::SHF_ALLOC) || + (Sec.sh_addr > Phdr.p_vaddr && Sec.sh_addr < Phdr.p_memsz); + return CheckOffset && CheckVA; } template @@ -4128,8 +4137,31 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { for (auto Field : Fields) printField(Field); if (Phdr.p_type == ELF::PT_INTERP) { - OS << "\n [Requesting program interpreter: "; - OS << reinterpret_cast(Obj->base()) + Phdr.p_offset << "]"; + OS << "\n"; + auto ReportBadInterp = [&](const Twine &Msg) { + reportWarning( + createError("unable to read program interpreter name at offset 0x" + + Twine::utohexstr(Phdr.p_offset) + ": " + Msg), + this->FileName); + }; + + if (Phdr.p_offset >= Obj->getBufSize()) { + ReportBadInterp("it goes past the end of the file (0x" + + Twine::utohexstr(Obj->getBufSize()) + ")"); + continue; + } + + const char *Data = + reinterpret_cast(Obj->base()) + Phdr.p_offset; + size_t MaxSize = Obj->getBufSize() - Phdr.p_offset; + size_t Len = strnlen(Data, MaxSize); + if (Len == MaxSize) { + ReportBadInterp("it is not null-terminated"); + continue; + } + + OS << " [Requesting program interpreter: "; + OS << StringRef(Data, Len) << "]"; } OS << "\n"; } @@ -4144,17 +4176,16 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { unwrapOrError(this->FileName, Obj->program_headers())) { std::string Sections; OS << format(" %2.2d ", Phnum++); + // Check if each section is in a segment and then print mapping. for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { - // Check if each section is in a segment and then print mapping. + if (Sec.sh_type == ELF::SHT_NULL) + continue; + // readelf additionally makes sure it does not print zero sized sections // at end of segments and for PT_DYNAMIC both start and end of section // .tbss must only be shown in PT_TLS section. 
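Stepping back to the PT_INTERP rework above: it replaces a blind pointer dereference with two explicit failure modes, an offset past the end of the buffer and a string that is never null-terminated. The same bounds-checked read in isolation (readCString is a hypothetical helper, not the dumper's API; strnlen is POSIX rather than standard C++):

```cpp
#include <cstring>
#include <optional>
#include <string>

// Read a NUL-terminated string at Offset in a Size-byte buffer. Fails
// if the offset is out of bounds or no terminator is found before the
// end of the buffer, matching the two warnings emitted by the
// PT_INTERP code above.
std::optional<std::string> readCString(const char *Buf, size_t Size,
                                       size_t Offset) {
  if (Offset >= Size)
    return std::nullopt; // Goes past the end of the file.
  size_t MaxLen = Size - Offset;
  size_t Len = strnlen(Buf + Offset, MaxLen);
  if (Len == MaxLen)
    return std::nullopt; // Not null-terminated.
  return std::string(Buf + Offset, Len);
}
```

The section-mapping hunk resumes below.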
- bool TbssInNonTLS = (Sec.sh_type == ELF::SHT_NOBITS) && - ((Sec.sh_flags & ELF::SHF_TLS) != 0) && - Phdr.p_type != ELF::PT_TLS; - if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) && - checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && - checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) { + if (checkTLSSections(Phdr, Sec) && checkOffsets(Phdr, Sec) && + checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec)) { Sections += unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + " "; diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt index eb42abfc1d409..563e4a100dc47 100644 --- a/llvm/tools/llvm-shlib/CMakeLists.txt +++ b/llvm/tools/llvm-shlib/CMakeLists.txt @@ -164,7 +164,7 @@ if(LLVM_BUILD_LLVM_C_DYLIB AND MSVC) endif() add_custom_command(OUTPUT ${LLVM_EXPORTED_SYMBOL_FILE} - COMMAND ${PYTHON_EXECUTABLE} ${GEN_SCRIPT} --libsfile ${LIBSFILE} ${GEN_UNDERSCORE} --nm "${llvm_nm}" -o ${LLVM_EXPORTED_SYMBOL_FILE} + COMMAND "${Python3_EXECUTABLE}" ${GEN_SCRIPT} --libsfile ${LIBSFILE} ${GEN_UNDERSCORE} --nm "${llvm_nm}" -o ${LLVM_EXPORTED_SYMBOL_FILE} DEPENDS ${LIB_NAMES} ${llvm_nm_target} COMMENT "Generating export list for LLVM-C" VERBATIM ) diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 62aa940910d49..2e84ca49b6e0b 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -219,42 +219,42 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, Optional P; switch (PGOKindFlag) { - case InstrGen: - P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); - break; - case InstrUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); - break; - case SampleUse: - P = PGOOptions(ProfileFile, "", ProfileRemappingFile, - PGOOptions::SampleUse); - break; - case NoPGO: - if (DebugInfoForProfiling) - P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, - true); - else - P = None; - } - if (CSPGOKindFlag != NoCSPGO) { - if (P && (P->Action == PGOOptions::IRInstr || - P->Action == PGOOptions::SampleUse)) - errs() << "CSPGOKind cannot be used with IRInstr or SampleUse"; - if (CSPGOKindFlag == CSInstrGen) { - if (CSProfileGenFile.empty()) - errs() << "CSInstrGen needs to specify CSProfileGenFile"; - if (P) { - P->CSAction = PGOOptions::CSIRInstr; - P->CSProfileGenFile = CSProfileGenFile; - } else - P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, - PGOOptions::NoAction, PGOOptions::CSIRInstr); - } else /* CSPGOKindFlag == CSInstrUse */ { - if (!P) - errs() << "CSInstrUse needs to be together with InstrUse"; - P->CSAction = PGOOptions::CSIRUse; - } + case InstrGen: + P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); + break; + case InstrUse: + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); + break; + case SampleUse: + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, + PGOOptions::SampleUse); + break; + case NoPGO: + if (DebugInfoForProfiling) + P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, + true); + else + P = None; + } + if (CSPGOKindFlag != NoCSPGO) { + if (P && (P->Action == PGOOptions::IRInstr || + P->Action == PGOOptions::SampleUse)) + errs() << "CSPGOKind cannot be used with IRInstr or SampleUse"; + if (CSPGOKindFlag == CSInstrGen) { + if (CSProfileGenFile.empty()) + errs() << "CSInstrGen needs to specify CSProfileGenFile"; + if (P) { + P->CSAction = PGOOptions::CSIRInstr; + P->CSProfileGenFile = CSProfileGenFile; + } else + P = PGOOptions("", 
CSProfileGenFile, ProfileRemappingFile, + PGOOptions::NoAction, PGOOptions::CSIRInstr); + } else /* CSPGOKindFlag == CSInstrUse */ { + if (!P) + errs() << "CSInstrUse needs to be together with InstrUse"; + P->CSAction = PGOOptions::CSIRUse; } + } PassInstrumentationCallbacks PIC; StandardInstrumentations SI; SI.registerCallbacks(PIC); diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h index 8b5dc3d9b7444..707522cec4b38 100644 --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -22,7 +22,6 @@ namespace llvm { class StringRef; -class LLVMContext; class Module; class TargetMachine; class ToolOutputFile; diff --git a/llvm/tools/opt/PassPrinters.h b/llvm/tools/opt/PassPrinters.h index 692befbdae758..9342c46f2ff66 100644 --- a/llvm/tools/opt/PassPrinters.h +++ b/llvm/tools/opt/PassPrinters.h @@ -14,8 +14,6 @@ #ifndef LLVM_TOOLS_OPT_PASSPRINTERS_H #define LLVM_TOOLS_OPT_PASSPRINTERS_H -#include "llvm/IR/PassManager.h" - namespace llvm { class CallGraphSCCPass; @@ -25,7 +23,6 @@ class LoopPass; class PassInfo; class raw_ostream; class RegionPass; -class Module; FunctionPass *createFunctionPassPrinter(const PassInfo *PI, raw_ostream &out, bool Quiet); diff --git a/llvm/unittests/ADT/DenseSetTest.cpp b/llvm/unittests/ADT/DenseSetTest.cpp index b080aa7f0c650..556bd46537dbf 100644 --- a/llvm/unittests/ADT/DenseSetTest.cpp +++ b/llvm/unittests/ADT/DenseSetTest.cpp @@ -70,6 +70,15 @@ typedef ::testing::Types, DenseSetTestTypes; TYPED_TEST_CASE(DenseSetTest, DenseSetTestTypes); +TYPED_TEST(DenseSetTest, Constructor) { + constexpr unsigned a[] = {1, 2, 4}; + TypeParam set(std::begin(a), std::end(a)); + EXPECT_EQ(3u, set.size()); + EXPECT_EQ(1u, set.count(1)); + EXPECT_EQ(1u, set.count(2)); + EXPECT_EQ(1u, set.count(4)); +} + TYPED_TEST(DenseSetTest, InitializerList) { TypeParam set({1, 2, 1, 4}); EXPECT_EQ(3u, set.size()); diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index 681464e7e7c31..67d573d649752 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -23,6 +23,17 @@ TEST(StringExtrasTest, isPrint) { EXPECT_TRUE(isPrint('?')); } +TEST(StringExtrasTest, isSpace) { + EXPECT_TRUE(isSpace(' ')); + EXPECT_TRUE(isSpace('\t')); + EXPECT_TRUE(isSpace('\n')); + EXPECT_TRUE(isSpace('\v')); + EXPECT_TRUE(isSpace('\f')); + EXPECT_TRUE(isSpace('\r')); + EXPECT_FALSE(isSpace('\0')); + EXPECT_FALSE(isSpace('_')); +} + TEST(StringExtrasTest, Join) { std::vector<std::string> Items; EXPECT_EQ("", join(Items.begin(), Items.end(), " ")); diff --git a/llvm/unittests/ADT/StringMapTest.cpp b/llvm/unittests/ADT/StringMapTest.cpp index 2cad2c19c5bb3..73c91f5fdd392 100644 --- a/llvm/unittests/ADT/StringMapTest.cpp +++ b/llvm/unittests/ADT/StringMapTest.cpp @@ -230,7 +230,7 @@ TEST_F(StringMapTest, StringMapEntryTest) { StringRef(testKeyFirst, testKeyLength), Allocator, 1u); EXPECT_STREQ(testKey, entry->first().data()); EXPECT_EQ(1u, entry->second); - free(entry); + entry->Destroy(Allocator); } // Test insert() method. diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 250e4ee950a27..5e2feae24f9d0 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -773,6 +773,19 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownMulBits) { expectKnownBits(/*zero*/ 95u, /*one*/ 32u); } +TEST_F(ComputeKnownBitsTest, KnownNonZeroShift) { + // %q is known nonzero without known bits.
+ // Because %q is nonzero, %A[0] is known to be zero. + parseAssembly( + "define i8 @test(i8 %p, i8* %pq) {\n" + " %q = load i8, i8* %pq, !range !0\n" + " %A = shl i8 %p, %q\n" + " ret i8 %A\n" + "}\n" + "!0 = !{ i8 1, i8 5 }\n"); + expectKnownBits(/*zero*/ 1u, /*one*/ 0u); +} + TEST_F(ComputeKnownBitsTest, ComputeKnownFshl) { // fshl(....1111....0000, 00..1111........, 6) // = 11....000000..11 diff --git a/llvm/unittests/Analysis/VectorUtilsTest.cpp b/llvm/unittests/Analysis/VectorUtilsTest.cpp index 1f004a3a09f44..cf58fba373822 100644 --- a/llvm/unittests/Analysis/VectorUtilsTest.cpp +++ b/llvm/unittests/Analysis/VectorUtilsTest.cpp @@ -536,6 +536,24 @@ TEST_F(VFShapeAPITest, API_buildVFShape) { EXPECT_EQ(Shape, Expected); } +TEST_F(VFShapeAPITest, API_getScalarShape) { + buildShape(/*VF*/ 1, /*IsScalable*/ false, /*HasGlobalPred*/ false); + EXPECT_EQ(VFShape::getScalarShape(*CI), Shape); +} + +TEST_F(VFShapeAPITest, API_getVectorizedFunction) { + VFShape ScalarShape = VFShape::getScalarShape(*CI); + EXPECT_EQ(VFDatabase(*CI).getVectorizedFunction(ScalarShape), + M->getFunction("g")); + + buildShape(/*VF*/ 1, /*IsScalable*/ true, /*HasGlobalPred*/ false); + EXPECT_EQ(VFDatabase(*CI).getVectorizedFunction(Shape), nullptr); + buildShape(/*VF*/ 1, /*IsScalable*/ false, /*HasGlobalPred*/ true); + EXPECT_EQ(VFDatabase(*CI).getVectorizedFunction(Shape), nullptr); + buildShape(/*VF*/ 1, /*IsScalable*/ true, /*HasGlobalPred*/ true); + EXPECT_EQ(VFDatabase(*CI).getVectorizedFunction(Shape), nullptr); +} + TEST_F(VFShapeAPITest, API_updateVFShape) { buildShape(/*VF*/ 2, /*IsScalable*/ false, /*HasGlobalPred*/ false); diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index defd27ee9771c..848cbc0790024 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -199,4 +199,182 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_SUB) { EXPECT_EQ(Known.One, APInt(8, 0x1)); } +TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, false); + // Create a BUILD_VECTOR + SDValue Op = DAG->getConstant(1, Loc, VecVT); + EXPECT_EQ(Op->getOpcode(), ISD::BUILD_VECTOR); + EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false)); + + APInt UndefElts; + APInt DemandedElts; + EXPECT_FALSE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); + + // Width=16, Mask=3 + DemandedElts = APInt(16, 3); + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); +} + +TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_ADD_of_BUILD_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, false); + + // Should create BUILD_VECTORs + SDValue Val1 = DAG->getConstant(1, Loc, VecVT); + SDValue Val2 = DAG->getConstant(3, Loc, VecVT); + EXPECT_EQ(Val1->getOpcode(), ISD::BUILD_VECTOR); + SDValue Op = DAG->getNode(ISD::ADD, Loc, VecVT, Val1, Val2); + + EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false)); + + APInt UndefElts; + APInt DemandedElts; + EXPECT_FALSE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); + + // Width=16, Mask=3 + DemandedElts = APInt(16, 3); + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); +} + +TEST_F(AArch64SelectionDAGTest, isSplatValue_Scalable_SPLAT_VECTOR) { + if (!TM) + 
return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, true); + // Create a SPLAT_VECTOR + SDValue Op = DAG->getConstant(1, Loc, VecVT); + EXPECT_EQ(Op->getOpcode(), ISD::SPLAT_VECTOR); + EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false)); + + APInt UndefElts; + APInt DemandedElts; + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); + + // Width=16, Mask=3. These bits should be ignored. + DemandedElts = APInt(16, 3); + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); +} + +TEST_F(AArch64SelectionDAGTest, isSplatValue_Scalable_ADD_of_SPLAT_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, true); + + // Should create SPLAT_VECTORS + SDValue Val1 = DAG->getConstant(1, Loc, VecVT); + SDValue Val2 = DAG->getConstant(3, Loc, VecVT); + EXPECT_EQ(Val1->getOpcode(), ISD::SPLAT_VECTOR); + SDValue Op = DAG->getNode(ISD::ADD, Loc, VecVT, Val1, Val2); + + EXPECT_TRUE(DAG->isSplatValue(Op, /*AllowUndefs=*/false)); + + APInt UndefElts; + APInt DemandedElts; + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); + + // Width=16, Mask=3. These bits should be ignored. + DemandedElts = APInt(16, 3); + EXPECT_TRUE(DAG->isSplatValue(Op, DemandedElts, UndefElts)); +} + +TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_BUILD_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, false); + // Create a BUILD_VECTOR + SDValue Op = DAG->getConstant(1, Loc, VecVT); + EXPECT_EQ(Op->getOpcode(), ISD::BUILD_VECTOR); + + int SplatIdx = -1; + EXPECT_EQ(DAG->getSplatSourceVector(Op, SplatIdx), Op); + EXPECT_EQ(SplatIdx, 0); +} + +TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Fixed_ADD_of_BUILD_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, false); + + // Should create BUILD_VECTORs + SDValue Val1 = DAG->getConstant(1, Loc, VecVT); + SDValue Val2 = DAG->getConstant(3, Loc, VecVT); + EXPECT_EQ(Val1->getOpcode(), ISD::BUILD_VECTOR); + SDValue Op = DAG->getNode(ISD::ADD, Loc, VecVT, Val1, Val2); + + int SplatIdx = -1; + EXPECT_EQ(DAG->getSplatSourceVector(Op, SplatIdx), Op); + EXPECT_EQ(SplatIdx, 0); +} + +TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_SPLAT_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, true); + // Create a SPLAT_VECTOR + SDValue Op = DAG->getConstant(1, Loc, VecVT); + EXPECT_EQ(Op->getOpcode(), ISD::SPLAT_VECTOR); + + int SplatIdx = -1; + EXPECT_EQ(DAG->getSplatSourceVector(Op, SplatIdx), Op); + EXPECT_EQ(SplatIdx, 0); +} + +TEST_F(AArch64SelectionDAGTest, getSplatSourceVector_Scalable_ADD_of_SPLAT_VECTOR) { + if (!TM) + return; + + TargetLowering TL(*TM); + + SDLoc Loc; + auto IntVT = EVT::getIntegerVT(Context, 8); + auto VecVT = EVT::getVectorVT(Context, IntVT, 16, true); + + // Should create SPLAT_VECTORS + SDValue Val1 = DAG->getConstant(1, Loc, VecVT); + SDValue Val2 = DAG->getConstant(3, Loc, VecVT); + EXPECT_EQ(Val1->getOpcode(), ISD::SPLAT_VECTOR); + SDValue Op = DAG->getNode(ISD::ADD, Loc, VecVT, Val1, Val2); + + int SplatIdx = -1; + 
EXPECT_EQ(DAG->getSplatSourceVector(Op, SplatIdx), Op); + EXPECT_EQ(SplatIdx, 0); +} + } // end anonymous namespace diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index c9823201f5544..c4a466fdfe085 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -2008,8 +2008,47 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) { ASSERT_TRUE((bool)ErrOrSections); std::unique_ptr DwarfContext = DWARFContext::create(*ErrOrSections, 8); - VerifyError(*DwarfContext, - "error: DW_AT_ranges offset is beyond .debug_ranges bounds:"); + VerifyError( + *DwarfContext, + "error: DW_AT_ranges offset is beyond .debug_ranges bounds: 0x00001000"); +} + +TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRnglists) { + // Create a single compile unit with a DW_AT_ranges whose section offset + // isn't valid. + const char *yamldata = R"( + debug_str: + - '' + - /tmp/main.c + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_ranges + Form: DW_FORM_sec_offset + debug_info: + - Length: + TotalLength: 17 + Version: 5 + UnitType: DW_UT_compile + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000001000 + + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); + ASSERT_TRUE((bool)ErrOrSections); + std::unique_ptr DwarfContext = + DWARFContext::create(*ErrOrSections, 8); + VerifyError(*DwarfContext, "error: DW_AT_ranges offset is beyond " + ".debug_rnglists bounds: 0x00001000"); } TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) { diff --git a/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp index 31a4247e27d06..f25ddbbe155af 100644 --- a/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp @@ -16,6 +16,19 @@ using namespace llvm; using namespace llvm::orc; +// Writing 64-bit XCOFF isn't supported yet, so we need to disable some tests on +// AIX till it is. 
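Both expansions of the SKIPTEST_IF_UNSUPPORTED macro defined just below rely on the do { ... } while (false) idiom: the expansion is exactly one statement, so the invocation takes the usual trailing semicolon and composes safely with if/else. A minimal self-contained illustration (SKIP_IF and runTest are invented names for the example, not part of the patch):

```cpp
#include <cstdio>

// Expands to a single statement; the caller's trailing ';' completes it.
#define SKIP_IF(Cond)                                                        \
  do {                                                                       \
    if (Cond)                                                                \
      return;                                                                \
  } while (false)

void runTest(bool Unsupported) {
  if (Unsupported)
    SKIP_IF(true); // With a bare { return; } expansion, the ';' here
  else             // would orphan this else and break compilation.
    std::printf("running test body\n");
}
```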
+#ifdef _AIX +#define SKIPTEST_IF_UNSUPPORTED() \ + do { \ + return; \ + } while (false) +#else +#define SKIPTEST_IF_UNSUPPORTED() \ + do { \ + } while (false) +#endif + namespace { class MockObjectLayer { @@ -109,6 +122,7 @@ MockObjectLayer::ObjectPtr createTestObject() { } TEST(RemoteObjectLayer, AddObject) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -166,6 +180,7 @@ TEST(RemoteObjectLayer, AddObject) { } TEST(RemoteObjectLayer, AddObjectFailure) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -221,6 +236,7 @@ TEST(RemoteObjectLayer, AddObjectFailure) { TEST(RemoteObjectLayer, RemoveObject) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -268,6 +284,7 @@ TEST(RemoteObjectLayer, RemoveObject) { } TEST(RemoteObjectLayer, RemoveObjectFailure) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -324,6 +341,7 @@ TEST(RemoteObjectLayer, RemoveObjectFailure) { } TEST(RemoteObjectLayer, FindSymbol) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -406,6 +424,7 @@ TEST(RemoteObjectLayer, FindSymbol) { } TEST(RemoteObjectLayer, FindSymbolIn) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -484,6 +503,7 @@ TEST(RemoteObjectLayer, FindSymbolIn) { } TEST(RemoteObjectLayer, EmitAndFinalize) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) @@ -532,6 +552,7 @@ TEST(RemoteObjectLayer, EmitAndFinalize) { } TEST(RemoteObjectLayer, EmitAndFinalizeFailure) { + SKIPTEST_IF_UNSUPPORTED(); llvm::orc::rpc::registerStringError(); auto TestObject = createTestObject(); if (!TestObject) diff --git a/llvm/unittests/IR/AbstractCallSiteTest.cpp b/llvm/unittests/IR/AbstractCallSiteTest.cpp new file mode 100644 index 0000000000000..ddb10911ad028 --- /dev/null +++ b/llvm/unittests/IR/AbstractCallSiteTest.cpp @@ -0,0 +1,55 @@ +//===----- AbstractCallSiteTest.cpp - AbstractCallSite Unittests ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/AbstractCallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("AbstractCallSiteTests", errs()); + return Mod; +} + +TEST(AbstractCallSite, CallbackCall) { + LLVMContext C; + + const char *IR = + "define void @callback(i8* %X, i32* %A) {\n" + " ret void\n" + "}\n" + "define void @foo(i32* %A) {\n" + " call void (i32, void (i8*, ...)*, ...) 
@broker(i32 1, void (i8*, ...)* bitcast (void (i8*, i32*)* @callback to void (i8*, ...)*), i32* %A)\n" + " ret void\n" + "}\n" + "declare !callback !0 void @broker(i32, void (i8*, ...)*, ...)\n" + "!0 = !{!1}\n" + "!1 = !{i64 1, i64 -1, i1 true}"; + + std::unique_ptr M = parseIR(C, IR); + ASSERT_TRUE(M); + + Function *Callback = M->getFunction("callback"); + ASSERT_NE(Callback, nullptr); + + const Use *CallbackUse = Callback->getSingleUndroppableUse(); + ASSERT_NE(CallbackUse, nullptr); + + AbstractCallSite ACS(CallbackUse); + EXPECT_TRUE(ACS); + EXPECT_TRUE(ACS.isCallbackCall()); + EXPECT_TRUE(ACS.isCallee(CallbackUse)); + EXPECT_EQ(ACS.getCalledFunction(), Callback); +} diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index 4241851dfad78..4634bf89059a7 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(IRTests + AbstractCallSiteTest.cpp AsmWriterTest.cpp AttributesTest.cpp BasicBlockTest.cpp diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 93247908d8a77..c7ec3a9333023 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -6,11 +6,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DebugInfo.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" using namespace llvm; +static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("DebugInfoTest", errs()); + return Mod; +} + namespace { TEST(DINodeTest, getFlag) { @@ -79,4 +93,56 @@ TEST(DINodeTest, splitFlags) { #undef CHECK_SPLIT } +TEST(StripTest, LoopMetadata) { + LLVMContext C; + std::unique_ptr M = parseIR(C, R"( + define void @f() !dbg !5 { + ret void, !dbg !10, !llvm.loop !11 + } + + !llvm.dbg.cu = !{!0} + !llvm.debugify = !{!3, !3} + !llvm.module.flags = !{!4} + + !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) + !1 = !DIFile(filename: "loop.ll", directory: "/") + !2 = !{} + !3 = !{i32 1} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = distinct !DISubprogram(name: "f", linkageName: "f", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7) + !6 = !DISubroutineType(types: !2) + !7 = !{!8} + !8 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !9) + !9 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) + !10 = !DILocation(line: 1, column: 1, scope: !5) + !11 = distinct !{!11, !10, !10} +)"); + + // Look up the debug info emission kind for the CU via the loop metadata + // attached to the terminator. If, when stripping non-line table debug info, + // we update the terminator's metadata correctly, we should be able to + // observe the change in emission kind for the CU. 
+ auto getEmissionKind = [&]() { + Instruction &I = *M->getFunction("f")->getEntryBlock().getFirstNonPHI(); + MDNode *LoopMD = I.getMetadata(LLVMContext::MD_loop); + return cast(LoopMD->getOperand(1)) + ->getScope() + ->getSubprogram() + ->getUnit() + ->getEmissionKind(); + }; + + EXPECT_EQ(getEmissionKind(), DICompileUnit::FullDebug); + + bool Changed = stripNonLineTableDebugInfo(*M); + EXPECT_TRUE(Changed); + + EXPECT_EQ(getEmissionKind(), DICompileUnit::LineTablesOnly); + + bool BrokenDebugInfo = false; + bool HardError = verifyModule(*M, &errs(), &BrokenDebugInfo); + EXPECT_FALSE(HardError); + EXPECT_FALSE(BrokenDebugInfo); +} + } // end namespace diff --git a/llvm/unittests/IR/LegacyPassManagerTest.cpp b/llvm/unittests/IR/LegacyPassManagerTest.cpp index 1d4b3b835b695..b7801b52481dd 100644 --- a/llvm/unittests/IR/LegacyPassManagerTest.cpp +++ b/llvm/unittests/IR/LegacyPassManagerTest.cpp @@ -687,9 +687,9 @@ namespace llvm { Passes.add(P); Passes.run(*M); ASSERT_EQ(P->SetupWorked, 1U); - ASSERT_EQ(P->NumSCCs, 5U); - ASSERT_EQ(P->NumFns, 8U); - ASSERT_EQ(P->NumFnDecls, 3U); + ASSERT_EQ(P->NumSCCs, 4U); + ASSERT_EQ(P->NumFns, 6U); + ASSERT_EQ(P->NumFnDecls, 1U); ASSERT_EQ(M->getFunctionList().size(), 3U); ASSERT_EQ(P->NumExtCalledBefore, /* test1, 2a, 2b, 3, 4 */ 5U); ASSERT_EQ(P->NumExtCalledAfter, /* test1, 3repl, 4 */ 3U); diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp index f631a1d4a7c28..b55ea57ba502d 100644 --- a/llvm/unittests/IR/VectorTypesTest.cpp +++ b/llvm/unittests/IR/VectorTypesTest.cpp @@ -14,72 +14,110 @@ using namespace llvm; namespace { + +#define EXPECT_VTY_EQ(LHS, RHS) \ + ASSERT_NE(LHS, nullptr) << #LHS << " must not be null"; \ + ASSERT_NE(RHS, nullptr) << #RHS << " must not be null"; \ + EXPECT_EQ(LHS, RHS) << "Expect that " << #LHS << " == " << #RHS << " where " \ + << #LHS << " = " << *LHS << " and " << #RHS << " = " \ + << *RHS; + +#define EXPECT_VTY_NE(LHS, RHS) \ + ASSERT_NE(LHS, nullptr) << #LHS << " must not be null"; \ + ASSERT_NE(RHS, nullptr) << #RHS << " must not be null"; \ + EXPECT_NE(LHS, RHS) << "Expect that " << #LHS << " != " << #RHS << " where " \ + << #LHS << " = " << *LHS << " and " << #RHS << " = " \ + << *RHS; + TEST(VectorTypesTest, FixedLength) { LLVMContext Ctx; + Type *Int8Ty = Type::getInt8Ty(Ctx); Type *Int16Ty = Type::getInt16Ty(Ctx); Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); Type *Float64Ty = Type::getDoubleTy(Ctx); - VectorType *V8Int32Ty = VectorType::get(Int32Ty, 8); - ASSERT_TRUE(isa(V8Int32Ty)); + auto *V16Int8Ty = FixedVectorType::get(Int8Ty, 16); + ASSERT_NE(nullptr, V16Int8Ty); + EXPECT_EQ(V16Int8Ty->getNumElements(), 16U); + EXPECT_EQ(V16Int8Ty->getElementType()->getScalarSizeInBits(), 8U); + + auto *V8Int32Ty = dyn_cast(VectorType::get(Int32Ty, 8)); + ASSERT_NE(nullptr, V8Int32Ty); EXPECT_EQ(V8Int32Ty->getNumElements(), 8U); EXPECT_EQ(V8Int32Ty->getElementType()->getScalarSizeInBits(), 32U); - VectorType *V8Int16Ty = VectorType::get(Int16Ty, {8, false}); - ASSERT_TRUE(isa(V8Int16Ty)); + auto *V8Int32TyExplicitFalse = + dyn_cast(VectorType::get(Int32Ty, 8, false)); + EXPECT_VTY_EQ(V8Int32Ty, V8Int32TyExplicitFalse); + + auto *V8Int8Ty = + dyn_cast(VectorType::get(Int8Ty, V8Int32Ty)); + EXPECT_VTY_NE(V8Int32Ty, V8Int8Ty); + EXPECT_EQ(V8Int8Ty->getElementCount(), V8Int32Ty->getElementCount()); + EXPECT_EQ(V8Int8Ty->getElementType()->getScalarSizeInBits(), 8U); + + auto *V8Int32Ty2 = + dyn_cast(VectorType::get(Int32Ty, V8Int32Ty)); + 
EXPECT_VTY_EQ(V8Int32Ty, V8Int32Ty2); + + auto *V8Int16Ty = + dyn_cast(VectorType::get(Int16Ty, {8, false})); + ASSERT_NE(nullptr, V8Int16Ty); EXPECT_EQ(V8Int16Ty->getNumElements(), 8U); EXPECT_EQ(V8Int16Ty->getElementType()->getScalarSizeInBits(), 16U); ElementCount EltCnt(4, false); - VectorType *V4Int64Ty = VectorType::get(Int64Ty, EltCnt); - ASSERT_TRUE(isa(V4Int64Ty)); + auto *V4Int64Ty = dyn_cast(VectorType::get(Int64Ty, EltCnt)); + ASSERT_NE(nullptr, V4Int64Ty); EXPECT_EQ(V4Int64Ty->getNumElements(), 4U); EXPECT_EQ(V4Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *V2Int64Ty = VectorType::get(Int64Ty, EltCnt/2); - ASSERT_TRUE(isa(V2Int64Ty)); + auto *V2Int64Ty = + dyn_cast(VectorType::get(Int64Ty, EltCnt / 2)); + ASSERT_NE(nullptr, V2Int64Ty); EXPECT_EQ(V2Int64Ty->getNumElements(), 2U); EXPECT_EQ(V2Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *V8Int64Ty = VectorType::get(Int64Ty, EltCnt*2); - ASSERT_TRUE(isa(V8Int64Ty)); + auto *V8Int64Ty = + dyn_cast(VectorType::get(Int64Ty, EltCnt * 2)); + ASSERT_NE(nullptr, V8Int64Ty); EXPECT_EQ(V8Int64Ty->getNumElements(), 8U); EXPECT_EQ(V8Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *V4Float64Ty = VectorType::get(Float64Ty, EltCnt); - ASSERT_TRUE(isa(V4Float64Ty)); + auto *V4Float64Ty = + dyn_cast(VectorType::get(Float64Ty, EltCnt)); + ASSERT_NE(nullptr, V4Float64Ty); EXPECT_EQ(V4Float64Ty->getNumElements(), 4U); EXPECT_EQ(V4Float64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ExtTy = VectorType::getExtendedElementVectorType(V8Int16Ty); - EXPECT_EQ(ExtTy, V8Int32Ty); - ASSERT_TRUE(isa(ExtTy)); + auto *ExtTy = dyn_cast( + VectorType::getExtendedElementVectorType(V8Int16Ty)); + EXPECT_VTY_EQ(ExtTy, V8Int32Ty); EXPECT_EQ(ExtTy->getNumElements(), 8U); EXPECT_EQ(ExtTy->getElementType()->getScalarSizeInBits(), 32U); - VectorType *TruncTy = VectorType::getTruncatedElementVectorType(V8Int32Ty); - EXPECT_EQ(TruncTy, V8Int16Ty); - ASSERT_TRUE(isa(TruncTy)); + auto *TruncTy = dyn_cast( + VectorType::getTruncatedElementVectorType(V8Int32Ty)); + EXPECT_VTY_EQ(TruncTy, V8Int16Ty); EXPECT_EQ(TruncTy->getNumElements(), 8U); EXPECT_EQ(TruncTy->getElementType()->getScalarSizeInBits(), 16U); - VectorType *HalvedTy = VectorType::getHalfElementsVectorType(V4Int64Ty); - EXPECT_EQ(HalvedTy, V2Int64Ty); - ASSERT_TRUE(isa(HalvedTy)); + auto *HalvedTy = dyn_cast( + VectorType::getHalfElementsVectorType(V4Int64Ty)); + EXPECT_VTY_EQ(HalvedTy, V2Int64Ty); EXPECT_EQ(HalvedTy->getNumElements(), 2U); EXPECT_EQ(HalvedTy->getElementType()->getScalarSizeInBits(), 64U); - VectorType *DoubledTy = VectorType::getDoubleElementsVectorType(V4Int64Ty); - EXPECT_EQ(DoubledTy, V8Int64Ty); - ASSERT_TRUE(isa(DoubledTy)); + auto *DoubledTy = dyn_cast( + VectorType::getDoubleElementsVectorType(V4Int64Ty)); + EXPECT_VTY_EQ(DoubledTy, V8Int64Ty); EXPECT_EQ(DoubledTy->getNumElements(), 8U); EXPECT_EQ(DoubledTy->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ConvTy = VectorType::getInteger(V4Float64Ty); - EXPECT_EQ(ConvTy, V4Int64Ty); - ASSERT_TRUE(isa(ConvTy)); + auto *ConvTy = dyn_cast(VectorType::getInteger(V4Float64Ty)); + EXPECT_VTY_EQ(ConvTy, V4Int64Ty); EXPECT_EQ(ConvTy->getNumElements(), 4U); EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U); @@ -91,70 +129,92 @@ TEST(VectorTypesTest, FixedLength) { TEST(VectorTypesTest, Scalable) { LLVMContext Ctx; + Type *Int8Ty = Type::getInt8Ty(Ctx); Type *Int16Ty = Type::getInt16Ty(Ctx); Type *Int32Ty = Type::getInt32Ty(Ctx); Type 
*Int64Ty = Type::getInt64Ty(Ctx); Type *Float64Ty = Type::getDoubleTy(Ctx); - VectorType *ScV8Int32Ty = VectorType::get(Int32Ty, 8, true); - ASSERT_TRUE(isa(ScV8Int32Ty)); - EXPECT_EQ(ScV8Int32Ty->getNumElements(), 8U); + auto *ScV16Int8Ty = ScalableVectorType::get(Int8Ty, 16); + ASSERT_NE(nullptr, ScV16Int8Ty); + EXPECT_EQ(ScV16Int8Ty->getMinNumElements(), 16U); + EXPECT_EQ(ScV16Int8Ty->getScalarSizeInBits(), 8U); + + auto *ScV8Int32Ty = + dyn_cast(VectorType::get(Int32Ty, 8, true)); + ASSERT_NE(nullptr, ScV8Int32Ty); + EXPECT_EQ(ScV8Int32Ty->getMinNumElements(), 8U); EXPECT_EQ(ScV8Int32Ty->getElementType()->getScalarSizeInBits(), 32U); - VectorType *ScV8Int16Ty = VectorType::get(Int16Ty, {8, true}); - ASSERT_TRUE(isa(ScV8Int16Ty)); - EXPECT_EQ(ScV8Int16Ty->getNumElements(), 8U); + auto *ScV8Int8Ty = + dyn_cast(VectorType::get(Int8Ty, ScV8Int32Ty)); + EXPECT_VTY_NE(ScV8Int32Ty, ScV8Int8Ty); + EXPECT_EQ(ScV8Int8Ty->getElementCount(), ScV8Int32Ty->getElementCount()); + EXPECT_EQ(ScV8Int8Ty->getElementType()->getScalarSizeInBits(), 8U); + + auto *ScV8Int32Ty2 = + dyn_cast(VectorType::get(Int32Ty, ScV8Int32Ty)); + EXPECT_VTY_EQ(ScV8Int32Ty, ScV8Int32Ty2); + + auto *ScV8Int16Ty = + dyn_cast(VectorType::get(Int16Ty, {8, true})); + ASSERT_NE(nullptr, ScV8Int16Ty); + EXPECT_EQ(ScV8Int16Ty->getMinNumElements(), 8U); EXPECT_EQ(ScV8Int16Ty->getElementType()->getScalarSizeInBits(), 16U); ElementCount EltCnt(4, true); - VectorType *ScV4Int64Ty = VectorType::get(Int64Ty, EltCnt); - ASSERT_TRUE(isa(ScV4Int64Ty)); - EXPECT_EQ(ScV4Int64Ty->getNumElements(), 4U); + auto *ScV4Int64Ty = + dyn_cast(VectorType::get(Int64Ty, EltCnt)); + ASSERT_NE(nullptr, ScV4Int64Ty); + EXPECT_EQ(ScV4Int64Ty->getMinNumElements(), 4U); EXPECT_EQ(ScV4Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ScV2Int64Ty = VectorType::get(Int64Ty, EltCnt/2); - ASSERT_TRUE(isa(ScV2Int64Ty)); - EXPECT_EQ(ScV2Int64Ty->getNumElements(), 2U); + auto *ScV2Int64Ty = + dyn_cast(VectorType::get(Int64Ty, EltCnt / 2)); + ASSERT_NE(nullptr, ScV2Int64Ty); + EXPECT_EQ(ScV2Int64Ty->getMinNumElements(), 2U); EXPECT_EQ(ScV2Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ScV8Int64Ty = VectorType::get(Int64Ty, EltCnt*2); - ASSERT_TRUE(isa(ScV8Int64Ty)); - EXPECT_EQ(ScV8Int64Ty->getNumElements(), 8U); + auto *ScV8Int64Ty = + dyn_cast(VectorType::get(Int64Ty, EltCnt * 2)); + ASSERT_NE(nullptr, ScV8Int64Ty); + EXPECT_EQ(ScV8Int64Ty->getMinNumElements(), 8U); EXPECT_EQ(ScV8Int64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ScV4Float64Ty = VectorType::get(Float64Ty, EltCnt); - ASSERT_TRUE(isa(ScV4Float64Ty)); - EXPECT_EQ(ScV4Float64Ty->getNumElements(), 4U); + auto *ScV4Float64Ty = + dyn_cast(VectorType::get(Float64Ty, EltCnt)); + ASSERT_NE(nullptr, ScV4Float64Ty); + EXPECT_EQ(ScV4Float64Ty->getMinNumElements(), 4U); EXPECT_EQ(ScV4Float64Ty->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ExtTy = VectorType::getExtendedElementVectorType(ScV8Int16Ty); - EXPECT_EQ(ExtTy, ScV8Int32Ty); - ASSERT_TRUE(isa(ExtTy)); - EXPECT_EQ(ExtTy->getNumElements(), 8U); + auto *ExtTy = dyn_cast( + VectorType::getExtendedElementVectorType(ScV8Int16Ty)); + EXPECT_VTY_EQ(ExtTy, ScV8Int32Ty); + EXPECT_EQ(ExtTy->getMinNumElements(), 8U); EXPECT_EQ(ExtTy->getElementType()->getScalarSizeInBits(), 32U); - VectorType *TruncTy = VectorType::getTruncatedElementVectorType(ScV8Int32Ty); - EXPECT_EQ(TruncTy, ScV8Int16Ty); - ASSERT_TRUE(isa(TruncTy)); - EXPECT_EQ(TruncTy->getNumElements(), 8U); + auto *TruncTy = dyn_cast( + 
VectorType::getTruncatedElementVectorType(ScV8Int32Ty)); + EXPECT_VTY_EQ(TruncTy, ScV8Int16Ty); + EXPECT_EQ(TruncTy->getMinNumElements(), 8U); EXPECT_EQ(TruncTy->getElementType()->getScalarSizeInBits(), 16U); - VectorType *HalvedTy = VectorType::getHalfElementsVectorType(ScV4Int64Ty); - EXPECT_EQ(HalvedTy, ScV2Int64Ty); - ASSERT_TRUE(isa(HalvedTy)); - EXPECT_EQ(HalvedTy->getNumElements(), 2U); + auto *HalvedTy = dyn_cast( + VectorType::getHalfElementsVectorType(ScV4Int64Ty)); + EXPECT_VTY_EQ(HalvedTy, ScV2Int64Ty); + EXPECT_EQ(HalvedTy->getMinNumElements(), 2U); EXPECT_EQ(HalvedTy->getElementType()->getScalarSizeInBits(), 64U); - VectorType *DoubledTy = VectorType::getDoubleElementsVectorType(ScV4Int64Ty); - EXPECT_EQ(DoubledTy, ScV8Int64Ty); - ASSERT_TRUE(isa(DoubledTy)); - EXPECT_EQ(DoubledTy->getNumElements(), 8U); + auto *DoubledTy = dyn_cast( + VectorType::getDoubleElementsVectorType(ScV4Int64Ty)); + EXPECT_VTY_EQ(DoubledTy, ScV8Int64Ty); + EXPECT_EQ(DoubledTy->getMinNumElements(), 8U); EXPECT_EQ(DoubledTy->getElementType()->getScalarSizeInBits(), 64U); - VectorType *ConvTy = VectorType::getInteger(ScV4Float64Ty); - EXPECT_EQ(ConvTy, ScV4Int64Ty); - ASSERT_TRUE(isa(ConvTy)); - EXPECT_EQ(ConvTy->getNumElements(), 4U); + auto *ConvTy = + dyn_cast(VectorType::getInteger(ScV4Float64Ty)); + EXPECT_VTY_EQ(ConvTy, ScV4Int64Ty); + EXPECT_EQ(ConvTy->getMinNumElements(), 4U); EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U); EltCnt = ScV8Int64Ty->getElementCount(); @@ -162,6 +222,56 @@ TEST(VectorTypesTest, Scalable) { ASSERT_TRUE(EltCnt.Scalable); } +TEST(VectorTypesTest, BaseVectorType) { + LLVMContext Ctx; + + Type *Int16Ty = Type::getInt16Ty(Ctx); + Type *Int32Ty = Type::getInt32Ty(Ctx); + + std::array VTys = {VectorType::get(Int16Ty, {4, true}), + VectorType::get(Int16Ty, {4, false}), + VectorType::get(Int16Ty, {2, true}), + VectorType::get(Int16Ty, {2, false}), + VectorType::get(Int32Ty, {4, true}), + VectorType::get(Int32Ty, {4, false}), + VectorType::get(Int32Ty, {2, true}), + VectorType::get(Int32Ty, {2, false})}; + + /* + The comparison matrix is symmetric, so we only check the upper triangle: + + (0,0) (0,1) (0,2) ... (0,7) + (1,0) (1,1) (1,2) . + (2,0) (2,1) (2,2) . + . . . + . . + . . + (7,0) ... 
(7,7) + */ + for (size_t I = 0, IEnd = VTys.size(); I < IEnd; ++I) { + // test I == J + VectorType *VI = VTys[I]; + ElementCount ECI = VI->getElementCount(); + EXPECT_EQ(isa(VI), ECI.Scalable); + + for (size_t J = I + 1, JEnd = VTys.size(); J < JEnd; ++J) { + // test I < J + VectorType *VJ = VTys[J]; + EXPECT_VTY_NE(VI, VJ); + + VectorType *VJPrime = VectorType::get(VI->getElementType(), VJ); + if (VI->getElementType() == VJ->getElementType()) { + EXPECT_VTY_EQ(VJ, VJPrime); + } else { + EXPECT_VTY_NE(VJ, VJPrime); + } + + EXPECT_EQ(VJ->getTypeID(), VJPrime->getTypeID()) + << "VJ and VJPrime are the same sort of vector"; + } + } +} + TEST(VectorTypesTest, FixedLenComparisons) { LLVMContext Ctx; DataLayout DL(""); @@ -169,10 +279,10 @@ TEST(VectorTypesTest, FixedLenComparisons) { Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); - VectorType *V2Int32Ty = VectorType::get(Int32Ty, 2); - VectorType *V4Int32Ty = VectorType::get(Int32Ty, 4); + auto *V2Int32Ty = FixedVectorType::get(Int32Ty, 2); + auto *V4Int32Ty = FixedVectorType::get(Int32Ty, 4); - VectorType *V2Int64Ty = VectorType::get(Int64Ty, 2); + auto *V2Int64Ty = FixedVectorType::get(Int64Ty, 2); TypeSize V2I32Len = V2Int32Ty->getPrimitiveSizeInBits(); EXPECT_EQ(V2I32Len.getKnownMinSize(), 64U); @@ -192,18 +302,15 @@ TEST(VectorTypesTest, FixedLenComparisons) { V4Int32Ty->getPrimitiveSizeInBits().getFixedSize()); // Check the DataLayout interfaces. - EXPECT_EQ(DL.getTypeSizeInBits(V2Int64Ty), - DL.getTypeSizeInBits(V4Int32Ty)); + EXPECT_EQ(DL.getTypeSizeInBits(V2Int64Ty), DL.getTypeSizeInBits(V4Int32Ty)); EXPECT_EQ(DL.getTypeSizeInBits(V2Int32Ty), 64U); EXPECT_EQ(DL.getTypeSizeInBits(V2Int64Ty), 128U); - EXPECT_EQ(DL.getTypeStoreSize(V2Int64Ty), - DL.getTypeStoreSize(V4Int32Ty)); + EXPECT_EQ(DL.getTypeStoreSize(V2Int64Ty), DL.getTypeStoreSize(V4Int32Ty)); EXPECT_NE(DL.getTypeStoreSizeInBits(V2Int32Ty), DL.getTypeStoreSizeInBits(V2Int64Ty)); EXPECT_EQ(DL.getTypeStoreSizeInBits(V2Int32Ty), 64U); EXPECT_EQ(DL.getTypeStoreSize(V2Int64Ty), 16U); - EXPECT_EQ(DL.getTypeAllocSize(V4Int32Ty), - DL.getTypeAllocSize(V2Int64Ty)); + EXPECT_EQ(DL.getTypeAllocSize(V4Int32Ty), DL.getTypeAllocSize(V2Int64Ty)); EXPECT_NE(DL.getTypeAllocSizeInBits(V2Int32Ty), DL.getTypeAllocSizeInBits(V2Int64Ty)); EXPECT_EQ(DL.getTypeAllocSizeInBits(V4Int32Ty), 128U); @@ -218,10 +325,10 @@ TEST(VectorTypesTest, ScalableComparisons) { Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); - VectorType *ScV2Int32Ty = VectorType::get(Int32Ty, {2, true}); - VectorType *ScV4Int32Ty = VectorType::get(Int32Ty, {4, true}); + auto *ScV2Int32Ty = ScalableVectorType::get(Int32Ty, 2); + auto *ScV4Int32Ty = ScalableVectorType::get(Int32Ty, 4); - VectorType *ScV2Int64Ty = VectorType::get(Int64Ty, {2, true}); + auto *ScV2Int64Ty = ScalableVectorType::get(Int64Ty, 2); TypeSize ScV2I32Len = ScV2Int32Ty->getPrimitiveSizeInBits(); EXPECT_EQ(ScV2I32Len.getKnownMinSize(), 64U); @@ -240,14 +347,12 @@ TEST(VectorTypesTest, ScalableComparisons) { EXPECT_EQ(DL.getTypeSizeInBits(ScV2Int64Ty), DL.getTypeSizeInBits(ScV4Int32Ty)); EXPECT_EQ(DL.getTypeSizeInBits(ScV2Int32Ty).getKnownMinSize(), 64U); - EXPECT_EQ(DL.getTypeStoreSize(ScV2Int64Ty), - DL.getTypeStoreSize(ScV4Int32Ty)); + EXPECT_EQ(DL.getTypeStoreSize(ScV2Int64Ty), DL.getTypeStoreSize(ScV4Int32Ty)); EXPECT_NE(DL.getTypeStoreSizeInBits(ScV2Int32Ty), DL.getTypeStoreSizeInBits(ScV2Int64Ty)); EXPECT_EQ(DL.getTypeStoreSizeInBits(ScV2Int32Ty).getKnownMinSize(), 64U); 
EXPECT_EQ(DL.getTypeStoreSize(ScV2Int64Ty).getKnownMinSize(), 16U); - EXPECT_EQ(DL.getTypeAllocSize(ScV4Int32Ty), - DL.getTypeAllocSize(ScV2Int64Ty)); + EXPECT_EQ(DL.getTypeAllocSize(ScV4Int32Ty), DL.getTypeAllocSize(ScV2Int64Ty)); EXPECT_NE(DL.getTypeAllocSizeInBits(ScV2Int32Ty), DL.getTypeAllocSizeInBits(ScV2Int64Ty)); EXPECT_EQ(DL.getTypeAllocSizeInBits(ScV4Int32Ty).getKnownMinSize(), 128U); @@ -260,8 +365,8 @@ TEST(VectorTypesTest, CrossComparisons) { Type *Int32Ty = Type::getInt32Ty(Ctx); - VectorType *V4Int32Ty = VectorType::get(Int32Ty, {4, false}); - VectorType *ScV4Int32Ty = VectorType::get(Int32Ty, {4, true}); + auto *V4Int32Ty = FixedVectorType::get(Int32Ty, 4); + auto *ScV4Int32Ty = ScalableVectorType::get(Int32Ty, 4); // Even though the minimum size is the same, a scalable vector could be // larger so we don't consider them to be the same size. diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp index 3aaba056c6187..62273740c91a7 100644 --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -77,6 +77,60 @@ struct SampleProfTest : ::testing::Test { OS->close(); } + // Verify profile summary is consistent in the roundtrip to and from + // Metadata. \p AddPartialField is to choose whether the Metadata + // contains the IsPartialProfile field which is optional. + void verifyProfileSummary(ProfileSummary &Summary, Module &M, + const bool AddPartialField) { + LLVMContext &Context = M.getContext(); + const bool IsPartialProfile = Summary.isPartialProfile(); + auto VerifySummary = [IsPartialProfile](ProfileSummary &Summary) mutable { + ASSERT_EQ(ProfileSummary::PSK_Sample, Summary.getKind()); + ASSERT_EQ(137392u, Summary.getTotalCount()); + ASSERT_EQ(8u, Summary.getNumCounts()); + ASSERT_EQ(4u, Summary.getNumFunctions()); + ASSERT_EQ(1437u, Summary.getMaxFunctionCount()); + ASSERT_EQ(60351u, Summary.getMaxCount()); + ASSERT_EQ(IsPartialProfile, Summary.isPartialProfile()); + + uint32_t Cutoff = 800000; + auto Predicate = [&Cutoff](const ProfileSummaryEntry &PE) { + return PE.Cutoff == Cutoff; + }; + std::vector &Details = Summary.getDetailedSummary(); + auto EightyPerc = find_if(Details, Predicate); + Cutoff = 900000; + auto NinetyPerc = find_if(Details, Predicate); + Cutoff = 950000; + auto NinetyFivePerc = find_if(Details, Predicate); + Cutoff = 990000; + auto NinetyNinePerc = find_if(Details, Predicate); + ASSERT_EQ(60000u, EightyPerc->MinCount); + ASSERT_EQ(12557u, NinetyPerc->MinCount); + ASSERT_EQ(12557u, NinetyFivePerc->MinCount); + ASSERT_EQ(610u, NinetyNinePerc->MinCount); + }; + VerifySummary(Summary); + + // Test that conversion of summary to and from Metadata works. + Metadata *MD = Summary.getMD(Context, AddPartialField); + ASSERT_TRUE(MD); + ProfileSummary *PS = ProfileSummary::getFromMD(MD); + ASSERT_TRUE(PS); + VerifySummary(*PS); + delete PS; + + // Test that summary can be attached to and read back from module. 
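The verifyProfileSummary helper above follows a standard roundtrip-testing shape: capture the expected invariants once in a callable, then apply it to the original object and again to the copy recovered from the serialized form. A generic sketch of that shape with a made-up payload (encode/decode are placeholders, not the ProfileSummary metadata API; the module attach/read-back code of the helper continues below):

```cpp
#include <cassert>
#include <string>

struct Payload {
  unsigned NumCounts;
  bool IsPartial;
};

// Placeholder serialization; stands in for Summary.getMD(...) and
// ProfileSummary::getFromMD(...) in the test above.
std::string encode(const Payload &P) {
  return std::to_string(P.NumCounts) + (P.IsPartial ? ":partial" : ":full");
}
Payload decode(const std::string &S) {
  return {static_cast<unsigned>(std::stoul(S)),
          S.find(":partial") != std::string::npos};
}

void verifyRoundtrip(const Payload &Expected) {
  auto Verify = [&](const Payload &P) {
    assert(P.NumCounts == Expected.NumCounts);
    assert(P.IsPartial == Expected.IsPartial);
  };
  Verify(Expected);                 // Pre-serialization.
  Verify(decode(encode(Expected))); // Post-roundtrip.
}
```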
+ M.eraseNamedMetadata(M.getOrInsertModuleFlagsMetadata()); + M.setProfileSummary(MD, ProfileSummary::PSK_Sample); + MD = M.getProfileSummary(/* IsCS */ false); + ASSERT_TRUE(MD); + PS = ProfileSummary::getFromMD(MD); + ASSERT_TRUE(PS); + VerifySummary(*PS); + delete PS; + } + void testRoundTrip(SampleProfileFormat Format, bool Remap, bool UseMD5) { SmallVector ProfilePath; ASSERT_TRUE(NoError(llvm::sys::fs::createTemporaryFile("profile", "", ProfilePath))); @@ -214,51 +268,16 @@ struct SampleProfTest : ::testing::Test { ASSERT_EQ(1000u, CTMap.get()[MconstructRep]); ASSERT_EQ(437u, CTMap.get()[StringviewRep]); - auto VerifySummary = [](ProfileSummary &Summary) mutable { - ASSERT_EQ(ProfileSummary::PSK_Sample, Summary.getKind()); - ASSERT_EQ(137392u, Summary.getTotalCount()); - ASSERT_EQ(8u, Summary.getNumCounts()); - ASSERT_EQ(4u, Summary.getNumFunctions()); - ASSERT_EQ(1437u, Summary.getMaxFunctionCount()); - ASSERT_EQ(60351u, Summary.getMaxCount()); - - uint32_t Cutoff = 800000; - auto Predicate = [&Cutoff](const ProfileSummaryEntry &PE) { - return PE.Cutoff == Cutoff; - }; - std::vector &Details = Summary.getDetailedSummary(); - auto EightyPerc = find_if(Details, Predicate); - Cutoff = 900000; - auto NinetyPerc = find_if(Details, Predicate); - Cutoff = 950000; - auto NinetyFivePerc = find_if(Details, Predicate); - Cutoff = 990000; - auto NinetyNinePerc = find_if(Details, Predicate); - ASSERT_EQ(60000u, EightyPerc->MinCount); - ASSERT_EQ(12557u, NinetyPerc->MinCount); - ASSERT_EQ(12557u, NinetyFivePerc->MinCount); - ASSERT_EQ(610u, NinetyNinePerc->MinCount); - }; ProfileSummary &Summary = Reader->getSummary(); - VerifySummary(Summary); + Summary.setPartialProfile(true); + verifyProfileSummary(Summary, M, true); - // Test that conversion of summary to and from Metadata works. - Metadata *MD = Summary.getMD(Context); - ASSERT_TRUE(MD); - ProfileSummary *PS = ProfileSummary::getFromMD(MD); - ASSERT_TRUE(PS); - VerifySummary(*PS); - delete PS; + Summary.setPartialProfile(false); + verifyProfileSummary(Summary, M, true); - // Test that summary can be attached to and read back from module. - M.setProfileSummary(MD, ProfileSummary::PSK_Sample); - MD = M.getProfileSummary(/* IsCS */ false); - ASSERT_TRUE(MD); - PS = ProfileSummary::getFromMD(MD); - ASSERT_TRUE(PS); - VerifySummary(*PS); - delete PS; + Summary.setPartialProfile(false); + verifyProfileSummary(Summary, M, false); } void addFunctionSamples(StringMap *Smap, const char *Fname, diff --git a/llvm/unittests/Support/AllocatorTest.cpp b/llvm/unittests/Support/AllocatorTest.cpp index 57c3252fc701c..c41f597c404dd 100644 --- a/llvm/unittests/Support/AllocatorTest.cpp +++ b/llvm/unittests/Support/AllocatorTest.cpp @@ -206,7 +206,7 @@ class MockSlabAllocator { return Slab; } - void Deallocate(void *Slab, size_t Size) { + void Deallocate(void *Slab, size_t /*Size*/, size_t /*Alignment*/) { free(((void**)Slab)[-1]); } diff --git a/llvm/unittests/Support/FileOutputBufferTest.cpp b/llvm/unittests/Support/FileOutputBufferTest.cpp index 6b6196f971643..f7bb0833e5a0e 100644 --- a/llvm/unittests/Support/FileOutputBufferTest.cpp +++ b/llvm/unittests/Support/FileOutputBufferTest.cpp @@ -140,6 +140,21 @@ TEST(FileOutputBuffer, Test) { ASSERT_NO_ERROR(fs::file_size(Twine(File5), File5Size)); ASSERT_EQ(File5Size, 8000ULL); ASSERT_NO_ERROR(fs::remove(File5.str())); + + // TEST 6: Create an empty file. 
+ SmallString<128> File6(TestDirectory); + File6.append("/file6"); + { + Expected> BufferOrErr = + FileOutputBuffer::create(File6, 0); + ASSERT_NO_ERROR(errorToErrorCode(BufferOrErr.takeError())); + ASSERT_NO_ERROR(errorToErrorCode((*BufferOrErr)->commit())); + } + uint64_t File6Size; + ASSERT_NO_ERROR(fs::file_size(Twine(File6), File6Size)); + ASSERT_EQ(File6Size, 0ULL); + ASSERT_NO_ERROR(fs::remove(File6.str())); + // Clean up. ASSERT_NO_ERROR(fs::remove(TestDirectory.str())); } diff --git a/llvm/unittests/Support/Host.cpp b/llvm/unittests/Support/Host.cpp index 36ca70a707b24..4edeb40ed3ea7 100644 --- a/llvm/unittests/Support/Host.cpp +++ b/llvm/unittests/Support/Host.cpp @@ -262,6 +262,21 @@ CPU part : 0x001 )"; EXPECT_EQ(sys::detail::getHostCPUNameForARM(A64FXProcCpuInfo), "a64fx"); + + // Verify Nvidia Carmel. + const std::string CarmelProcCpuInfo = R"( +processor : 0 +model name : ARMv8 Processor rev 0 (v8l) +BogoMIPS : 62.50 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm dcpop +CPU implementer : 0x4e +CPU architecture: 8 +CPU variant : 0x0 +CPU part : 0x004 +CPU revision : 0 +)"; + + EXPECT_EQ(sys::detail::getHostCPUNameForARM(CarmelProcCpuInfo), "carmel"); } #if defined(__APPLE__) || defined(_AIX) diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index 1a2ac1818eb04..a577f1b744bc9 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" @@ -305,15 +306,45 @@ TEST(Support, AbsolutePathIteratorEnd) { } } -TEST(Support, HomeDirectory) { - std::string expected; #ifdef _WIN32 - if (wchar_t const *path = ::_wgetenv(L"USERPROFILE")) { +std::string getEnvWin(const wchar_t *Var) { + std::string expected; + if (wchar_t const *path = ::_wgetenv(Var)) { auto pathLen = ::wcslen(path); ArrayRef ref{reinterpret_cast(path), pathLen * sizeof(wchar_t)}; convertUTF16ToUTF8String(ref, expected); } + return expected; +} +#else +// RAII helper to set and restore an environment variable. +class WithEnv { + const char *Var; + llvm::Optional OriginalValue; + +public: + WithEnv(const char *Var, const char *Value) : Var(Var) { + if (const char *V = ::getenv(Var)) + OriginalValue.emplace(V); + if (Value) + ::setenv(Var, Value, 1); + else + ::unsetenv(Var); + } + ~WithEnv() { + if (OriginalValue) + ::setenv(Var, OriginalValue->c_str(), 1); + else + ::unsetenv(Var); + } +}; +#endif + +TEST(Support, HomeDirectory) { + std::string expected; +#ifdef _WIN32 + expected = getEnvWin(L"USERPROFILE"); #else if (char const *path = ::getenv("HOME")) expected = path; @@ -330,31 +361,48 @@ TEST(Support, HomeDirectory) { #ifdef LLVM_ON_UNIX TEST(Support, HomeDirectoryWithNoEnv) { - std::string OriginalStorage; - char const *OriginalEnv = ::getenv("HOME"); - if (OriginalEnv) { - // We're going to unset it, so make a copy and save a pointer to the copy - // so that we can reset it at the end of the test. - OriginalStorage = OriginalEnv; - OriginalEnv = OriginalStorage.c_str(); - } + WithEnv Env("HOME", nullptr); // Don't run the test if we have nothing to compare against. 
struct passwd *pw = getpwuid(getuid()); if (!pw || !pw->pw_dir) return; - - ::unsetenv("HOME"); - EXPECT_EQ(nullptr, ::getenv("HOME")); std::string PwDir = pw->pw_dir; SmallString<128> HomeDir; - auto status = path::home_directory(HomeDir); - EXPECT_TRUE(status); + EXPECT_TRUE(path::home_directory(HomeDir)); EXPECT_EQ(PwDir, HomeDir); +} + +TEST(Support, CacheDirectoryWithEnv) { + WithEnv Env("XDG_CACHE_HOME", "/xdg/cache"); + + SmallString<128> CacheDir; + EXPECT_TRUE(path::cache_directory(CacheDir)); + EXPECT_EQ("/xdg/cache", CacheDir); +} + +TEST(Support, CacheDirectoryNoEnv) { + WithEnv Env("XDG_CACHE_HOME", nullptr); + + SmallString<128> Fallback; + ASSERT_TRUE(path::home_directory(Fallback)); + path::append(Fallback, ".cache"); - // Now put the environment back to its original state (meaning that if it was - // unset before, we don't reset it). - if (OriginalEnv) ::setenv("HOME", OriginalEnv, 1); + SmallString<128> CacheDir; + EXPECT_TRUE(path::cache_directory(CacheDir)); + EXPECT_EQ(Fallback, CacheDir); +} +#endif + +#ifdef _WIN32 +TEST(Support, CacheDirectory) { + std::string Expected = getEnvWin(L"LOCALAPPDATA"); + // Do not try to test it if we don't know what to expect. + if (!Expected.empty()) { + SmallString<128> CacheDir; + EXPECT_TRUE(path::cache_directory(CacheDir)); + EXPECT_EQ(Expected, CacheDir); + } } #endif @@ -1128,14 +1176,16 @@ TEST_F(FileSystemTest, FileMapping) { } TEST(Support, NormalizePath) { + // Input, Expected Win, Expected Posix using TestTuple = std::tuple; std::vector Tests; Tests.emplace_back("a", "a", "a"); Tests.emplace_back("a/b", "a\\b", "a/b"); Tests.emplace_back("a\\b", "a\\b", "a/b"); - Tests.emplace_back("a\\\\b", "a\\\\b", "a\\\\b"); + Tests.emplace_back("a\\\\b", "a\\\\b", "a//b"); Tests.emplace_back("\\a", "\\a", "/a"); Tests.emplace_back("a\\", "a\\", "a/"); + Tests.emplace_back("a\\t", "a\\t", "a/t"); for (auto &T : Tests) { SmallString<64> Win(std::get<0>(T)); @@ -1204,6 +1254,30 @@ TEST(Support, RemoveDots) { remove_dots("..\\a\\b\\..\\c", true, path::Style::windows)); EXPECT_EQ("..\\..\\a\\c", remove_dots("..\\..\\a\\b\\..\\c", true, path::Style::windows)); + EXPECT_EQ("C:\\a\\c", remove_dots("C:\\foo\\bar//..\\..\\a\\c", true, + path::Style::windows)); + + // FIXME: These leading forward slashes are emergent behavior. VFS depends on + // this behavior now. + EXPECT_EQ("C:/bar", + remove_dots("C:/foo/../bar", true, path::Style::windows)); + EXPECT_EQ("C:/foo\\bar", + remove_dots("C:/foo/bar", true, path::Style::windows)); + EXPECT_EQ("C:/foo\\bar", + remove_dots("C:/foo\\bar", true, path::Style::windows)); + EXPECT_EQ("/", remove_dots("/", true, path::Style::windows)); + EXPECT_EQ("C:/", remove_dots("C:/", true, path::Style::windows)); + + // Some clients of remove_dots expect it to remove trailing slashes. Again, + // this is emergent behavior that VFS relies on, and not inherently part of + // the specification. + EXPECT_EQ("C:\\foo\\bar", + remove_dots("C:\\foo\\bar\\", true, path::Style::windows)); + EXPECT_EQ("/foo/bar", + remove_dots("/foo/bar/", true, path::Style::posix)); + + // A double separator is rewritten. 
+ EXPECT_EQ("C:/foo\\bar", remove_dots("C:/foo//bar", true, path::Style::windows)); SmallString<64> Path1(".\\.\\c"); EXPECT_TRUE(path::remove_dots(Path1, true, path::Style::windows)); @@ -1222,6 +1296,11 @@ TEST(Support, RemoveDots) { EXPECT_EQ("/a/c", remove_dots("/../../a/c", true, path::Style::posix)); EXPECT_EQ("/a/c", remove_dots("/../a/b//../././/c", true, path::Style::posix)); + EXPECT_EQ("/", remove_dots("/", true, path::Style::posix)); + + // FIXME: Leaving behind this double leading slash seems like a bug. + EXPECT_EQ("//foo/bar", + remove_dots("//foo/bar/", true, path::Style::posix)); SmallString<64> Path2("././c"); EXPECT_TRUE(path::remove_dots(Path2, true, path::Style::posix)); diff --git a/llvm/unittests/Support/TarWriterTest.cpp b/llvm/unittests/Support/TarWriterTest.cpp index bd67e036518f2..579c06d754725 100644 --- a/llvm/unittests/Support/TarWriterTest.cpp +++ b/llvm/unittests/Support/TarWriterTest.cpp @@ -81,30 +81,32 @@ TEST_F(TarWriterTest, Basics) { } TEST_F(TarWriterTest, LongFilename) { - std::string x154(154, 'x'); - std::string x155(155, 'x'); + // The prefix is prefixed by an additional '/' so it's one longer than the + // number of x's here. + std::string x136(136, 'x'); + std::string x137(137, 'x'); std::string y99(99, 'y'); std::string y100(100, 'y'); - UstarHeader Hdr1 = createUstar("", x154 + "/" + y99); - EXPECT_EQ("/" + x154, StringRef(Hdr1.Prefix)); + UstarHeader Hdr1 = createUstar("", x136 + "/" + y99); + EXPECT_EQ("/" + x136, StringRef(Hdr1.Prefix)); EXPECT_EQ(y99, StringRef(Hdr1.Name)); - UstarHeader Hdr2 = createUstar("", x155 + "/" + y99); + UstarHeader Hdr2 = createUstar("", x137 + "/" + y99); EXPECT_EQ("", StringRef(Hdr2.Prefix)); EXPECT_EQ("", StringRef(Hdr2.Name)); - UstarHeader Hdr3 = createUstar("", x154 + "/" + y100); + UstarHeader Hdr3 = createUstar("", x136 + "/" + y100); EXPECT_EQ("", StringRef(Hdr3.Prefix)); EXPECT_EQ("", StringRef(Hdr3.Name)); - UstarHeader Hdr4 = createUstar("", x155 + "/" + y100); + UstarHeader Hdr4 = createUstar("", x137 + "/" + y100); EXPECT_EQ("", StringRef(Hdr4.Prefix)); EXPECT_EQ("", StringRef(Hdr4.Name)); std::string yz = "yyyyyyyyyyyyyyyyyyyy/zzzzzzzzzzzzzzzzzzzz"; - UstarHeader Hdr5 = createUstar("", x154 + "/" + yz); - EXPECT_EQ("/" + x154, StringRef(Hdr5.Prefix)); + UstarHeader Hdr5 = createUstar("", x136 + "/" + yz); + EXPECT_EQ("/" + x136, StringRef(Hdr5.Prefix)); EXPECT_EQ(yz, StringRef(Hdr5.Name)); } diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 0b3cc91bae9ab..3d4e591545dfa 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -975,9 +975,15 @@ TEST(TargetParserTest, testAArch64CPU) { AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_SVE | AArch64::AEK_RDM, "8.2-A")); + EXPECT_TRUE(testAArch64CPU( + "carmel", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_RDM, + "8.2-A")); } -static constexpr unsigned NumAArch64CPUArchs = 37; +static constexpr unsigned NumAArch64CPUArchs = 38; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; @@ -1126,6 +1132,10 @@ TEST(TargetParserTest, testAArch64Extension) { AArch64::ArchKind::INVALID, "sve")); EXPECT_FALSE(testAArch64Extension("a64fx", AArch64::ArchKind::INVALID, "sve2")); + EXPECT_TRUE( + testAArch64Extension("carmel", AArch64::ArchKind::INVALID, 
"crypto")); + EXPECT_TRUE( + testAArch64Extension("carmel", AArch64::ArchKind::INVALID, "fp16")); EXPECT_FALSE(testAArch64Extension( "generic", AArch64::ArchKind::ARMV8A, "ras")); diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvmlibc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvmlibc/BUILD.gn index 4c2c6f090f06a..8e42997adf158 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvmlibc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/llvmlibc/BUILD.gn @@ -12,6 +12,7 @@ static_library("llvmlibc") { "//llvm/lib/Support", ] sources = [ + "CalleeNamespaceCheck.cpp", "ImplementationInNamespaceCheck.cpp", "LLVMLibcTidyModule.cpp", "RestrictSystemLibcHeadersCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index b0f6d7f6405b6..135e195fa35fd 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -10,9 +10,7 @@ config("features_config") { write_cmake_config("features") { input = "Features.inc.in" output = "$target_gen_dir/Features.inc" - - # FIXME: Real remote support. - values = [ "CLANGD_ENABLE_REMOTE=0" ] + values = [] if (clangd_build_xpc) { values += [ "CLANGD_BUILD_XPC=1" ] } else { @@ -28,6 +26,7 @@ static_library("clangd") { ":features", "//clang-tools-extra/clang-tidy", "//clang-tools-extra/clang-tidy:all-checks", + "//clang-tools-extra/clangd/support", "//clang/lib/AST", "//clang/lib/ASTMatchers", "//clang/lib/Basic", @@ -45,7 +44,6 @@ static_library("clangd") { "//clang/lib/Tooling/Syntax", "//llvm/lib/Support", "//llvm/lib/Target:AllTargetsInfos", - "//llvm/utils/gn/build/libs/atomic", "//llvm/utils/gn/build/libs/pthread", ] include_dirs = [ @@ -56,7 +54,6 @@ static_library("clangd") { ] sources = [ "AST.cpp", - "Cancellation.cpp", "ClangdLSPServer.cpp", "ClangdServer.cpp", "CodeComplete.cpp", @@ -64,17 +61,14 @@ static_library("clangd") { "CollectMacros.cpp", "CompileCommands.cpp", "Compiler.cpp", - "Context.cpp", "Diagnostics.cpp", "DraftStore.cpp", "ExpectedTypes.cpp", "FS.cpp", - "FSProvider.cpp", "FileDistance.cpp", "FindSymbols.cpp", "FindTarget.cpp", "Format.cpp", - "FormattedString.cpp", "FuzzyMatch.cpp", "GlobalCompilationDatabase.cpp", "HeaderSourceSwitch.cpp", @@ -82,7 +76,6 @@ static_library("clangd") { "Hover.cpp", "IncludeFixer.cpp", "JSONTransport.cpp", - "Logger.cpp", "ParsedAST.cpp", "PathMapping.cpp", "Preamble.cpp", @@ -93,11 +86,8 @@ static_library("clangd") { "Selection.cpp", "SemanticHighlighting.cpp", "SemanticSelection.cpp", - "Shutdown.cpp", "SourceCode.cpp", "TUScheduler.cpp", - "Threading.cpp", - "Trace.cpp", "URI.cpp", "XRefs.cpp", "index/Background.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/BUILD.gn index c5e646b954353..de3c8f8b395bb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/BUILD.gn @@ -2,6 +2,7 @@ source_set("remote") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] include_dirs = [ "../.." 
] diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/unimplemented/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/unimplemented/BUILD.gn index 2d75d373c84b5..3343fc9c315bb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/unimplemented/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/index/remote/unimplemented/BUILD.gn @@ -2,6 +2,7 @@ source_set("unimplemented") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] include_dirs = [ "../../.." ] diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/refactor/tweaks/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/refactor/tweaks/BUILD.gn index 687b8720cc3ea..aa0a63f1a9edb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/refactor/tweaks/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/refactor/tweaks/BUILD.gn @@ -6,6 +6,7 @@ source_set("tweaks") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd/support", "//clang/lib/AST", "//clang/lib/Tooling/Core", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/support/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/support/BUILD.gn new file mode 100644 index 0000000000000..db843fed4c07a --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/support/BUILD.gn @@ -0,0 +1,21 @@ +# clangd/support contains low-level support libraries that do not depend +# on clang either programmatically or conceptually. +static_library("support") { + output_name = "clangdSupport" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//llvm/lib/Support", + "//llvm/utils/gn/build/libs/atomic", + ] + include_dirs = [ ".." ] + sources = [ + "Cancellation.cpp", + "Context.cpp", + "FSProvider.cpp", + "Logger.cpp", + "Markup.cpp", + "Shutdown.cpp", + "Threading.cpp", + "Trace.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn index 4dc4c278ebcdc..540a8a3642c49 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn @@ -21,9 +21,8 @@ write_lit_config("lit_site_cfg") { output = clangd_lit_site_cfg_file extra_values = [ - "CMAKE_CURRENT_BINARY_DIR=" + - rebase_path(get_label_info("//clang-tools-extra/clangd/test", - "target_out_dir")), + "CMAKE_CURRENT_BINARY_DIR=" + rebase_path( + get_label_info("//clang-tools-extra/clangd/test", "target_out_dir")), "CMAKE_CURRENT_SOURCE_DIR=" + rebase_path("//clang-tools-extra/clangd/test"), diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn index e334a4320b445..b60fc421cfb74 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/unittests/BUILD.gn @@ -23,6 +23,7 @@ unittest("ClangdTests") { "//llvm/lib/Testing/Support", ] include_dirs = [ + ".", "//clang-tools-extra/clangd", # To pick up the generated inc files. 
@@ -32,7 +33,6 @@ unittest("ClangdTests") { "ASTTests.cpp", "Annotations.cpp", "BackgroundIndexTests.cpp", - "CancellationTests.cpp", "CanonicalIncludesTests.cpp", "ClangdLSPServerTests.cpp", "ClangdTests.cpp", @@ -40,7 +40,6 @@ unittest("ClangdTests") { "CodeCompletionStringsTests.cpp", "CollectMacrosTests.cpp", "CompileCommandsTests.cpp", - "ContextTests.cpp", "DexTests.cpp", "DiagnosticsTests.cpp", "DraftStoreTests.cpp", @@ -51,8 +50,6 @@ unittest("ClangdTests") { "FindSymbolsTests.cpp", "FindTargetTests.cpp", "FormatTests.cpp", - "FormattedStringTests.cpp", - "FunctionTests.cpp", "FuzzyMatchTests.cpp", "GlobalCompilationDatabaseTests.cpp", "HeaderSourceSwitchTests.cpp", @@ -81,12 +78,17 @@ unittest("ClangdTests") { "TestFS.cpp", "TestIndex.cpp", "TestTU.cpp", - "ThreadingTests.cpp", - "TraceTests.cpp", "TweakTesting.cpp", "TweakTests.cpp", "TypeHierarchyTests.cpp", "URITests.cpp", "XRefsTests.cpp", + "support/CancellationTests.cpp", + "support/ContextTests.cpp", + "support/FunctionTests.cpp", + "support/MarkupTests.cpp", + "support/TestTracer.cpp", + "support/ThreadingTests.cpp", + "support/TraceTests.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn index 79b9c238f291e..e15340ec990ac 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn @@ -3,6 +3,7 @@ static_library("conversions") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] include_dirs = [ ".." ] @@ -18,6 +19,7 @@ static_library("transport") { deps = [ ":conversions", "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] include_dirs = [ ".." ] diff --git a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn index 97715cf8d9c90..832028dce6546 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn @@ -34,7 +34,7 @@ write_lit_config("lit_site_cfg") { "CLANG_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), "LLVM_LIT_TOOLS_DIR=", # Intentionally empty, matches cmake build. "LLVM_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), - "PYTHON_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=$python_path", ] if (clang_enable_static_analyzer) { diff --git a/llvm/utils/gn/secondary/clang/lib/CrossTU/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/CrossTU/BUILD.gn index 9f4df242fa8a7..83411bac3eada 100644 --- a/llvm/utils/gn/secondary/clang/lib/CrossTU/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/CrossTU/BUILD.gn @@ -6,10 +6,6 @@ static_library("CrossTU") { "//clang/lib/Basic", "//clang/lib/Frontend", "//clang/lib/Index", - "//clang/lib/Serialization", - - # FIXME: This shouldn't be here, https://reviews.llvm.org/D75665#2004891 - "//clang/lib/Tooling", "//llvm/lib/Support", ] sources = [ diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index f215c35d6aedc..12648f8465066 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -61,7 +61,7 @@ write_lit_config("lit_site_cfg") { "LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_LIT_TOOLS_DIR=", # Intentionally empty, matches cmake build. 
"LLVM_USE_SANITIZER=", - "PYTHON_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=$python_path", "USE_Z3_SOLVER=", ] diff --git a/llvm/utils/gn/secondary/clang/tools/scan-build/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/scan-build/BUILD.gn index df65d461b741d..a4c52089a87b4 100644 --- a/llvm/utils/gn/secondary/clang/tools/scan-build/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/scan-build/BUILD.gn @@ -19,13 +19,13 @@ copy("bin") { copy("libexec") { sources = [ - "libexec/ccc-analyzer", "libexec/c++-analyzer", + "libexec/ccc-analyzer", ] if (host_os == "win") { sources += [ - "libexec/ccc-analyzer.bat", "libexec/c++-analyzer.bat", + "libexec/ccc-analyzer.bat", ] } outputs = [ "$root_build_dir/libexec/{{source_file_part}}" ] @@ -38,8 +38,8 @@ copy("man") { copy("share") { sources = [ - "share/scan-build/sorttable.js", "share/scan-build/scanview.css", + "share/scan-build/sorttable.js", ] outputs = [ "$root_build_dir/share/scan-build/{{source_file_part}}" ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index c03399193baba..ca24b4b403783 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -40,7 +40,7 @@ write_cmake_config("lit_common_configured") { "COMPILER_RT_RESOLVED_TEST_COMPILER=" + rebase_path("$root_build_dir/bin/clang"), "COMPILER_RT_TEST_COMPILER_ID=Clang", - "PYTHON_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=$python_path", "COMPILER_RT_DEBUG_PYBOOL=False", "COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL=False", "COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR=" + diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn index f6e37a8243b76..e472b3a1255dd 100644 --- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn @@ -21,8 +21,11 @@ static_library("MachO2") { sources = [ "Arch/X86_64.cpp", "Driver.cpp", + "ExportTrie.cpp", "InputFiles.cpp", "InputSection.cpp", + "MergedOutputSection.cpp", + "OutputSection.cpp", "OutputSegment.cpp", "SymbolTable.cpp", "Symbols.cpp", diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index b0ccb7ea00324..71d20751c5d72 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -23,7 +23,7 @@ template("write_lit_cfg") { "LLVM_RUNTIME_OUTPUT_INTDIR=" + rebase_path("$root_out_dir/bin"), "LLVM_SOURCE_DIR=" + rebase_path("//llvm"), "LLVM_TOOLS_DIR=" + rebase_path("$root_out_dir/bin"), - "PYTHON_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=$python_path", "TARGET_TRIPLE=$llvm_target_triple", ] values += invoker.extra_values diff --git a/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn new file mode 100644 index 0000000000000..7102024b109d5 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Extensions/BUILD.gn @@ -0,0 +1,7 @@ +static_library("Extensions") { + output_name = "LLVMExtensions" + sources = [ + # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. 
+ "Extensions.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/LTO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/LTO/BUILD.gn index f24270f834d50..20821b61150ac 100644 --- a/llvm/utils/gn/secondary/llvm/lib/LTO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/LTO/BUILD.gn @@ -6,6 +6,7 @@ static_library("LTO") { "//llvm/lib/Bitcode/Reader", "//llvm/lib/Bitcode/Writer", "//llvm/lib/CodeGen", + "//llvm/lib/Extensions", "//llvm/lib/IR", "//llvm/lib/Linker", "//llvm/lib/MC", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index 427b2c067884f..27c96d29f9ef6 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -64,7 +64,7 @@ write_lit_config("lit_site_cfg") { "LLVM_NATIVE_ARCH=$native_target", "LLVM_USE_INTEL_JITEVENTS=0", "LLVM_USE_SANITIZER=", - "PYTHON_EXECUTABLE=$python_path", + "Python3_EXECUTABLE=$python_path", "TARGETS_TO_BUILD=$llvm_targets_to_build_string", "TARGET_TRIPLE=$llvm_target_triple", diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn index 5bc2427887d5b..70c06909c8eb2 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-config/BUILD.gn @@ -111,6 +111,240 @@ action("LibraryDependencies.inc") { "--write-library-table=" + rebase_path(output, root_out_dir), ] outputs = [ output ] + + # Listing all these sources isn't great. Ideally there'd be a depfile + # written by the tool with all the imported .py files, and we'd + # hand the LLVMBuild files to the tool in a rspfile and the tool + # would verify that the list is complete. But since the plan is to + # make this target not use llvm-build eventually, this is good enough. + sources = [ + "//llvm/utils/llvm-build/llvmbuild/__init__.py", + "//llvm/utils/llvm-build/llvmbuild/componentinfo.py", + "//llvm/utils/llvm-build/llvmbuild/main.py", + "//llvm/utils/llvm-build/llvmbuild/util.py", + + # LLVMBuild.txt files. Obtained by adding `print(llvmbuild_path)` to + # main.py in llvmbuild. (`find llvm -name LLVMBuild.txt` finds a few + # more LLVMBuild.txt files that aren't referenced and are dead.) 
+ "//llvm/LLVMBuild.txt", + "//llvm/bindings/LLVMBuild.txt", + "//llvm/docs/LLVMBuild.txt", + "//llvm/examples/LLVMBuild.txt", + "//llvm/lib/Analysis/LLVMBuild.txt", + "//llvm/lib/AsmParser/LLVMBuild.txt", + "//llvm/lib/BinaryFormat/LLVMBuild.txt", + "//llvm/lib/Bitcode/LLVMBuild.txt", + "//llvm/lib/Bitcode/Reader/LLVMBuild.txt", + "//llvm/lib/Bitcode/Writer/LLVMBuild.txt", + "//llvm/lib/Bitstream/LLVMBuild.txt", + "//llvm/lib/Bitstream/Reader/LLVMBuild.txt", + "//llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt", + "//llvm/lib/CodeGen/GlobalISel/LLVMBuild.txt", + "//llvm/lib/CodeGen/LLVMBuild.txt", + "//llvm/lib/CodeGen/MIRParser/LLVMBuild.txt", + "//llvm/lib/CodeGen/SelectionDAG/LLVMBuild.txt", + "//llvm/lib/DWARFLinker/LLVMBuild.txt", + "//llvm/lib/DebugInfo/CodeView/LLVMBuild.txt", + "//llvm/lib/DebugInfo/DWARF/LLVMBuild.txt", + "//llvm/lib/DebugInfo/GSYM/LLVMBuild.txt", + "//llvm/lib/DebugInfo/LLVMBuild.txt", + "//llvm/lib/DebugInfo/MSF/LLVMBuild.txt", + "//llvm/lib/DebugInfo/PDB/LLVMBuild.txt", + "//llvm/lib/DebugInfo/Symbolize/LLVMBuild.txt", + "//llvm/lib/Demangle/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/Interpreter/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/JITLink/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/MCJIT/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/Orc/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/OrcError/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt", + "//llvm/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt", + "//llvm/lib/Extensions/LLVMBuild.txt", + "//llvm/lib/Frontend/LLVMBuild.txt", + "//llvm/lib/Frontend/OpenMP/LLVMBuild.txt", + "//llvm/lib/FuzzMutate/LLVMBuild.txt", + "//llvm/lib/IR/LLVMBuild.txt", + "//llvm/lib/IRReader/LLVMBuild.txt", + "//llvm/lib/LLVMBuild.txt", + "//llvm/lib/LTO/LLVMBuild.txt", + "//llvm/lib/LineEditor/LLVMBuild.txt", + "//llvm/lib/Linker/LLVMBuild.txt", + "//llvm/lib/MC/LLVMBuild.txt", + "//llvm/lib/MC/MCDisassembler/LLVMBuild.txt", + "//llvm/lib/MC/MCParser/LLVMBuild.txt", + "//llvm/lib/MCA/LLVMBuild.txt", + "//llvm/lib/Object/LLVMBuild.txt", + "//llvm/lib/ObjectYAML/LLVMBuild.txt", + "//llvm/lib/Option/LLVMBuild.txt", + "//llvm/lib/Passes/LLVMBuild.txt", + "//llvm/lib/ProfileData/Coverage/LLVMBuild.txt", + "//llvm/lib/ProfileData/LLVMBuild.txt", + "//llvm/lib/Remarks/LLVMBuild.txt", + "//llvm/lib/Support/LLVMBuild.txt", + "//llvm/lib/TableGen/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/AArch64/Utils/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/AMDGPU/Utils/LLVMBuild.txt", + "//llvm/lib/Target/ARC/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/ARC/LLVMBuild.txt", + "//llvm/lib/Target/ARC/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/ARC/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/ARM/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/ARM/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/ARM/LLVMBuild.txt", + 
"//llvm/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/ARM/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/ARM/Utils/LLVMBuild.txt", + "//llvm/lib/Target/AVR/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/AVR/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/AVR/LLVMBuild.txt", + "//llvm/lib/Target/AVR/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/AVR/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/BPF/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/BPF/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/BPF/LLVMBuild.txt", + "//llvm/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/BPF/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/Hexagon/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/Hexagon/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/Hexagon/LLVMBuild.txt", + "//llvm/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/LLVMBuild.txt", + "//llvm/lib/Target/Lanai/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/Lanai/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/Lanai/LLVMBuild.txt", + "//llvm/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/Lanai/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/MSP430/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/MSP430/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/MSP430/LLVMBuild.txt", + "//llvm/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/MSP430/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/Mips/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/Mips/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/Mips/LLVMBuild.txt", + "//llvm/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/Mips/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/NVPTX/LLVMBuild.txt", + "//llvm/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/PowerPC/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/PowerPC/LLVMBuild.txt", + "//llvm/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/RISCV/Utils/LLVMBuild.txt", + "//llvm/lib/Target/Sparc/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/Sparc/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/Sparc/LLVMBuild.txt", + "//llvm/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/Sparc/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/SystemZ/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/SystemZ/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/SystemZ/LLVMBuild.txt", + "//llvm/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/VE/LLVMBuild.txt", + "//llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/VE/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/WebAssembly/AsmParser/LLVMBuild.txt", + "//llvm/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/WebAssembly/LLVMBuild.txt", + "//llvm/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/X86/AsmParser/LLVMBuild.txt", + 
"//llvm/lib/Target/X86/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/X86/LLVMBuild.txt", + "//llvm/lib/Target/X86/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/X86/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Target/XCore/Disassembler/LLVMBuild.txt", + "//llvm/lib/Target/XCore/LLVMBuild.txt", + "//llvm/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt", + "//llvm/lib/Target/XCore/TargetInfo/LLVMBuild.txt", + "//llvm/lib/Testing/LLVMBuild.txt", + "//llvm/lib/Testing/Support/LLVMBuild.txt", + "//llvm/lib/TextAPI/LLVMBuild.txt", + "//llvm/lib/ToolDrivers/LLVMBuild.txt", + "//llvm/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt", + "//llvm/lib/ToolDrivers/llvm-lib/LLVMBuild.txt", + "//llvm/lib/Transforms/AggressiveInstCombine/LLVMBuild.txt", + "//llvm/lib/Transforms/CFGuard/LLVMBuild.txt", + "//llvm/lib/Transforms/Coroutines/LLVMBuild.txt", + "//llvm/lib/Transforms/IPO/LLVMBuild.txt", + "//llvm/lib/Transforms/InstCombine/LLVMBuild.txt", + "//llvm/lib/Transforms/Instrumentation/LLVMBuild.txt", + "//llvm/lib/Transforms/LLVMBuild.txt", + "//llvm/lib/Transforms/ObjCARC/LLVMBuild.txt", + "//llvm/lib/Transforms/Scalar/LLVMBuild.txt", + "//llvm/lib/Transforms/Utils/LLVMBuild.txt", + "//llvm/lib/Transforms/Vectorize/LLVMBuild.txt", + "//llvm/lib/WindowsManifest/LLVMBuild.txt", + "//llvm/lib/XRay/LLVMBuild.txt", + "//llvm/projects/LLVMBuild.txt", + "//llvm/tools/LLVMBuild.txt", + "//llvm/tools/bugpoint/LLVMBuild.txt", + "//llvm/tools/dsymutil/LLVMBuild.txt", + "//llvm/tools/llc/LLVMBuild.txt", + "//llvm/tools/lli/ChildTarget/LLVMBuild.txt", + "//llvm/tools/lli/LLVMBuild.txt", + "//llvm/tools/llvm-ar/LLVMBuild.txt", + "//llvm/tools/llvm-as/LLVMBuild.txt", + "//llvm/tools/llvm-bcanalyzer/LLVMBuild.txt", + "//llvm/tools/llvm-cat/LLVMBuild.txt", + "//llvm/tools/llvm-cfi-verify/LLVMBuild.txt", + "//llvm/tools/llvm-cov/LLVMBuild.txt", + "//llvm/tools/llvm-cvtres/LLVMBuild.txt", + "//llvm/tools/llvm-diff/LLVMBuild.txt", + "//llvm/tools/llvm-dis/LLVMBuild.txt", + "//llvm/tools/llvm-dwarfdump/LLVMBuild.txt", + "//llvm/tools/llvm-dwp/LLVMBuild.txt", + "//llvm/tools/llvm-elfabi/LLVMBuild.txt", + "//llvm/tools/llvm-exegesis/LLVMBuild.txt", + "//llvm/tools/llvm-extract/LLVMBuild.txt", + "//llvm/tools/llvm-ifs/LLVMBuild.txt", + "//llvm/tools/llvm-jitlink/LLVMBuild.txt", + "//llvm/tools/llvm-jitlistener/LLVMBuild.txt", + "//llvm/tools/llvm-link/LLVMBuild.txt", + "//llvm/tools/llvm-lto/LLVMBuild.txt", + "//llvm/tools/llvm-mc/LLVMBuild.txt", + "//llvm/tools/llvm-mca/LLVMBuild.txt", + "//llvm/tools/llvm-modextract/LLVMBuild.txt", + "//llvm/tools/llvm-mt/LLVMBuild.txt", + "//llvm/tools/llvm-nm/LLVMBuild.txt", + "//llvm/tools/llvm-objcopy/LLVMBuild.txt", + "//llvm/tools/llvm-objdump/LLVMBuild.txt", + "//llvm/tools/llvm-pdbutil/LLVMBuild.txt", + "//llvm/tools/llvm-profdata/LLVMBuild.txt", + "//llvm/tools/llvm-rc/LLVMBuild.txt", + "//llvm/tools/llvm-reduce/LLVMBuild.txt", + "//llvm/tools/llvm-rtdyld/LLVMBuild.txt", + "//llvm/tools/llvm-size/LLVMBuild.txt", + "//llvm/tools/llvm-split/LLVMBuild.txt", + "//llvm/tools/llvm-undname/LLVMBuild.txt", + "//llvm/tools/opt/LLVMBuild.txt", + "//llvm/tools/verify-uselistorder/LLVMBuild.txt", + "//llvm/utils/LLVMBuild.txt", + "//llvm/utils/TableGen/LLVMBuild.txt", + "//llvm/utils/unittest/LLVMBuild.txt", + ] } executable("llvm-config") { diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index 1589fc603a72e..ad60a40b630df 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -11,6 +11,7 @@ unittest("IRTests") { "//llvm/lib/Transforms/Utils", ] sources = [ + "AbstractCallSiteTest.cpp", "AsmWriterTest.cpp", "AttributesTest.cpp", "BasicBlockTest.cpp", diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py index d4ae528d99d41..627785829efca 100644 --- a/llvm/utils/lit/lit/Test.py +++ b/llvm/utils/lit/lit/Test.py @@ -37,6 +37,7 @@ def __repr__(self): UNSUPPORTED = ResultCode('UNSUPPORTED', False) TIMEOUT = ResultCode('TIMEOUT', True) SKIPPED = ResultCode('SKIPPED', False) +EXCLUDED = ResultCode('EXCLUDED', False) # Test metric values. @@ -367,45 +368,3 @@ def isEarlyTest(self): parallelism or where it is desirable to surface their failures early. """ return self.suite.config.is_early - - def writeJUnitXML(self, fil): - """Write the test's report xml representation to a file handle.""" - test_name = quoteattr(self.path_in_suite[-1]) - test_path = self.path_in_suite[:-1] - safe_test_path = [x.replace(".","_") for x in test_path] - safe_name = self.suite.name.replace(".","-") - - if safe_test_path: - class_name = safe_name + "." + "/".join(safe_test_path) - else: - class_name = safe_name + "." + safe_name - class_name = quoteattr(class_name) - testcase_template = '\n\t", "]]]]>")) - fil.write("]]>\n") - elif self.result.code == UNSUPPORTED: - unsupported_features = self.getMissingRequiredFeatures() - if unsupported_features: - skip_message = "Skipping because of: " + ", ".join(unsupported_features) - else: - skip_message = "Skipping because of configuration." - - fil.write(">\n\t\n\n".format(quoteattr(skip_message))) - else: - fil.write("/>") diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index 4e185dee900f8..06b1313cb8bb1 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -3,6 +3,7 @@ import shlex import sys +import lit.reports import lit.util @@ -57,7 +58,7 @@ def parse_args(): help="Display all commandlines and output", action="store_true") format_group.add_argument("-o", "--output", - dest="output_path", + type=lit.reports.JsonReport, help="Write test results to the provided path", metavar="PATH") format_group.add_argument("--no-progress-bar", @@ -98,7 +99,7 @@ def parse_args(): help="Don't execute any tests (assume PASS)", action="store_true") execution_group.add_argument("--xunit-xml-output", - dest="xunit_output_file", + type=lit.reports.XunitReport, help="Write XUnit-compatible XML test reports to the specified file") execution_group.add_argument("--timeout", dest="maxIndividualTestTime", @@ -183,6 +184,8 @@ def parse_args(): else: opts.shard = None + opts.reports = filter(None, [opts.output, opts.xunit_xml_output]) + return opts diff --git a/llvm/utils/lit/lit/formats/googletest.py b/llvm/utils/lit/lit/formats/googletest.py index 777568b244658..67f32cf1cdf7b 100644 --- a/llvm/utils/lit/lit/formats/googletest.py +++ b/llvm/utils/lit/lit/formats/googletest.py @@ -41,7 +41,7 @@ def getGTestTests(self, path, litConfig, localConfig): litConfig.warning( "unable to discover google-tests in %r: %s. Process output: %s" % (path, sys.exc_info()[1], exc.output)) - raise StopIteration + return nested_tests = [] for ln in output.splitlines(False): # Don't keep newlines. 
diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 25f15107a211a..f9b54805d70c8 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -13,6 +13,7 @@ import lit.discovery import lit.display import lit.LitConfig +import lit.reports import lit.run import lit.Test import lit.util @@ -57,9 +58,11 @@ def main(builtin_params={}): opts.maxIndividualTestTime)) lit_config.maxIndividualTestTime = opts.maxIndividualTestTime - filtered_tests = [t for t in discovered_tests if + determine_order(discovered_tests, opts.order) + + selected_tests = [t for t in discovered_tests if opts.filter.search(t.getFullName())] - if not filtered_tests: + if not selected_tests: sys.stderr.write('error: filter did not match any tests ' '(of %d discovered). ' % len(discovered_tests)) if opts.allow_empty_runs: @@ -71,38 +74,33 @@ def main(builtin_params={}): 'error.\n') sys.exit(2) - determine_order(filtered_tests, opts.order) - if opts.shard: (run, shards) = opts.shard - filtered_tests = filter_by_shard(filtered_tests, run, shards, lit_config) - if not filtered_tests: + selected_tests = filter_by_shard(selected_tests, run, shards, lit_config) + if not selected_tests: sys.stderr.write('warning: shard does not contain any tests. ' 'Consider decreasing the number of shards.\n') sys.exit(0) - filtered_tests = filtered_tests[:opts.max_tests] + selected_tests = selected_tests[:opts.max_tests] - opts.workers = min(len(filtered_tests), opts.workers) + mark_excluded(discovered_tests, selected_tests) start = time.time() - run_tests(filtered_tests, lit_config, opts, len(discovered_tests)) + run_tests(selected_tests, lit_config, opts, len(discovered_tests)) elapsed = time.time() - start # TODO(yln): eventually, all functions below should act on discovered_tests executed_tests = [ - t for t in filtered_tests if t.result.code != lit.Test.SKIPPED] + t for t in selected_tests if t.result.code != lit.Test.SKIPPED] if opts.time_tests: - print_histogram(executed_tests) + print_histogram(discovered_tests) - print_results(filtered_tests, elapsed, opts) + print_results(discovered_tests, elapsed, opts) - if opts.output_path: - #TODO(yln): pass in discovered_tests - write_test_results(executed_tests, lit_config, elapsed, opts.output_path) - if opts.xunit_output_file: - write_test_results_xunit(executed_tests, opts) + for report in opts.reports: + report.write_results(executed_tests, elapsed) if lit_config.numErrors: sys.stderr.write('\n%d error(s) in tests\n' % lit_config.numErrors) @@ -111,7 +109,7 @@ def main(builtin_params={}): if lit_config.numWarnings: sys.stderr.write('\n%d warning(s) in tests\n' % lit_config.numWarnings) - has_failure = any(t.isFailure() for t in executed_tests) + has_failure = any(t.isFailure() for t in discovered_tests) if has_failure: sys.exit(1) @@ -126,16 +124,14 @@ def parse(p): def print_discovered(tests, show_suites, show_tests): - # Suite names are not necessarily unique. Include object identity in sort - # key to avoid mixing tests of different suites. 
- tests.sort(key=lambda t: (t.suite.name, t.suite, t.path_in_suite)) + tests.sort(key=lit.reports.by_suite_and_test_path) if show_suites: import itertools tests_by_suite = itertools.groupby(tests, lambda t: t.suite) print('-- Test Suites --') - for suite, suite_iter in tests_by_suite: - test_count = sum(1 for _ in suite_iter) + for suite, test_iter in tests_by_suite: + test_count = sum(1 for _ in test_iter) print(' %s - %d tests' % (suite.name, test_count)) print(' Source Root: %s' % suite.source_root) print(' Exec Root : %s' % suite.exec_root) @@ -189,15 +185,24 @@ def filter_by_shard(tests, run, shards, lit_config): return selected_tests +def mark_excluded(discovered_tests, selected_tests): + excluded_tests = set(discovered_tests) - set(selected_tests) + result = lit.Test.Result(lit.Test.EXCLUDED) + for t in excluded_tests: + t.setResult(result) + + def run_tests(tests, lit_config, opts, discovered_tests): + workers = min(len(tests), opts.workers) display = lit.display.create_display(opts, len(tests), discovered_tests, - opts.workers) + workers) + def progress_callback(test): display.update(test) if opts.order == 'failing-first': touch_file(test) - run = lit.run.Run(tests, lit_config, opts.workers, progress_callback, + run = lit.run.Run(tests, lit_config, workers, progress_callback, opts.max_failures, opts.timeout) display.print_header() @@ -248,33 +253,41 @@ def execute_in_tmp_dir(run, lit_config): def print_histogram(tests): - test_times = [(t.getFullName(), t.result.elapsed) for t in tests] - lit.util.printHistogram(test_times, title='Tests') + test_times = [(t.getFullName(), t.result.elapsed) + for t in tests if t.result.elapsed] + if test_times: + lit.util.printHistogram(test_times, title='Tests') + + +def add_result_category(result_code, label): + assert isinstance(result_code, lit.Test.ResultCode) + category = (result_code, "%s Tests" % label, label) + result_codes.append(category) # Status code, summary label, group label -failure_codes = [ +result_codes = [ + # Passes + (lit.Test.EXCLUDED, 'Excluded Tests', 'Excluded'), + (lit.Test.SKIPPED, 'Skipped Tests', 'Skipped'), + (lit.Test.UNSUPPORTED, 'Unsupported Tests', 'Unsupported'), + (lit.Test.PASS, 'Expected Passes', ''), + (lit.Test.FLAKYPASS, 'Passes With Retry', ''), + (lit.Test.XFAIL, 'Expected Failures', 'Expected Failing'), + # Failures (lit.Test.UNRESOLVED, 'Unresolved Tests', 'Unresolved'), (lit.Test.TIMEOUT, 'Individual Timeouts', 'Timed Out'), (lit.Test.FAIL, 'Unexpected Failures', 'Failing'), (lit.Test.XPASS, 'Unexpected Passes', 'Unexpected Passing') ] -all_codes = [ - (lit.Test.SKIPPED, 'Skipped Tests', 'Skipped'), - (lit.Test.UNSUPPORTED, 'Unsupported Tests', 'Unsupported'), - (lit.Test.PASS, 'Expected Passes', ''), - (lit.Test.FLAKYPASS, 'Passes With Retry', ''), - (lit.Test.XFAIL, 'Expected Failures', 'Expected Failing'), -] + failure_codes - def print_results(tests, elapsed, opts): - tests_by_code = {code: [] for (code, _, _) in all_codes} + tests_by_code = {code: [] for (code, _, _) in result_codes} for test in tests: tests_by_code[test.result.code].append(test) - for (code, _, group_label) in all_codes: + for (code, _, group_label) in result_codes: print_group(code, group_label, tests_by_code[code], opts) print_summary(tests_by_code, opts.quiet, elapsed) @@ -284,7 +297,7 @@ def print_group(code, label, tests, opts): if not tests: return # TODO(yln): FLAKYPASS? Make this more consistent! 
- if code in {lit.Test.SKIPPED, lit.Test.PASS}: + if code in {lit.Test.EXCLUDED, lit.Test.SKIPPED, lit.Test.PASS}: return if (lit.Test.XFAIL == code and not opts.show_xfail) or \ (lit.Test.UNSUPPORTED == code and not opts.show_unsupported): @@ -300,7 +313,7 @@ def print_summary(tests_by_code, quiet, elapsed): if not quiet: print('\nTesting Time: %.2fs' % elapsed) - codes = failure_codes if quiet else all_codes + codes = [c for c in result_codes if not quiet or c.isFailure] groups = [(label, len(tests_by_code[code])) for code, label, _ in codes] groups = [(label, count) for label, count in groups if count] if not groups: @@ -313,98 +326,3 @@ def print_summary(tests_by_code, quiet, elapsed): label = label.ljust(max_label_len) count = str(count).rjust(max_count_len) print(' %s: %s' % (label, count)) - - -def write_test_results(tests, lit_config, elapsed, output_path): - # TODO(yln): audit: unexecuted tests - # Construct the data we will write. - data = {} - # Encode the current lit version as a schema version. - data['__version__'] = lit.__versioninfo__ - data['elapsed'] = elapsed - # FIXME: Record some information on the lit configuration used? - # FIXME: Record information from the individual test suites? - - # Encode the tests. - data['tests'] = tests_data = [] - for test in tests: - test_data = { - 'name' : test.getFullName(), - 'code' : test.result.code.name, - 'output' : test.result.output, - 'elapsed' : test.result.elapsed } - - # Add test metrics, if present. - if test.result.metrics: - test_data['metrics'] = metrics_data = {} - for key, value in test.result.metrics.items(): - metrics_data[key] = value.todata() - - # Report micro-tests separately, if present - if test.result.microResults: - for key, micro_test in test.result.microResults.items(): - # Expand parent test name with micro test name - parent_name = test.getFullName() - micro_full_name = parent_name + ':' + key - - micro_test_data = { - 'name' : micro_full_name, - 'code' : micro_test.code.name, - 'output' : micro_test.output, - 'elapsed' : micro_test.elapsed } - if micro_test.metrics: - micro_test_data['metrics'] = micro_metrics_data = {} - for key, value in micro_test.metrics.items(): - micro_metrics_data[key] = value.todata() - - tests_data.append(micro_test_data) - - tests_data.append(test_data) - - # Write the output. 
- f = open(output_path, 'w') - try: - import json - json.dump(data, f, indent=2, sort_keys=True) - f.write('\n') - finally: - f.close() - -def write_test_results_xunit(tests, opts): - # TODO(yln): audit: unexecuted tests - from xml.sax.saxutils import quoteattr - # Collect the tests, indexed by test suite - by_suite = {} - for result_test in tests: - suite = result_test.suite.config.name - if suite not in by_suite: - by_suite[suite] = { - 'passes' : 0, - 'failures' : 0, - 'skipped': 0, - 'tests' : [] } - by_suite[suite]['tests'].append(result_test) - if result_test.isFailure(): - by_suite[suite]['failures'] += 1 - elif result_test.result.code == lit.Test.UNSUPPORTED: - by_suite[suite]['skipped'] += 1 - else: - by_suite[suite]['passes'] += 1 - xunit_output_file = open(opts.xunit_output_file, "w") - xunit_output_file.write("\n") - xunit_output_file.write("\n") - for suite_name, suite in by_suite.items(): - safe_suite_name = quoteattr(suite_name.replace(".", "-")) - xunit_output_file.write("\n") - - for result_test in suite['tests']: - result_test.writeJUnitXML(xunit_output_file) - xunit_output_file.write("\n") - xunit_output_file.write("\n") - xunit_output_file.write("") - xunit_output_file.close() diff --git a/llvm/utils/lit/lit/reports.py b/llvm/utils/lit/lit/reports.py new file mode 100755 index 0000000000000..8499d2126be59 --- /dev/null +++ b/llvm/utils/lit/lit/reports.py @@ -0,0 +1,139 @@ +import itertools +import json + +from xml.sax.saxutils import quoteattr as quo + +import lit.Test + + +def by_suite_and_test_path(test): + # Suite names are not necessarily unique. Include object identity in sort + # key to avoid mixing tests of different suites. + return (test.suite.name, id(test.suite), test.path_in_suite) + + +class JsonReport(object): + def __init__(self, output_file): + self.output_file = output_file + + def write_results(self, tests, elapsed): + assert not any(t.result.code in {lit.Test.EXCLUDED, lit.Test.SKIPPED} for t in tests) + # Construct the data we will write. + data = {} + # Encode the current lit version as a schema version. + data['__version__'] = lit.__versioninfo__ + data['elapsed'] = elapsed + # FIXME: Record some information on the lit configuration used? + # FIXME: Record information from the individual test suites? + + # Encode the tests. + data['tests'] = tests_data = [] + for test in tests: + test_data = { + 'name': test.getFullName(), + 'code': test.result.code.name, + 'output': test.result.output, + 'elapsed': test.result.elapsed} + + # Add test metrics, if present. 
+ if test.result.metrics: + test_data['metrics'] = metrics_data = {} + for key, value in test.result.metrics.items(): + metrics_data[key] = value.todata() + + # Report micro-tests separately, if present + if test.result.microResults: + for key, micro_test in test.result.microResults.items(): + # Expand parent test name with micro test name + parent_name = test.getFullName() + micro_full_name = parent_name + ':' + key + + micro_test_data = { + 'name': micro_full_name, + 'code': micro_test.code.name, + 'output': micro_test.output, + 'elapsed': micro_test.elapsed} + if micro_test.metrics: + micro_test_data['metrics'] = micro_metrics_data = {} + for key, value in micro_test.metrics.items(): + micro_metrics_data[key] = value.todata() + + tests_data.append(micro_test_data) + + tests_data.append(test_data) + + with open(self.output_file, 'w') as file: + json.dump(data, file, indent=2, sort_keys=True) + file.write('\n') + + +class XunitReport(object): + def __init__(self, output_file): + self.output_file = output_file + self.skipped_codes = {lit.Test.EXCLUDED, + lit.Test.SKIPPED, lit.Test.UNSUPPORTED} + + # TODO(yln): elapsed unused, put it somewhere? + def write_results(self, tests, elapsed): + assert not any(t.result.code in {lit.Test.EXCLUDED, lit.Test.SKIPPED} for t in tests) + tests.sort(key=by_suite_and_test_path) + tests_by_suite = itertools.groupby(tests, lambda t: t.suite) + + with open(self.output_file, 'w') as file: + file.write('\n') + file.write('\n') + for suite, test_iter in tests_by_suite: + self._write_testsuite(file, suite, list(test_iter)) + file.write('\n') + + def _write_testsuite(self, file, suite, tests): + skipped = sum(1 for t in tests if t.result.code in self.skipped_codes) + failures = sum(1 for t in tests if t.isFailure()) + + name = suite.config.name.replace('.', '-') + # file.write(f'\n') + file.write('\n'.format( + name=quo(name), tests=len(tests), failures=failures, skipped=skipped)) + for test in tests: + self._write_test(file, test, name) + file.write('\n') + + def _write_test(self, file, test, suite_name): + path = '/'.join(test.path_in_suite[:-1]).replace('.', '_') + # class_name = f'{suite_name}.{path or suite_name}' + class_name = suite_name + '.' + (path or suite_name) + name = test.path_in_suite[-1] + time = test.result.elapsed or 0.0 + # file.write(f'\n ', ']]]]>') + if isinstance(output, bytes): + output.decode("utf-8", 'ignore') + file.write(output) + file.write(']]>\n\n') + elif test.result.code in self.skipped_codes: + reason = self._get_skip_reason(test) + # file.write(f'>\n \n\n') + file.write('>\n \n\n'.format( + reason=quo(reason))) + else: + file.write('/>\n') + + def _get_skip_reason(self, test): + code = test.result.code + if code == lit.Test.EXCLUDED: + return 'Test not selected (--filter, --max-tests, --run-shard)' + if code == lit.Test.SKIPPED: + return 'User interrupt' + + assert code == lit.Test.UNSUPPORTED + features = test.getMissingRequiredFeatures() + if features: + return 'Missing required feature(s): ' + ', '.join(features) + return 'Skipping because of configuration' diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py index d0e7e65aae1ec..5aef77e5f6059 100644 --- a/llvm/utils/lit/lit/run.py +++ b/llvm/utils/lit/lit/run.py @@ -99,9 +99,11 @@ def _wait_for(self, async_results, deadline): # Update local test object "in place" from remote test object. This # ensures that the original test object which is used for printing test - # results reflect the changes. + # results reflects the changes. 
diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py
index d0e7e65aae1ec..5aef77e5f6059 100644
--- a/llvm/utils/lit/lit/run.py
+++ b/llvm/utils/lit/lit/run.py
@@ -99,9 +99,11 @@ def _wait_for(self, async_results, deadline):
 
     # Update local test object "in place" from remote test object. This
     # ensures that the original test object which is used for printing test
-    # results reflect the changes.
+    # results reflects the changes.
     def _update_test(self, local_test, remote_test):
-        local_test.__dict__.update(remote_test.__dict__)
+        # Needed for getMissingRequiredFeatures()
+        local_test.requires = remote_test.requires
+        local_test.result = remote_test.result
 
     # TODO(yln): interferes with progress bar
     # Some tests use threads internally, and at least on Linux each of these
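To make the intent of `_update_test` concrete, here is a minimal standalone sketch; the `Test` class is a hypothetical stand-in for lit's real test object, not lit's actual API:

```python
# Sketch: a worker process returns a deserialized copy of the test object;
# the parent copies back only the fields that reporting needs, instead of
# clobbering its whole __dict__ as the old code did.
class Test:  # hypothetical stand-in for lit.Test.Test
    def __init__(self, name):
        self.name = name
        self.requires = []   # feature names, used by getMissingRequiredFeatures()
        self.result = None

local_test = Test('suite :: a.txt')
remote_test = Test('suite :: a.txt')   # copy that came back from a worker
remote_test.requires = ['shell']
remote_test.result = 'PASS'

# Equivalent of _update_test(local_test, remote_test):
local_test.requires = remote_test.requires
local_test.result = remote_test.result
assert local_test.result == 'PASS'
```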
diff --git a/llvm/utils/lit/tests/Inputs/custom-result-category/format.py b/llvm/utils/lit/tests/Inputs/custom-result-category/format.py
new file mode 100644
index 0000000000000..b0c97ec71bb9a
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/custom-result-category/format.py
@@ -0,0 +1,18 @@
+import lit
+import lit.formats
+
+CUSTOM_PASS = lit.Test.ResultCode('CUSTOM_PASS', False)
+CUSTOM_FAILURE = lit.Test.ResultCode('CUSTOM_FAILURE', True)
+
+lit.main.add_result_category(CUSTOM_PASS, "My Passed")
+lit.main.add_result_category(CUSTOM_FAILURE, "My Failed")
+
+
+class MyFormat(lit.formats.ShTest):
+    def execute(self, test, lit_config):
+        result = super(MyFormat, self).execute(test, lit_config)
+        if result.code.isFailure:
+            result.code = CUSTOM_FAILURE
+        else:
+            result.code = CUSTOM_PASS
+        return result
diff --git a/llvm/utils/lit/tests/Inputs/custom-result-category/lit.cfg b/llvm/utils/lit/tests/Inputs/custom-result-category/lit.cfg
new file mode 100644
index 0000000000000..842ea612ebf03
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/custom-result-category/lit.cfg
@@ -0,0 +1,10 @@
+import lit
+import site
+site.addsitedir(os.path.dirname(__file__))
+import format
+
+config.name = 'custom-result-category'
+config.suffixes = ['.txt']
+config.test_format = format.MyFormat()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/llvm/utils/lit/tests/Inputs/custom-result-category/test1.txt b/llvm/utils/lit/tests/Inputs/custom-result-category/test1.txt
new file mode 100644
index 0000000000000..b80b60b7a2794
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/custom-result-category/test1.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/llvm/utils/lit/tests/Inputs/custom-result-category/test2.txt b/llvm/utils/lit/tests/Inputs/custom-result-category/test2.txt
new file mode 100644
index 0000000000000..49932c3006e15
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/custom-result-category/test2.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/llvm/utils/lit/tests/Inputs/shtest-inject/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-inject/lit.cfg
index dfec55a3d427d..755edf8b5bfce 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-inject/lit.cfg
+++ b/llvm/utils/lit/tests/Inputs/shtest-inject/lit.cfg
@@ -3,7 +3,7 @@ import lit
 preamble_commands = [
     'echo "THIS WAS"',
     'echo "INJECTED"'
-];
+]
 
 config.name = 'shtest-inject'
 config.suffixes = ['.txt']
diff --git a/llvm/utils/lit/tests/custom-result-category.py b/llvm/utils/lit/tests/custom-result-category.py
new file mode 100644
index 0000000000000..85efe397735ae
--- /dev/null
+++ b/llvm/utils/lit/tests/custom-result-category.py
@@ -0,0 +1,15 @@
+# UNSUPPORTED: system-windows
+# Test lit.main.add_result_category() extension API.
+
+# RUN: not %{lit} -j 1 %{inputs}/custom-result-category | FileCheck %s
+
+# CHECK: CUSTOM_PASS: custom-result-category :: test1.txt
+# CHECK: CUSTOM_FAILURE: custom-result-category :: test2.txt
+
+# TODO(yln): Passing tests shouldn't be printed by default.
+# CHECK: My Passed Tests (1)
+# CHECK: My Failed Tests (1)
+# CHECK: custom-result-category :: test2.txt
+
+# CHECK: My Passed Tests: 1
+# CHECK: My Failed Tests: 1
diff --git a/llvm/utils/lit/tests/selecting.py b/llvm/utils/lit/tests/selecting.py
index f5adb3228ea83..8e912de96f462 100644
--- a/llvm/utils/lit/tests/selecting.py
+++ b/llvm/utils/lit/tests/selecting.py
@@ -22,12 +22,14 @@
 # RUN: %{lit} --filter 'O[A-Z]E' %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER %s
 # RUN: env LIT_FILTER='o[a-z]e' %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER %s
 # CHECK-FILTER: Testing: 2 of 5 tests
+# CHECK-FILTER: Excluded Tests : 3
 
 
 # Check that maximum counts work
 #
 # RUN: %{lit} --max-tests 3 %{inputs}/discovery | FileCheck --check-prefix=CHECK-MAX %s
 # CHECK-MAX: Testing: 3 of 5 tests
+# CHECK-MAX: Excluded Tests : 2
 
 
 # Check that sharding partitions the testsuite in a way that distributes the
@@ -38,6 +40,7 @@
 # RUN: FileCheck --check-prefix=CHECK-SHARD0-OUT < %t.out %s
 # CHECK-SHARD0-ERR: note: Selecting shard 1/3 = size 2/5 = tests #(3*k)+1 = [1, 4]
 # CHECK-SHARD0-OUT: Testing: 2 of 5 tests
+# CHECK-SHARD0-OUT: Excluded Tests : 3
 #
 # RUN: %{lit} --num-shards 3 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err
 # RUN: FileCheck --check-prefix=CHECK-SHARD1-ERR < %t.err %s
diff --git a/llvm/utils/lit/tests/shtest-format.py b/llvm/utils/lit/tests/shtest-format.py
index 75d08895f966e..c679b2987ca71 100644
--- a/llvm/utils/lit/tests/shtest-format.py
+++ b/llvm/utils/lit/tests/shtest-format.py
@@ -86,7 +86,7 @@
 # CHECK: Unexpected Passes    : 1
 
 
-# XUNIT: <?xml version="1.0" encoding="UTF-8" ?>
+# XUNIT: <?xml version="1.0" encoding="UTF-8"?>
 # XUNIT-NEXT: <testsuites>
 # XUNIT-NEXT:
@@ -118,7 +118,7 @@
 # XUNIT:
 # XUNIT:
-# XUNIT-NEXT:
+# XUNIT-NEXT:
 # XUNIT:
@@ -129,12 +129,12 @@
 # XUNIT:
-# XUNIT-NEXT:
+# XUNIT-NEXT:
 # XUNIT:
 
 # XUNIT:
-# XUNIT-NEXT:
+# XUNIT-NEXT:
 # XUNIT:
 # XUNIT-NEXT:
@@ -142,7 +142,7 @@
 # XUNIT-NEXT:
 
 # XUNIT:
-# XUNIT-NEXT:
+# XUNIT-NEXT:
 # XUNIT:
diff --git a/llvm/utils/lit/tests/xunit-output.py b/llvm/utils/lit/tests/xunit-output.py
index 930768e61dae2..9cfe2cb9aa68c 100644
--- a/llvm/utils/lit/tests/xunit-output.py
+++ b/llvm/utils/lit/tests/xunit-output.py
@@ -7,10 +7,10 @@
 # RUN: sh -c 'if command -v xmllint 2>/dev/null; then xmllint --noout %t.xunit.xml; fi'
 # RUN: FileCheck < %t.xunit.xml %s
 
-# CHECK: <?xml version="1.0" encoding="UTF-8" ?>
+# CHECK: <?xml version="1.0" encoding="UTF-8"?>
 # CHECK: <testsuites>
 # CHECK:
 # CHECK:
-# CHECK-NEXT: ]]]]> &"]]>
+# CHECK-NEXT: ]]]]> &"]]>
 # CHECK:
 # CHECK:
diff --git a/llvm/utils/llvm-locstats/llvm-locstats.py b/llvm/utils/llvm-locstats/llvm-locstats.py
index dec87f9caf7d3..7114661e70fe9 100755
--- a/llvm/utils/llvm-locstats/llvm-locstats.py
+++ b/llvm/utils/llvm-locstats/llvm-locstats.py
@@ -188,74 +188,85 @@ def parse_locstats(opts, binary):
     print ('error: No valid llvm-dwarfdump statistics found.')
     sys.exit(1)
 
+  # TODO: Parse the statistics Version from JSON.
+
   if opts.only_variables:
     # Read the JSON only for local variables.
     variables_total_locstats = \
-      json_parsed['total vars procesed by location statistics']
+      json_parsed['#local vars processed by location statistics']
     variables_scope_bytes_covered = \
-      json_parsed['vars scope bytes covered']
+      json_parsed['sum_all_local_vars(#bytes in parent scope covered' \
+        ' by DW_AT_location)']
     variables_scope_bytes = \
-      json_parsed['vars scope bytes total']
+      json_parsed['sum_all_local_vars(#bytes in parent scope)']
     if not opts.ignore_debug_entry_values:
       for cov_bucket in coverage_buckets():
-        cov_category = "vars with {} of its scope covered".format(cov_bucket)
+        cov_category = "#local vars with {} of parent scope covered " \
+          "by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
     else:
       variables_scope_bytes_entry_values = \
-        json_parsed['vars entry value scope bytes covered']
+        json_parsed['sum_all_local_vars(#bytes in parent scope ' \
+          'covered by DW_OP_entry_value)']
       variables_scope_bytes_covered = variables_scope_bytes_covered \
         - variables_scope_bytes_entry_values
       for cov_bucket in coverage_buckets():
         cov_category = \
-          "vars (excluding the debug entry values) " \
-          "with {} of its scope covered".format(cov_bucket)
+          "#local vars - entry values with {} of parent scope " \
+          "covered by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
   elif opts.only_formal_parameters:
     # Read the JSON only for formal parameters.
     variables_total_locstats = \
-      json_parsed['total params procesed by location statistics']
+      json_parsed['#params processed by location statistics']
     variables_scope_bytes_covered = \
-      json_parsed['formal params scope bytes covered']
+      json_parsed['sum_all_params(#bytes in parent scope covered ' \
+        'by DW_AT_location)']
     variables_scope_bytes = \
-      json_parsed['formal params scope bytes total']
+      json_parsed['sum_all_params(#bytes in parent scope)']
    if not opts.ignore_debug_entry_values:
       for cov_bucket in coverage_buckets():
-        cov_category = "params with {} of its scope covered".format(cov_bucket)
+        cov_category = "#params with {} of parent scope covered " \
+          "by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
     else:
       variables_scope_bytes_entry_values = \
-        json_parsed['formal params entry value scope bytes covered']
+        json_parsed['sum_all_params(#bytes in parent scope covered ' \
+          'by DW_OP_entry_value)']
       variables_scope_bytes_covered = variables_scope_bytes_covered \
        - variables_scope_bytes_entry_values
       for cov_bucket in coverage_buckets():
         cov_category = \
-          "params (excluding the debug entry values) " \
-          "with {} of its scope covered".format(cov_bucket)
+          "#params - entry values with {} of parent scope covered" \
+          " by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
   else:
     # Read the JSON for both local variables and formal parameters.
     variables_total = \
-      json_parsed['source variables']
-    variables_with_loc = json_parsed['variables with location']
+      json_parsed['#source variables']
+    variables_with_loc = json_parsed['#source variables with location']
     variables_total_locstats = \
-      json_parsed['total variables procesed by location statistics']
+      json_parsed['#variables processed by location statistics']
     variables_scope_bytes_covered = \
-      json_parsed['scope bytes covered']
+      json_parsed['sum_all_variables(#bytes in parent scope covered ' \
+        'by DW_AT_location)']
     variables_scope_bytes = \
-      json_parsed['scope bytes total']
+      json_parsed['sum_all_variables(#bytes in parent scope)']
     if not opts.ignore_debug_entry_values:
       for cov_bucket in coverage_buckets():
-        cov_category = "variables with {} of its scope covered". \
-          format(cov_bucket)
+        cov_category = "#variables with {} of parent scope covered " \
+          "by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
     else:
       variables_scope_bytes_entry_values = \
-        json_parsed['entry value scope bytes covered']
+        json_parsed['sum_all_variables(#bytes in parent scope covered ' \
+          'by DW_OP_entry_value)']
       variables_scope_bytes_covered = variables_scope_bytes_covered \
         - variables_scope_bytes_entry_values
       for cov_bucket in coverage_buckets():
-        cov_category = "variables (excluding the debug entry values) " \
-          "with {} of its scope covered". format(cov_bucket)
+        cov_category = \
+          "#variables - entry values with {} of parent scope covered " \
+          "by DW_AT_location".format(cov_bucket)
         variables_coverage_map[cov_bucket] = json_parsed[cov_category]
 
   return LocationStats(binary, variables_total, variables_total_locstats,
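The renamed llvm-dwarfdump statistics are ordinary JSON keys; the `#` and parentheses need no special handling. A minimal sketch of reading a few of them outside of llvm-locstats, assuming a `stats.json` previously produced by `llvm-dwarfdump --statistics` (the file name is hypothetical):

```python
import json

# Load statistics emitted by: llvm-dwarfdump --statistics foo.o > stats.json
with open('stats.json') as f:
    stats = json.load(f)

total = stats['#variables processed by location statistics']
covered = stats['sum_all_variables(#bytes in parent scope covered by DW_AT_location)']
scope = stats['sum_all_variables(#bytes in parent scope)']
print('variables processed: {}'.format(total))
print('location coverage: {:.1f}%'.format(100.0 * covered / scope))
```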
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 5c36eecd35b66..3671a97395d4e 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -12,8 +12,13 @@ include(AddMLIR)
 
 # Installing the headers and docs needs to depend on generating any public
 # tablegen'd targets.
+# mlir-generic-headers are dialect-independent.
+add_custom_target(mlir-generic-headers)
+set_target_properties(mlir-generic-headers PROPERTIES FOLDER "Misc")
+# mlir-headers may be dialect-dependent.
 add_custom_target(mlir-headers)
 set_target_properties(mlir-headers PROPERTIES FOLDER "Misc")
+add_dependencies(mlir-headers mlir-generic-headers)
 add_custom_target(mlir-doc)
 
 # Build the CUDA conversions and run according tests if the NVPTX backend
diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake
index f1de17c682d28..6c1ec381a675b 100644
--- a/mlir/cmake/modules/AddMLIR.cmake
+++ b/mlir/cmake/modules/AddMLIR.cmake
@@ -29,26 +29,160 @@ function(add_mlir_doc doc_filename command output_file output_directory)
   add_dependencies(mlir-doc ${output_file}DocGen)
 endfunction()
 
-# Declare a library which can be compiled in libMLIR.so
-macro(add_mlir_library name)
-  set_property(GLOBAL APPEND PROPERTY MLIR_ALL_LIBS ${name})
-  add_llvm_library(${ARGV})
-endmacro(add_mlir_library)
+# Declare an mlir library which can be compiled in libMLIR.so
+# In addition to everything that llvm_add_library accepts, this
+# also has the following option:
+# EXCLUDE_FROM_LIBMLIR
+#   Don't include this library in libMLIR.so. This option should be used
+#   for test libraries, executable-specific libraries, or rarely used libraries
+#   with large dependencies.
+function(add_mlir_library name)
+  cmake_parse_arguments(ARG
+    "SHARED;INSTALL_WITH_TOOLCHAIN;EXCLUDE_FROM_LIBMLIR"
+    ""
+    "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS"
+    ${ARGN})
+  set(srcs)
+  if(MSVC_IDE OR XCODE)
+    # Add public headers
+    file(RELATIVE_PATH lib_path
+      ${MLIR_SOURCE_DIR}/lib/
+      ${CMAKE_CURRENT_SOURCE_DIR}
+    )
+    if(NOT lib_path MATCHES "^[.][.]")
+      file( GLOB_RECURSE headers
+        ${MLIR_SOURCE_DIR}/include/mlir/${lib_path}/*.h
+        ${MLIR_SOURCE_DIR}/include/mlir/${lib_path}/*.def
+      )
+      set_source_files_properties(${headers} PROPERTIES HEADER_FILE_ONLY ON)
+
+      file( GLOB_RECURSE tds
+        ${MLIR_SOURCE_DIR}/include/mlir/${lib_path}/*.td
+      )
+      source_group("TableGen descriptions" FILES ${tds})
+      set_source_files_properties(${tds} PROPERTIES HEADER_FILE_ONLY ON)
+
+      if(headers OR tds)
+        set(srcs ${headers} ${tds})
+      endif()
+    endif()
+  endif(MSVC_IDE OR XCODE)
+  if(srcs OR ARG_ADDITIONAL_HEADERS)
+    set(srcs
+      ADDITIONAL_HEADERS
+      ${srcs}
+      ${ARG_ADDITIONAL_HEADERS} # It may contain unparsed unknown args.
+      )
+  endif()
+  if(ARG_SHARED)
+    set(LIBTYPE SHARED)
+  else()
+    # llvm_add_library ignores BUILD_SHARED_LIBS if STATIC is explicitly set,
+    # so we need to handle it here.
+    if(BUILD_SHARED_LIBS)
+      set(LIBTYPE SHARED)
+    else()
+      set(LIBTYPE STATIC)
+    endif()
+    if(NOT XCODE)
+      # The Xcode generator doesn't handle object libraries correctly.
+      list(APPEND LIBTYPE OBJECT)
+    endif()
+    # Test libraries and such shouldn't be included in libMLIR.so
+    if(NOT ARG_EXCLUDE_FROM_LIBMLIR)
+      set_property(GLOBAL APPEND PROPERTY MLIR_STATIC_LIBS ${name})
+      set_property(GLOBAL APPEND PROPERTY MLIR_LLVM_LINK_COMPONENTS ${ARG_LINK_COMPONENTS})
+      set_property(GLOBAL APPEND PROPERTY MLIR_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS})
+    endif()
+  endif()
+
+  # MLIR libraries uniformly depend on LLVMSupport. Just specify it once here.
+  list(APPEND ARG_LINK_COMPONENTS Support)
+  list(APPEND ARG_DEPENDS mlir-generic-headers)
+  llvm_add_library(${name} ${LIBTYPE} ${ARG_UNPARSED_ARGUMENTS} ${srcs} DEPENDS ${ARG_DEPENDS} LINK_COMPONENTS ${ARG_LINK_COMPONENTS} LINK_LIBS ${ARG_LINK_LIBS})
+
+  if(TARGET ${name})
+    target_link_libraries(${name} INTERFACE ${LLVM_COMMON_LIBS})
+
+    if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
+      set(export_to_mlirtargets)
+      if (${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
+          "mlir-libraries" IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
+          NOT LLVM_DISTRIBUTION_COMPONENTS)
+        set(export_to_mlirtargets EXPORT MLIRTargets)
+        set_property(GLOBAL PROPERTY MLIR_HAS_EXPORTS True)
+      endif()
+
+      install(TARGETS ${name}
+        COMPONENT ${name}
+        ${export_to_mlirtargets}
+        LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+        ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+        RUNTIME DESTINATION bin)
+
+      if (NOT LLVM_ENABLE_IDE)
+        add_llvm_install_targets(install-${name}
+                                 DEPENDS ${name}
+                                 COMPONENT ${name})
+      endif()
+      set_property(GLOBAL APPEND PROPERTY MLIR_ALL_LIBS ${name})
+    endif()
+    set_property(GLOBAL APPEND PROPERTY MLIR_EXPORTS ${name})
+  else()
+    # Add empty "phony" target
+    add_custom_target(${name})
+  endif()
+  set_target_properties(${name} PROPERTIES FOLDER "MLIR libraries")
+endfunction(add_mlir_library)
 
 # Declare the library associated with a dialect.
 function(add_mlir_dialect_library name)
   set_property(GLOBAL APPEND PROPERTY MLIR_DIALECT_LIBS ${name})
-  add_mlir_library(${ARGV})
+  add_mlir_library(${ARGV} DEPENDS mlir-headers)
 endfunction(add_mlir_dialect_library)
 
 # Declare the library associated with a conversion.
 function(add_mlir_conversion_library name)
   set_property(GLOBAL APPEND PROPERTY MLIR_CONVERSION_LIBS ${name})
-  add_mlir_library(${ARGV})
+  add_mlir_library(${ARGV} DEPENDS mlir-headers)
 endfunction(add_mlir_conversion_library)
 
 # Declare the library associated with a translation.
 function(add_mlir_translation_library name)
   set_property(GLOBAL APPEND PROPERTY MLIR_TRANSLATION_LIBS ${name})
-  add_mlir_library(${ARGV})
+  add_mlir_library(${ARGV} DEPENDS mlir-headers)
 endfunction(add_mlir_translation_library)
+
+# Verification tools to aid debugging.
+function(mlir_check_link_libraries name)
+  if(TARGET ${name})
+    get_target_property(libs ${name} LINK_LIBRARIES)
+    # message("${name} libs are: ${libs}")
+    set(linking_llvm 0)
+    foreach(lib ${libs})
+      if(lib)
+        if(${lib} MATCHES "^LLVM$")
+          set(linking_llvm 1)
+        endif()
+        if((${lib} MATCHES "^LLVM.+") AND ${linking_llvm})
+          # This will almost always cause execution problems, since the
+          # same symbol might be loaded from 2 separate libraries. This
+          # often comes from referring to an LLVM library target
+          # explicitly in target_link_libraries()
+          message("WARNING: ${name} links LLVM and ${lib}!")
+        endif()
+      endif()
+    endforeach()
+  endif()
+endfunction(mlir_check_link_libraries)
+
+function(mlir_check_all_link_libraries name)
+  mlir_check_link_libraries(${name})
+  if(TARGET ${name})
+    get_target_property(libs ${name} LINK_LIBRARIES)
+    # message("${name} libs are: ${libs}")
+    foreach(lib ${libs})
+      mlir_check_link_libraries(${lib})
+    endforeach()
+  endif()
+endfunction(mlir_check_all_link_libraries)
diff --git a/mlir/cmake/modules/MLIRConfig.cmake.in b/mlir/cmake/modules/MLIRConfig.cmake.in
index da518762919cd..de38f94add75e 100644
--- a/mlir/cmake/modules/MLIRConfig.cmake.in
+++ b/mlir/cmake/modules/MLIRConfig.cmake.in
@@ -19,9 +19,7 @@ set_property(GLOBAL PROPERTY MLIR_DIALECT_LIBS "@MLIR_DIALECT_LIBS@")
 set_property(GLOBAL PROPERTY MLIR_CONVERSION_LIBS "@MLIR_CONVERSION_LIBS@")
 
 # Provide all our library targets to users.
-if(EXISTS @MLIR_CONFIG_EXPORTS_FILE@)
-  include("@MLIR_CONFIG_EXPORTS_FILE@")
-endif()
+include("@MLIR_CONFIG_EXPORTS_FILE@")
 
 # By creating these targets here, subprojects that depend on MLIR's
 # tablegen-generated headers can always depend on these targets whether building
@@ -32,6 +30,9 @@ endif()
 if(NOT TARGET mlir-headers)
   add_custom_target(mlir-headers)
 endif()
+if(NOT TARGET mlir-generic-headers)
+  add_custom_target(mlir-generic-headers)
+endif()
 if(NOT TARGET mlir-doc)
   add_custom_target(mlir-doc)
 endif()
diff --git a/mlir/docs/DeclarativeRewrites.md b/mlir/docs/DeclarativeRewrites.md
index 83eb3544da63a..4781f9f2648e8 100644
--- a/mlir/docs/DeclarativeRewrites.md
+++ b/mlir/docs/DeclarativeRewrites.md
@@ -153,7 +153,7 @@ bound symbol, for example, `def : Pat<(AOp $a, F32Attr), ...>`.
 
 #### Matching DAG of operations
 
-To match an DAG of ops, use nested `dag` objects:
+To match a DAG of ops, use nested `dag` objects:
 
 ```tablegen
@@ -265,7 +265,7 @@ For example, for the above `AOp`, a possible builder is:
 
 ```c++
-void AOp::build(Builder *builder, OperationState &state,
+void AOp::build(OpBuilder &builder, OperationState &state,
                 Value input, Attribute attr) {
   state.addOperands({input});
   state.addAttribute("a_attr", attr);
@@ -530,7 +530,7 @@ the `TwoResultOp`'s two results, respectively.
 
 The above example also shows how to replace a matched multi-result op.
-To replace a `N`-result op, the result patterns must generate at least `N`
+To replace an `N`-result op, the result patterns must generate at least `N`
 declared values (see [Declared vs. actual value](#declared-vs-actual-value) for
 definition). If there are more than `N` declared values generated, only the
 last `N` declared values will be used to replace the matched op. Note that
@@ -668,12 +668,12 @@ directive to provide finer control.
 
 `location` is of the following syntax:
 
-```tablgen
+```tablegen
 (location $symbol0, $symbol1, ...)
 ```
 
 where all `$symbol` should be bound previously in the pattern and one optional
-string may be specified as an attribute. The following locations are creted:
+string may be specified as an attribute. The following locations are created:
 
 * If only 1 symbol is specified then that symbol's location is used,
 * If multiple are specified then a fused location is created;
diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md
index f2d9fdabab51b..7eab93ace42e5 100644
--- a/mlir/docs/Dialects/Affine.md
+++ b/mlir/docs/Dialects/Affine.md
@@ -60,20 +60,26 @@ Example:
 
 ### Restrictions on Dimensions and Symbols
 
 The affine dialect imposes certain restrictions on dimension and symbolic
-identifiers to enable powerful analysis and transformation. A symbolic
-identifier can be bound to an SSA value that is either an argument to the
-function, a value defined at the top level of that function (outside of all
-loops and if operations), the result of a
-[`constant` operation](Standard.md#constant-operation), or the result of an
-[`affine.apply` operation](#affineapply-operation) that recursively takes as
-arguments any symbolic identifiers, or the result of a [`dim`
-operation](Standard.md#dim-operation) on either a memref that is a function
-argument or a memref where the corresponding dimension is either static or a
-dynamic one in turn bound to a symbolic identifier. Dimensions may be bound not
-only to anything that a symbol is bound to, but also to induction variables of
-enclosing [`affine.for`](#affinefor-affineforop) and
-[`afffine.parallel`](#affineparallel-affineparallelop) operations, and the
-result of an
+identifiers to enable powerful analysis and transformation. An SSA value's use
+can be bound to a symbolic identifier if that SSA value is either
+1. a region argument for an op with trait `PolyhedralScope` (e.g. `FuncOp`),
+2. a value defined at the top level of a `PolyhedralScope` op (i.e., immediately
+enclosed by the latter),
+3. a value that dominates the `PolyhedralScope` op enclosing the value's use,
+4. the result of a [`constant` operation](Standard.md#constant-operation),
+5. the result of an [`affine.apply`
+operation](#affineapply-operation) that recursively takes as arguments any valid
+symbolic identifiers, or
+6. the result of a [`dim` operation](Standard.md#dim-operation) on either a
+memref that is an argument to a `PolyhedralScope` op or a memref where the
+corresponding dimension is either static or a dynamic one in turn bound to a
+valid symbol.
+
+Note that as a result of rule (3) above, symbol validity is sensitive to the
+location of the SSA use. Dimensions may be bound not only to anything that a
+symbol is bound to, but also to induction variables of enclosing
+[`affine.for`](#affinefor-operation) and
+[`affine.parallel`](#affineparallel-operation) operations, and the result of an
 [`affine.apply` operation](#affineapply-operation) (which recursively may use
 other dimensions and symbols).
diff --git a/mlir/docs/Dialects/Vector.md b/mlir/docs/Dialects/Vector.md index bc01d15d32a86..31a530a913b25 100644 --- a/mlir/docs/Dialects/Vector.md +++ b/mlir/docs/Dialects/Vector.md @@ -365,7 +365,7 @@ unsurprising because historically, the vast majority of HW only supports `1-D` vector registers. We note that multiple HW vendors are in the process of evolving to higher-dimensional physical vectors. -In the following discussion, let's assume the HW vector size is `1-D and the +In the following discussion, let's assume the HW vector size is `1-D` and the SW vector size is `n-D`, with `n >= 1`. The same discussion would apply with `2-D` HW `vector` size and `n >= 2`. In this context, most HW exhibit a vector register file. The number of such vectors is fixed. diff --git a/mlir/docs/Interfaces.md b/mlir/docs/Interfaces.md index c5800ca2171a4..d1050e76de5bc 100644 --- a/mlir/docs/Interfaces.md +++ b/mlir/docs/Interfaces.md @@ -1,12 +1,14 @@ # Interfaces -MLIR is generic and very extensible; it allows for opaquely representing many -different dialects that have their own operations, attributes, types, and so on. -This allows for dialects to be very expressive in their semantics and for MLIR -to capture many different levels of abstraction. The downside to this is that -transformations and analyses must be extremely conservative about the operations -that they encounter, and must special-case the different dialects that they -support. To combat this, MLIR provides the concept of `interfaces`. +MLIR is a generic and extensible framework, representing different +dialects with their own operations, attributes, types, and so on. +MLIR Dialects can express operations with a wide variety of semantics +and different levels of abstraction. The downside to this is that MLIR +transformations and analyses need to account for the semantics of +every operation, or handle operations conservatively. Without care, +this can result in code with special-cases for each supported +operation type. To combat this, MLIR provides the concept of +`interfaces`. ## Motivation @@ -19,8 +21,9 @@ transformations/analyses. ### Dialect Interfaces -Dialect interfaces are generally useful for transformation passes or analyses -that want to opaquely operate on operations, even *across* dialects. These +Dialect interfaces are generally useful for transformation passes or +analyses that want to operate generically on a set of operations, +which might even be defined in different dialects. These interfaces generally involve wide coverage over the entire dialect and are only used for a handful of transformations/analyses. In these cases, registering the interface directly on each operation is overly complex and cumbersome. The @@ -68,8 +71,9 @@ AffineDialect::AffineDialect(MLIRContext *context) ... { } ``` -Once registered, these interfaces can be opaquely queried from the dialect by -the transformation/analysis that wants to use them: +Once registered, these interfaces can be queried from the dialect by +the transformation/analysis that wants to use them, without +determining the particular dialect subclass: ```c++ Dialect *dialect = ...; @@ -105,7 +109,7 @@ if(!interface.isLegalToInline(...)) ### Operation Interfaces Operation interfaces, as the name suggests, are those registered at the -Operation level. These interfaces provide an opaque view into derived operations +Operation level. These interfaces provide access to derived operations by providing a virtual interface that must be implemented. 
 As an example, the `Linalg` dialect may implement an interface that provides
 general queries about some of the dialects library operations. These queries may
 provide things like:
diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
index c001b0f3cf220..ff3af3e4e496f 100644
--- a/mlir/docs/LangRef.md
+++ b/mlir/docs/LangRef.md
@@ -307,11 +307,11 @@ Example:
 
 module ::= `module` symbol-ref-id? (`attributes` attribute-dict)? region
 ```
 
-An MLIR module represents an opaque top-level container operation. It contains a
-single region containing a single block that is comprised of any operations.
-Operations within this region must not implicitly capture values defined above
-it. Modules have an optional symbol name that can be used to refer to them in
-operations.
+An MLIR Module represents a top-level container operation. It contains
+a single region containing a single block which can contain any
+operations. Operations within this region must not implicitly capture
+values defined outside the module. Modules have an optional symbol
+name that can be used to refer to them in operations.
 
 ### Functions
 
@@ -509,16 +509,16 @@ where the control flow is transmitted next. It may, for example, enter a region
 of the same op, including the same region that returned the control flow.
 
 The enclosing operation determines the way in which control is transmitted into
-the entry block of a Region. The successor to a region’s exit points may not
+the entry block of a Region. The successor to a Region’s exit points may not
 necessarily exist: for example a call to a function that does not return.
-Concurrent or asynchronous execution of regions is unspecified. Operations may
+Concurrent or asynchronous execution of Regions is unspecified. Operations may
 define specific rules of execution, e.g. sequential loops or switch cases.
 
 A Region may also enter another region within the enclosing operation. If an
 operation has multiple regions, the semantics of the operation defines into
 which regions the control flows and in which order, if any. An operation may
-transmit control into regions that were specified in other operations, in
-particular those that defined the values the given operation uses. Thus such
+transmit control into Regions that were specified in other operations, in
+particular those that defined the values the given operation uses. Thus, such
 operations can be treated opaquely in the enclosing control flow graph,
 providing a level of control flow isolation similar to that of the call
 operation.
 
@@ -1334,9 +1334,10 @@ dense-elements-attribute ::= `dense` `<` attribute-value `>` `:`
 ```
 
 A dense elements attribute is an elements attribute where the storage for the
-constant vector or tensor value has been packed to the element bitwidth. The
-element type of the vector or tensor constant must be of integer, index, or
-floating point type.
+constant vector or tensor value has been densely packed. The attribute supports
+storing integer or floating point elements, with integer/index/floating element
+types. It also supports storing string elements with a custom dialect string
+element type.
 
 ##### Opaque Elements Attribute
 
@@ -1465,10 +1466,11 @@ This attribute can only be held internally by
 attribute dictionary), i.e. no other attribute kinds such as Locations or
 extended attribute kinds.
 
-**Rationale:** Given that MLIR models global accesses with symbol references, to
-enable efficient multi-threading, it becomes difficult to effectively reason
-about their uses. By restricting the places that can legally hold a symbol
-reference, we can always opaquely reason about a symbols usage characteristics.
+**Rationale:** Identifying accesses to global data is critical to
+enabling efficient multi-threaded compilation. Restricting global
+data access to occur through symbols and limiting the places that can
+legally hold a symbol reference simplifies reasoning about these data
+accesses.
 
 See [`Symbols And SymbolTables`](SymbolsAndSymbolTables.md) for more
 information.
diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md
index 9a9e18d79633c..ddabae2225e7b 100644
--- a/mlir/docs/OpDefinitions.md
+++ b/mlir/docs/OpDefinitions.md
@@ -146,7 +146,7 @@ template parameter to the `Op` class.
 
 ### Operation documentation
 
-This includes both an one-line `summary` and a longer human-readable
+This includes both a one-line `summary` and a longer human-readable
 `description`. They will be used to drive automatic generation of dialect
 documentation. They need to be provided in the operation's definition body:
 
@@ -348,13 +348,14 @@ class. See [Constraints](#constraints) for more information.
 
 ### Operation interfaces
 
-[Operation interfaces](Interfaces.md#operation-interfaces) are a mechanism by
-which to opaquely call methods and access information on an *Op instance*,
-without knowing the exact operation type. Operation interfaces defined in C++
-can be accessed in the ODS framework via the `OpInterfaceTrait` class. Aside
-from using pre-existing interfaces in the C++ API, the ODS framework also
-provides a simplified mechanism for defining such interfaces; that removes much
-of the boilerplate necessary.
+[Operation interfaces](Interfaces.md#operation-interfaces) allow
+operations to expose method calls without the
+caller needing to know the exact operation type. Operation interfaces
+defined in C++ can be accessed in the ODS framework via the
+`OpInterfaceTrait` class. Aside from using pre-existing interfaces in
+the C++ API, the ODS framework also provides a simplified mechanism
+for defining such interfaces which removes much of the boilerplate
+necessary.
 
 Providing a definition of the `OpInterface` class will auto-generate the C++
 classes for the interface. An `OpInterface` includes a name, for the C++ class,
@@ -442,7 +443,7 @@ def MyInterface : OpInterface<"MyInterface"> {
     // Provide only a default definition of the method.
     // Note: `ConcreteOp` corresponds to the derived operation typename.
     InterfaceMethod<"/*insert doc here*/",
-      "unsigned", "getNumInputsAndOutputs", (ins), /*methodBody=*/[{}], [{
+      "unsigned", "getNumWithDefault", (ins), /*methodBody=*/[{}], [{
        ConcreteOp op = cast<ConcreteOp>(getOperation());
        return op.getNumInputs() + op.getNumOutputs();
     }]>,
@@ -455,6 +456,13 @@ def MyInterface : OpInterface<"MyInterface"> {
 // declaration but instead handled by the op interface trait directly.
 def OpWithInferTypeInterfaceOp : Op<...
     [DeclareOpInterfaceMethods<InferTypeOpInterface>]> { ... }
+
+// Methods that have a default implementation do not have declarations
+// generated. If an operation wishes to override the default behavior, it can
+// explicitly specify the method that it wishes to override. This will force
+// the generation of a declaration for those methods.
+def OpWithOverrideInferTypeInterfaceOp : Op<...
+    [DeclareOpInterfaceMethods<InferTypeOpInterface, ["inferReturnTypes"]>]> { ... }
 ```
 
 A verification method can also be specified on the `OpInterface` by setting
@@ -490,14 +498,14 @@ The following builders are generated:
 
 ```c++
 // All result-types/operands/attributes have one aggregate parameter.
-static void build(Builder *odsBuilder, OperationState &odsState,
+static void build(OpBuilder &odsBuilder, OperationState &odsState,
                   ArrayRef<Type> resultTypes,
                   ValueRange operands,
                   ArrayRef<NamedAttribute> attributes);
 
 // Each result-type/operand/attribute has a separate parameter. The parameters
 // for attributes are of mlir::Attribute types.
-static void build(Builder *odsBuilder, OperationState &odsState,
+static void build(OpBuilder &odsBuilder, OperationState &odsState,
                   Type i32_result, Type f32_result, ...,
                   Value i32_operand, Value f32_operand, ...,
                   IntegerAttr i32_attr, FloatAttr f32_attr, ...);
 
@@ -506,20 +514,20 @@ static void build(Builder *odsBuilder, OperationState &odsState,
 // for attributes are raw values unwrapped with mlir::Attribute instances.
 // (Note that this builder will not always be generated. See the following
 // explanation for more details.)
-static void build(Builder *odsBuilder, OperationState &odsState,
+static void build(OpBuilder &odsBuilder, OperationState &odsState,
                   Type i32_result, Type f32_result, ...,
                   Value i32_operand, Value f32_operand, ...,
                   APInt i32_attr, StringRef f32_attr, ...);
 
 // Each operand/attribute has a separate parameter but result type is aggregate.
-static void build(Builder *odsBuilder, OperationState &odsState,
+static void build(OpBuilder &odsBuilder, OperationState &odsState,
                   ArrayRef<Type> resultTypes,
                   Value i32_operand, Value f32_operand, ...,
                   IntegerAttr i32_attr, FloatAttr f32_attr, ...);
 
 // All operands/attributes have aggregate parameters.
 // Generated if InferTypeOpInterface interface is specified.
-static void build(Builder *odsBuilder, OperationState &odsState,
+static void build(OpBuilder &odsBuilder, OperationState &odsState,
                   ValueRange operands,
                   ArrayRef<NamedAttribute> attributes);
@@ -581,8 +589,8 @@ def MyOp : ... {
   ...
 
   let builders = [
-    OpBuilder<"Builder *builder, OperationState &state, float val = 0.5f", [{
-      state.addAttribute("attr", builder->getF32FloatAttr(val));
+    OpBuilder<"OpBuilder &builder, OperationState &state, float val = 0.5f", [{
+      state.addAttribute("attr", builder.getF32FloatAttr(val));
     }]>
   ];
 }
@@ -591,8 +599,8 @@ def MyOp : ... {
 The generated builder will look like:
 
 ```c++
-static void build(Builder *builder, OperationState &state, float val = 0.5f) {
-  state.addAttribute("attr", builder->getF32FloatAttr(val));
+static void build(OpBuilder &builder, OperationState &state, float val = 0.5f) {
+  state.addAttribute("attr", builder.getF32FloatAttr(val));
 }
 ```
 
@@ -863,7 +871,7 @@ significantly involve writing constraints. We have the `Constraint` class in
 
 An operation's constraint can cover different range; it may
 
-* Only concern a single attribute (e.g. being an 32-bit integer greater than 5),
+* Only concern a single attribute (e.g. being a 32-bit integer greater than 5),
 * Multiple operands and results (e.g., the 1st result's shape must be the same
   as the 1st operand), or
 * Intrinsic to the operation itself (e.g., having no side effect).
 
@@ -1039,13 +1047,13 @@ optionality, default values, etc.:
 
 * `DefaultValuedAttr`: specifies the
   [default value](#attributes-with-default-values) for an attribute.
-* `OptionalAttr`: specfies an attribute as [optional](#optional-attributes).
+* `OptionalAttr`: specifies an attribute as [optional](#optional-attributes).
 * `Confined`: adapts an attribute with
   [further constraints](#confining-attributes).
 
 ### Enum attributes
 
-Some attributes can only take values from an predefined enum, e.g., the
+Some attributes can only take values from a predefined enum, e.g., the
 comparison kind of a comparison op.
To define such attributes, ODS provides several mechanisms: `StrEnumAttr`, `IntEnumAttr`, and `BitEnumAttr`. diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index ea3dac60c7338..04a4ca0a7b3c5 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -382,7 +382,7 @@ static PassPipelineRegistration<> pipeline( ``` Pipeline registration also allows for simplified registration of -specifializations for existing passes: +specializations for existing passes: ```c++ static PassPipelineRegistration<> foo10( @@ -801,7 +801,7 @@ pipeline. This display mode is available in mlir-opt via `-pass-timing-display=list`. ```shell -$ mlir-opt foo.mlir -disable-pass-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing -pass-timing-display=list +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing -pass-timing-display=list ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -826,7 +826,7 @@ the most time, and can also be used to identify when analyses are being invalidated and recomputed. This is the default display mode. ```shell -$ mlir-opt foo.mlir -disable-pass-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse,canonicalize)' -convert-std-to-llvm -pass-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -943,10 +943,10 @@ func @simple_constant() -> (i32, i32) { * Always print the top-level module operation, regardless of pass type or operation nesting level. * Note: Printing at module scope should only be used when multi-threading - is disabled(`-disable-pass-threading`) + is disabled(`-mlir-disable-threading`) ```shell -$ mlir-opt foo.mlir -disable-pass-threading -pass-pipeline='func(cse)' -print-ir-after=cse -print-ir-module-scope +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func(cse)' -print-ir-after=cse -print-ir-module-scope *** IR Dump After CSE *** ('func' operation: @bar) func @bar(%arg0: f32, %arg1: f32) -> f32 { @@ -992,3 +992,28 @@ module { } } ``` + +### Local Reproducer Generation + +An additional flag may be passed to +`PassManager::enableCrashReproducerGeneration`, and specified via +`pass-pipeline-local-reproducer` on the command line, that signals that the pass +manager should attempt to generate a "local" reproducer. This will attempt to +generate a reproducer containing IR right before the pass that fails. This is +useful for situations where the crash is known to be within a specific pass, or +when the original input relies on components (like dialects or passes) that may +not always be available. + +For example, if the failure in the previous example came from `canonicalize`, +the following reproducer will be generated: + +```mlir +// configuration: -pass-pipeline='func(canonicalize)' +// note: verifyPasses=false + +module { + func @foo() { + ... + } +} +``` diff --git a/mlir/docs/Passes.md b/mlir/docs/Passes.md index b1f1a2eb2e17d..0f48396b220ea 100644 --- a/mlir/docs/Passes.md +++ b/mlir/docs/Passes.md @@ -39,3 +39,7 @@ This document describes the available MLIR passes and their contracts. 
## `spv` Dialect Passes [include "SPIRVPasses.md"] + +## `standard` Dialect Passes + +[include "StandardPasses.md"] diff --git a/mlir/docs/Quantization.md b/mlir/docs/Quantization.md index 3b45dce11a587..54eae406c87ea 100644 --- a/mlir/docs/Quantization.md +++ b/mlir/docs/Quantization.md @@ -232,7 +232,7 @@ which tensors can have fake_quant applied are somewhat involved), then TensorFlow Lite would use the attributes of the fake_quant operations to make a judgment about how to convert to use kernels from its quantized operations subset. -In MLIR-based quantization, fake_quant_\* operationss are handled by converting them to +In MLIR-based quantization, fake_quant_\* operations are handled by converting them to a sequence of *qcast* (quantize) followed by *dcast* (dequantize) with an appropriate *UniformQuantizedType* as the target of the qcast operation. @@ -242,7 +242,7 @@ flexibility to move the casts as it simplifies the computation and converts it to a form based on integral arithmetic. This scheme also naturally allows computations that are *partially quantized* -where the parts which could not be reduced to integral operationss are still carried out +where the parts which could not be reduced to integral operations are still carried out in floating point with appropriate conversions at the boundaries. ## TFLite native quantization diff --git a/mlir/docs/Rationale/Rationale.md b/mlir/docs/Rationale/Rationale.md index 7c08a71940d11..a3c3e5ecc4bfc 100644 --- a/mlir/docs/Rationale/Rationale.md +++ b/mlir/docs/Rationale/Rationale.md @@ -67,7 +67,7 @@ their layouts, and subscripted accesses to these tensors in memory. The information captured in the IR allows a compact expression of all loop transformations, data remappings, explicit copying necessary for explicitly -addressed memory in accelerators, mapping to pre-tuned expert written +addressed memory in accelerators, mapping to pre-tuned expert-written primitives, and mapping to specialized vector instructions. Loop transformations that can be easily implemented include the body of affine transformations: these subsume all traditional loop transformations (unimodular and non-unimodular) @@ -229,7 +229,7 @@ specifically abstracts the target-specific aspects that intersect with the code-generation-related/lowering-related concerns explained above. In fact, the `tensor` type even allows dialect-specific types as element types. -### Bit width of a non-primitive types and `index` is undefined +### Bit width of a non-primitive type and `index` is undefined The bit width of a compound type is not defined by MLIR, it may be defined by a specific lowering pass. In MLIR, bit width is a property of certain primitive @@ -259,7 +259,7 @@ abstraction, especially closer to source language, might want to differentiate signedness with integer types; while others, especially closer to machine instruction, might want signless integers. Instead of forcing each abstraction to adopt the same integer modelling or develop its own one in house, Integer -types provides this as an option to help code reuse and consistency. +type provides this as an option to help code reuse and consistency. For the standard dialect, the choice is to have signless integer types. An integer value does not have an intrinsic sign, and it's up to the specific op @@ -861,11 +861,12 @@ func @matmul(%A, %B, %C, %M, %N, %K) : (...) { // %M, N, K are symbols ### Affine Relations -The current MLIR spec includes affine maps and integer sets, but not affine -relations. 
Affine relations are a natural way to model read and write access -information, which can be very useful to capture the behavior of opaque external -library calls, high-performance vendor libraries, or user-provided / user-tuned -routines. +The current MLIR spec includes affine maps and integer sets, but not +affine relations. Affine relations are a natural way to model read and +write access information, which can be very useful to capture the +behavior of external library calls where no implementation is +available, high-performance vendor libraries, or user-provided / +user-tuned routines. An affine relation is a relation between input and output dimension identifiers while being symbolic on a list of symbolic identifiers and with affine diff --git a/mlir/docs/Rationale/RationaleLinalgDialect.md b/mlir/docs/Rationale/RationaleLinalgDialect.md index 8ca25e5a23475..3aaf17efc2d71 100644 --- a/mlir/docs/Rationale/RationaleLinalgDialect.md +++ b/mlir/docs/Rationale/RationaleLinalgDialect.md @@ -45,7 +45,7 @@ However, as the design of Linalg co-evolved with the design of MLIR, it became apparent that it could extend to larger application domains than just machine learning on dense tensors. -The design and evolution of Linalg follows a *codegen-friendly* approach where +The design and evolution of Linalg follow a *codegen-friendly* approach where the IR and the transformations evolve hand-in-hand. The key idea is that op semantics *declare* and transport information that is traditionally obtained by compiler analyses. @@ -77,7 +77,7 @@ https://drive.google.com/drive/u/0/folders/1sRAsgsd8Bvpm_IxREmZf2agsGU2KvrK-), with Linalg becoming its incarnation on tensors and buffers. It is complemented by the [Vector dialect](https://mlir.llvm.org/docs/Dialects/Vector/), -which define structured operations on vectors, following the same rationale and +which defines structured operations on vectors, following the same rationale and design principles as Linalg. (Vector dialect includes the higher-level operations on multi-dimensional vectors and abstracts away the lowering to single-dimensional vectors). @@ -191,7 +191,7 @@ Linalg builds on, and helps separate concerns in the LIFT approach as follows: structure abstractions) potentially reusable across different dialects in the MLIR's open ecosystem. -LIFT is expected to further influence the design of Linalg as it evolve. In +LIFT is expected to further influence the design of Linalg as it evolves. In particular, extending the data structure abstractions to support non-dense tensors can use the experience of LIFT abstractions for [sparse](https://www.lift-project.org/publications/2016/harries16sparse.pdf) @@ -255,9 +255,9 @@ Linalg hopes to additionally address the following: transformations. But it's still too hard for newcomers to use or extend. The level of performance you get from Halide is very different depending on whether one is a seasoned veteran or a newcomer. This is especially true as -the number of transformations grow. +the number of transformations grows. - Halide raises rather than lowers in two ways, going counter-current to the -design goals we set for high-level codegen abstractions in in MLIR. First, +design goals we set for high-level codegen abstractions in MLIR. First, canonical Halide front-end code uses explicit indexing and math on scalar values, so to target BLAS/DNN libraries one needs to add pattern matching which is similarly brittle as in the affine case. 
While Halide's performance @@ -425,7 +425,7 @@ The problem at hand is fundamentally driven by compilation of domain-specific workloads for high-performance and parallel hardware architectures: **this is an HPC compilation problem**. -The selection of relevant transformations follows a codesign approach and +The selection of relevant transformations follows a co-design approach and involves considerations related to: - concrete current and future needs of the application domain, - concrete current and future hardware properties and ISAs, @@ -462,7 +462,7 @@ levels of abstraction led to the following 2 principles. #### Declarative Specification: Avoid Raising Compiler transformations need static structural information (e.g. loop-nests, -graphs of basic blocks, pure functions etc). When that structural information +graphs of basic blocks, pure functions, etc). When that structural information is lost, it needs to be reconstructed. A good illustration of this phenomenon is the notion of *raising* in polyhedral @@ -518,7 +518,7 @@ declaratively. In turn this allows using local pattern rewrite rules in MLIR - Allow creating customizable passes declaratively by simply selecting rewrite rules. This allows mixing transformations, canonicalizations, constant folding and other enabling rewrites in a single pass. The result is a system where pass -fusion is very simple to obtain and gives hope to solving certain +fusion is very simple to obtain and gives hope for solving certain [phase ordering issues](https://dl.acm.org/doi/10.1145/201059.201061). ### Suitability for Search and Machine Learning @@ -551,7 +551,7 @@ ragged, sparse and mixed dens/sparse tensors as well as to trees, hash tables, tables of records and maybe even graphs. For such more advanced data types, the control-flow required to traverse the -data structures, termination conditions etc are much less simple to analyze and +data structures, termination conditions, etc are much less simple to analyze and characterize statically. As a consequence we need to also design solutions that stand a chance of evolving into runtime-adaptive computations (e.g. inspector-executor in which an *inspector* runs a cheap runtime @@ -582,7 +582,7 @@ occurred, ### The Dialect Need not be Closed Under Transformations This is probably the most surprising and counter-intuitive observation. When one designs IR for transformations, closed-ness is -often a nonnegotiable property. +often a non-negotiable property. This is a key design principle of polyhedral IRs such as [URUK](http://icps.u-strasbg.fr/~bastoul/research/papers/GVBCPST06-IJPP.pdf) and diff --git a/mlir/docs/ShapeInference.md b/mlir/docs/ShapeInference.md index 0a6448355fec0..76a97791d8153 100644 --- a/mlir/docs/ShapeInference.md +++ b/mlir/docs/ShapeInference.md @@ -117,7 +117,7 @@ impose a particular shape inference approach here. is, these two type systems differ and both should be supported, but the intersection of the two should not be required. As a particular example, if a compiler only wants to differentiate exact shapes vs dynamic - shapes, then it need not consider a more generic shape latice even + shapes, then it need not consider a more generic shape lattice even though the shape description supports it. 
 * Declarative (e.g., analyzable at compile time, possible to generate
diff --git a/mlir/docs/Traits.md b/mlir/docs/Traits.md
index 5931fd3f96987..f55d0a8250f6f 100644
--- a/mlir/docs/Traits.md
+++ b/mlir/docs/Traits.md
@@ -4,7 +4,7 @@
 
 MLIR allows for a truly open operation ecosystem, as any dialect may define
 operations that suit a specific level of abstraction. `Traits` are a mechanism
-in which to abstract implementation details and properties that are common
+which abstracts implementation details and properties that are common
 across many different operations. `Traits` may be used to specify special
 properties and constraints of the operation, including whether the operation
 has side effects or whether its output has the same type as the input. Some examples
@@ -219,6 +219,22 @@ foo.region_op {
 
 This trait is an important structural property of the IR, and enables operations
 to have [passes](PassManagement.md) scheduled under them.
+
+### PolyhedralScope
+
+* `OpTrait::PolyhedralScope` -- `PolyhedralScope`
+
+This trait is carried by region holding operations that define a new scope for
+the purposes of polyhedral optimization and the affine dialect in particular.
+Any SSA values of 'index' type that either dominate such operations, or are
+defined at the top-level of such operations, or appear as region arguments for
+such operations automatically become valid symbols for the polyhedral scope
+defined by that operation. As a result, such SSA values could be used as the
+operands or index operands of various affine dialect operations like affine.for,
+affine.load, and affine.store. The polyhedral scope defined by an operation
+with this trait includes all operations in its region excluding operations that
+are nested inside of other operations that themselves have this trait.
+
 ### Single Block with Implicit Terminator
 
 * `OpTrait::SingleBlockImplicitTerminator<typename TerminatorOpType>` :
diff --git a/mlir/docs/Tutorials/CreatingADialect.md b/mlir/docs/Tutorials/CreatingADialect.md
index a1ecb6770aa96..9f9eb7a8317b0 100644
--- a/mlir/docs/Tutorials/CreatingADialect.md
+++ b/mlir/docs/Tutorials/CreatingADialect.md
@@ -72,12 +72,14 @@ add_mlir_dialect_library(FooOps
 
   DEPENDS
   MLIRFooOpsIncGen
   MLIRFooTransformsIncGen
-  )
-target_link_libraries(FooOps
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
   BarOps
-  )
+  )
 ```
@@ -99,8 +101,15 @@ corresponding IncGen targets. The PUBLIC link dependency is sufficient.
 
 Also note that we avoid using add_dependencies explicitly, since the
 dependencies need to be available to the underlying add_llvm_library() call,
 allowing it to correctly create
-new targets with the same sources.
+new targets with the same sources. However, dialects that depend on
+LLVM IR may need to depend on the LLVM 'intrinsics_gen' target to
+ensure that tablegen'd LLVM header files have been generated.
+In addition, linkage to MLIR libraries is specified using the
+LINK_LIBS descriptor and linkage to LLVM libraries is specified using
+the LINK_COMPONENTS descriptor. This allows cmake infrastructure to
+generate new library targets with correct linkage, in particular, when
+BUILD_SHARED_LIBS=on or LLVM_LINK_LLVM_DYLIB=on are specified.
 
 # Dialect Conversions
 
@@ -134,11 +143,10 @@ using target_link_libraries() and the PUBLIC keyword.
 For instance:
 
 add_mlir_conversion_library(MLIRBarToFoo
   BarToFoo.cpp
 
-  ADDITIONAL_HEADER_DIRS
-  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/BarToFoo
-  )
-target_link_libraries(MLIRBarToFoo
-  PUBLIC
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/BarToFoo
+
+  LINK_LIBS PUBLIC
   BarOps
   FooOps
   )
@@ -157,3 +165,10 @@ MLIR_CONVERSION_LIBS global property:
 
 get_property(dialect_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
 ```
+
+Note that it is only necessary to specify a PUBLIC dependence against
+dialects to generate compile-time and link-time dependencies, and it
+is not necessary to explicitly depend on the dialects' IncGen targets.
+However, conversions that directly include LLVM IR header files may
+need to depend on the LLVM 'intrinsics_gen' target to ensure that
+tablegen'd LLVM header files have been generated.
diff --git a/mlir/docs/Tutorials/Toy/Ch-2.md b/mlir/docs/Tutorials/Toy/Ch-2.md
index e57bd7da4b227..4265b06c4e8b0 100755
--- a/mlir/docs/Tutorials/Toy/Ch-2.md
+++ b/mlir/docs/Tutorials/Toy/Ch-2.md
@@ -83,10 +83,10 @@ Let's break down the anatomy of this MLIR operation:
 
 * This is the location in the source code from which this operation
   originated.
 
-Shown here is the general form of an operation. As described above, the set of
-operations in MLIR is extensible. This means that the infrastructure must be
-able to opaquely reason about the structure of an operation. This is done by
-boiling down the composition of an operation into discrete pieces:
+Shown here is the general form of an operation. As described above,
+the set of operations in MLIR is extensible. Operations are modeled
+using a small set of concepts, enabling operations to be reasoned
+about and manipulated generically. These concepts are:
 
 - A name for the operation.
 - A list of SSA operand values.
@@ -115,12 +115,14 @@ compiler passes - does not include locations in the output by default. The
 
 ### Opaque API
 
-MLIR is designed to be a completely extensible system, and as such, the
-infrastructure has the capability to opaquely represent all of its core
-components: attributes, operations, types, etc. This allows MLIR to parse,
-represent, and [round-trip](../../../getting_started/Glossary.md#round-trip) any valid IR. For
-example, we could place our Toy operation from above into an `.mlir` file and
-round-trip through *mlir-opt* without registering any dialect:
+MLIR is designed to allow most IR elements, such as attributes,
+operations, and types, to be customized. At the same time, IR
+elements can always be reduced to the above fundamental concepts. This
+allows MLIR to parse, represent, and
+[round-trip](../../../getting_started/Glossary.md#round-trip) IR for
+*any* operation. For example, we could place our Toy operation from
+above into an `.mlir` file and round-trip through *mlir-opt* without
+registering any dialect:
 
 ```mlir
 func @toy_func(%tensor: tensor<2x3xf64>) -> tensor<3x2xf64> {
@@ -129,12 +131,16 @@ func @toy_func(%tensor: tensor<2x3xf64>) -> tensor<3x2xf64> {
 }
 ```
 
-In the cases of unregistered attributes, operations, and types, MLIR will
-enforce some structural constraints (SSA, block termination, etc.), but
-otherwise they are completely opaque. This can be useful for bootstrapping
-purposes, but it is generally advised against. Opaque operations must be treated
-conservatively by transformations and analyses, and they are much harder to
-construct and manipulate.
+In the cases of unregistered attributes, operations, and types, MLIR +will enforce some structural constraints (SSA, block termination, +etc.), but otherwise they are completely opaque. For instance, MLIR +has little information about whether an unregistered operation can +operate on particular datatypes, how many operands it can take, or how +many results it produces. This flexibility can be useful for +bootstrapping purposes, but it is generally advised against in mature +systems. Unregistered operations must be treated conservatively by +transformations and analyses, and they are much harder to construct +and manipulate. This handling can be observed by crafting what should be an invalid IR for Toy and seeing it round-trip without tripping the verifier: @@ -155,7 +161,7 @@ verifier, and add nicer APIs to manipulate our operations. ## Defining a Toy Dialect To effectively interface with MLIR, we will define a new Toy dialect. This -dialect will properly model the semantics of the Toy language, as well as +dialect will model the structure of the Toy language, as well as provide an easy avenue for high-level analysis and transformation. ```c++ @@ -232,13 +238,13 @@ class ConstantOp : public mlir::Op { // using `builder.create<ConstantOp>(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "DenseElementsAttr value", [{ // Call into an autogenerated `build` method. build(builder, result, value.getType(), value); @@ -484,7 +500,7 @@ def ConstantOp : Toy_Op<"constant"> { // Build a constant with a given constant floating-point value. This builder // creates a declaration for `ConstantOp::build` with the given parameters. - OpBuilder<"Builder *builder, OperationState &result, double value"> + OpBuilder<"OpBuilder &builder, OperationState &result, double value"> ]; } ``` diff --git a/mlir/docs/doxygen.cfg.in b/mlir/docs/doxygen.cfg.in index 7db0309ddb50c..307981eed5f21 100644 --- a/mlir/docs/doxygen.cfg.in +++ b/mlir/docs/doxygen.cfg.in @@ -46,7 +46,7 @@ PROJECT_NUMBER = @PACKAGE_VERSION@ PROJECT_BRIEF = -# With the PROJECT_LOGO tag one can specify an logo or icon that is included in +# With the PROJECT_LOGO tag one can specify a logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels # and the maximum width should not exceed 200 pixels. Doxygen will copy the logo # to the output directory.
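To make the opacity point from the Ch-2.md hunk above concrete, here is a hypothetical helper, not part of this patch, that round-trips such IR programmatically. The helper name is invented, and the `allowUnregisteredDialects()` call is an assumption about newer MLIR revisions; older revisions accept unregistered dialects by default and may lack the setter.

```c++
#include "llvm/ADT/StringRef.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "mlir/Parser.h"

// Hypothetical helper: parses a module that may contain unregistered
// operations. MLIR can only enforce generic structural invariants here
// (SSA dominance, block terminators); the operand counts and result types
// of opaque "foo.bar"-style ops are not checked.
mlir::OwningModuleRef parseOpaqueModule(llvm::StringRef ir,
                                        mlir::MLIRContext &context) {
  // On revisions where unregistered dialects are opt-in (assumed here):
  context.allowUnregisteredDialects();
  return mlir::parseSourceString(ir, &context);
}
```

The returned module can then be re-printed to observe the round-trip behavior the tutorial describes.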
diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index d0514cbad7c1a..fe309b501963a 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -15,6 +15,8 @@ endif() project(standalone-dialect LANGUAGES CXX C) +set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to") + find_package(MLIR REQUIRED CONFIG) message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") diff --git a/mlir/examples/standalone/lib/Standalone/CMakeLists.txt b/mlir/examples/standalone/lib/Standalone/CMakeLists.txt index 1f58307e0014f..c8b16b7ae4aba 100644 --- a/mlir/examples/standalone/lib/Standalone/CMakeLists.txt +++ b/mlir/examples/standalone/lib/Standalone/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRStandalone DEPENDS MLIRStandaloneOpsIncGen - ) -target_link_libraries(MLIRStandalone PUBLIC MLIRIR) + LINK_LIBS PUBLIC + MLIRIR + ) diff --git a/mlir/examples/toy/Ch2/include/toy/Ops.td b/mlir/examples/toy/Ch2/include/toy/Ops.td index e82302952e6f1..4e06634c6d4e5 100644 --- a/mlir/examples/toy/Ch2/include/toy/Ops.td +++ b/mlir/examples/toy/Ch2/include/toy/Ops.td @@ -68,13 +68,13 @@ def ConstantOp : Toy_Op<"constant", [NoSideEffect]> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. - OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -97,7 +97,7 @@ def AddOp : Toy_Op<"add"> { // Allow building an AddOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -132,7 +132,7 @@ def GenericCallOp : Toy_Op<"generic_call"> { // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -153,7 +153,7 @@ def MulOp : Toy_Op<"mul"> { // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -217,7 +217,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -241,7 +241,7 @@ def TransposeOp : Toy_Op<"transpose"> { // Allow building a TransposeOp with from the input operand. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. 
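For orientation alongside the Ops.td changes above: call sites are unaffected by the `Builder *` to `OpBuilder &` migration, because `OpBuilder::create` forwards itself into the declared `build` methods. A hypothetical call site (the helper name is invented for illustration):

```c++
#include "mlir/IR/Builders.h"
#include "toy/Dialect.h"

// Illustrative helper, not part of this patch: creates a toy.constant via the
// ODS-declared builder above. OpBuilder::create forwards `builder` (by
// reference, after this change) into
// ConstantOp::build(OpBuilder &, OperationState &, double).
static mlir::Value makeScalarConstant(mlir::OpBuilder &builder,
                                      mlir::Location loc, double value) {
  return builder.create<mlir::toy::ConstantOp>(loc, value).getResult();
}
```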
diff --git a/mlir/examples/toy/Ch2/mlir/Dialect.cpp b/mlir/examples/toy/Ch2/mlir/Dialect.cpp index 4aa33c048f6e8..b60d792e2e014 100644 --- a/mlir/examples/toy/Ch2/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch2/mlir/Dialect.cpp @@ -91,9 +91,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. -void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -158,29 +158,29 @@ static mlir::LogicalResult verify(ConstantOp op) { //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. 
- state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -225,9 +225,9 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp index ef9862505e8c2..2541faef4cf50 100644 --- a/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch2/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp index a558f64e865ba..22a33aad6217f 100644 --- a/mlir/examples/toy/Ch2/toyc.cpp +++ b/mlir/examples/toy/Ch2/toyc.cpp @@ -15,9 +15,9 @@ #include "toy/Parser.h" #include -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/Parser.h" #include "llvm/ADT/StringRef.h" diff --git a/mlir/examples/toy/Ch3/include/toy/Ops.td b/mlir/examples/toy/Ch3/include/toy/Ops.td index 999ea86f03de8..5a407603164ab 100644 --- a/mlir/examples/toy/Ch3/include/toy/Ops.td +++ b/mlir/examples/toy/Ch3/include/toy/Ops.td @@ -67,13 +67,13 @@ def ConstantOp : Toy_Op<"constant", [NoSideEffect]> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. - OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -96,7 +96,7 @@ def AddOp : Toy_Op<"add", [NoSideEffect]> { // Allow building an AddOp with from the two input operands. 
let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -131,7 +131,7 @@ def GenericCallOp : Toy_Op<"generic_call"> { // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -152,7 +152,7 @@ def MulOp : Toy_Op<"mul", [NoSideEffect]> { // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -219,7 +219,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -246,7 +246,7 @@ def TransposeOp : Toy_Op<"transpose", [NoSideEffect]> { // Allow building a TransposeOp with from the input operand. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. diff --git a/mlir/examples/toy/Ch3/mlir/Dialect.cpp b/mlir/examples/toy/Ch3/mlir/Dialect.cpp index 4aa33c048f6e8..b60d792e2e014 100644 --- a/mlir/examples/toy/Ch3/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch3/mlir/Dialect.cpp @@ -91,9 +91,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. -void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -158,29 +158,29 @@ static mlir::LogicalResult verify(ConstantOp op) { //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. 
- state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -225,9 +225,9 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp index ef9862505e8c2..2541faef4cf50 100644 --- a/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch3/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp index d9ae7aa0a68c0..0f09acb85c544 100644 --- a/mlir/examples/toy/Ch3/toyc.cpp +++ b/mlir/examples/toy/Ch3/toyc.cpp @@ -14,9 +14,9 @@ #include "toy/MLIRGen.h" #include "toy/Parser.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" diff --git a/mlir/examples/toy/Ch4/include/toy/Ops.td b/mlir/examples/toy/Ch4/include/toy/Ops.td index 537177bbd680e..1cd20802a0ca4 100644 --- a/mlir/examples/toy/Ch4/include/toy/Ops.td +++ b/mlir/examples/toy/Ch4/include/toy/Ops.td @@ -69,13 +69,13 @@ def ConstantOp : Toy_Op<"constant", [NoSideEffect]> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. - OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -99,7 +99,7 @@ def AddOp : Toy_Op<"add", // Allow building an AddOp with from the two input operands. 
let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -156,7 +156,7 @@ def GenericCallOp : Toy_Op<"generic_call", // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -178,7 +178,7 @@ def MulOp : Toy_Op<"mul", // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -245,7 +245,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -273,7 +273,7 @@ def TransposeOp : Toy_Op<"transpose", // Allow building a TransposeOp with from the input operand. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp index 9a0a3a6c95a61..02a70218af6de 100644 --- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp @@ -141,9 +141,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. -void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -208,9 +208,9 @@ static mlir::LogicalResult verify(ConstantOp op) { //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -228,12 +228,12 @@ void CastOp::inferShapes() { getResult().setType(getOperand().getType()); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. 
- state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } /// Return the callee of the generic call operation, this is required by the @@ -249,9 +249,9 @@ Operation::operand_range GenericCallOp::getArgOperands() { return inputs(); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -300,9 +300,9 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp index 9d770d478c68e..78399786007a7 100644 --- a/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch4/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch4/toyc.cpp b/mlir/examples/toy/Ch4/toyc.cpp index 6a89d0006239f..b757d66a692a2 100644 --- a/mlir/examples/toy/Ch4/toyc.cpp +++ b/mlir/examples/toy/Ch4/toyc.cpp @@ -15,9 +15,9 @@ #include "toy/Parser.h" #include "toy/Passes.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" diff --git a/mlir/examples/toy/Ch5/include/toy/Ops.td b/mlir/examples/toy/Ch5/include/toy/Ops.td index 3645141a0962a..7e7d5d10638c9 100644 --- a/mlir/examples/toy/Ch5/include/toy/Ops.td +++ b/mlir/examples/toy/Ch5/include/toy/Ops.td @@ -69,13 +69,13 @@ def ConstantOp : Toy_Op<"constant", [NoSideEffect]> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. - OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -99,7 +99,7 @@ def AddOp : Toy_Op<"add", // Allow building an AddOp with from the two input operands. 
let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -156,7 +156,7 @@ def GenericCallOp : Toy_Op<"generic_call", // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -178,7 +178,7 @@ def MulOp : Toy_Op<"mul", // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -246,7 +246,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -274,7 +274,7 @@ def TransposeOp : Toy_Op<"transpose", // Allow building a TransposeOp with from the input operand. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp index 9a0a3a6c95a61..02a70218af6de 100644 --- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp @@ -141,9 +141,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. -void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -208,9 +208,9 @@ static mlir::LogicalResult verify(ConstantOp op) { //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -228,12 +228,12 @@ void CastOp::inferShapes() { getResult().setType(getOperand().getType()); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. 
- state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } /// Return the callee of the generic call operation, this is required by the @@ -249,9 +249,9 @@ Operation::operand_range GenericCallOp::getArgOperands() { return inputs(); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -300,9 +300,9 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp index 9d770d478c68e..78399786007a7 100644 --- a/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch5/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp index 4bc2af2ff899e..b4d398812088d 100644 --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -15,9 +15,9 @@ #include "toy/Parser.h" #include "toy/Passes.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser.h" #include "mlir/Pass/Pass.h" diff --git a/mlir/examples/toy/Ch6/CMakeLists.txt b/mlir/examples/toy/Ch6/CMakeLists.txt index be797c6c1e965..c821c77f461c9 100644 --- a/mlir/examples/toy/Ch6/CMakeLists.txt +++ b/mlir/examples/toy/Ch6/CMakeLists.txt @@ -4,6 +4,8 @@ add_subdirectory(include) set(LLVM_LINK_COMPONENTS Core Support + nativecodegen + OrcJIT ) set(LLVM_TARGET_DEFINITIONS mlir/ToyCombine.td) diff --git a/mlir/examples/toy/Ch6/include/toy/Ops.td b/mlir/examples/toy/Ch6/include/toy/Ops.td index b70e14c2a7a2b..dee06c1f3b3c9 100644 --- a/mlir/examples/toy/Ch6/include/toy/Ops.td +++ b/mlir/examples/toy/Ch6/include/toy/Ops.td @@ -69,13 +69,13 @@ def ConstantOp : Toy_Op<"constant", [NoSideEffect]> { // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. 
- OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -99,7 +99,7 @@ def AddOp : Toy_Op<"add", // Allow building an AddOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -156,7 +156,7 @@ def GenericCallOp : Toy_Op<"generic_call", // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -178,7 +178,7 @@ def MulOp : Toy_Op<"mul", // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -246,7 +246,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -274,7 +274,7 @@ def TransposeOp : Toy_Op<"transpose", // Allow building a TransposeOp with from the input operand. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp index 9a0a3a6c95a61..02a70218af6de 100644 --- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp @@ -141,9 +141,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. 
-void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -208,9 +208,9 @@ static mlir::LogicalResult verify(ConstantOp op) { //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -228,12 +228,12 @@ void CastOp::inferShapes() { getResult().setType(getOperand().getType()); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } /// Return the callee of the generic call operation, this is required by the @@ -249,9 +249,9 @@ Operation::operand_range GenericCallOp::getArgOperands() { return inputs(); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -300,9 +300,9 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp index 9d770d478c68e..78399786007a7 100644 --- a/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch6/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp index 558141c2ca890..a713b1a7d17e2 100644 --- 
a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -15,11 +15,11 @@ #include "toy/Parser.h" #include "toy/Passes.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser.h" #include "mlir/Pass/Pass.h" diff --git a/mlir/examples/toy/Ch7/CMakeLists.txt b/mlir/examples/toy/Ch7/CMakeLists.txt index 9a9f335d3a920..f622bf5ac5270 100644 --- a/mlir/examples/toy/Ch7/CMakeLists.txt +++ b/mlir/examples/toy/Ch7/CMakeLists.txt @@ -4,6 +4,8 @@ add_subdirectory(include) set(LLVM_LINK_COMPONENTS Core Support + nativecodegen + OrcJIT ) set(LLVM_TARGET_DEFINITIONS mlir/ToyCombine.td) diff --git a/mlir/examples/toy/Ch7/include/toy/Ops.td b/mlir/examples/toy/Ch7/include/toy/Ops.td index adf56dc040d72..b453f13871ef0 100644 --- a/mlir/examples/toy/Ch7/include/toy/Ops.td +++ b/mlir/examples/toy/Ch7/include/toy/Ops.td @@ -79,13 +79,13 @@ def ConstantOp : Toy_Op<"constant", // using `builder.create(...)`. let builders = [ // Build a constant with a given constant tensor value. - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "DenseElementsAttr value", [{ build(builder, state, value.getType(), value); }]>, // Build a constant with a given constant floating-point value. - OpBuilder<"Builder *builder, OperationState &state, double value"> + OpBuilder<"OpBuilder &builder, OperationState &state, double value"> ]; // Invoke a static verify method to verify this constant operation. @@ -112,7 +112,7 @@ def AddOp : Toy_Op<"add", // Allow building an AddOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -170,7 +170,7 @@ def GenericCallOp : Toy_Op<"generic_call", // Add custom build methods for the generic call operation. let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "StringRef callee, ArrayRef arguments"> ]; } @@ -192,7 +192,7 @@ def MulOp : Toy_Op<"mul", // Allow building a MulOp with from the two input operands. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value lhs, Value rhs"> + OpBuilder<"OpBuilder &b, OperationState &state, Value lhs, Value rhs"> ]; } @@ -260,7 +260,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">, // Allow building a ReturnOp with no return operand. let builders = [OpBuilder< - "Builder *b, OperationState &state", [{ build(b, state, llvm::None); }] + "OpBuilder &b, OperationState &state", [{ build(b, state, llvm::None); }] >]; // Provide extra utility definitions on the c++ operation class definition. @@ -287,7 +287,7 @@ def StructAccessOp : Toy_Op<"struct_access", [NoSideEffect]> { // Allow building a StructAccessOp with just a struct value and an index. let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input, size_t index"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input, size_t index"> ]; let verifier = [{ return ::verify(*this); }]; @@ -335,7 +335,7 @@ def TransposeOp : Toy_Op<"transpose", // Allow building a TransposeOp with from the input operand. 
let builders = [ - OpBuilder<"Builder *b, OperationState &state, Value input"> + OpBuilder<"OpBuilder &b, OperationState &state, Value input"> ]; // Invoke a static verify method to verify this transpose operation. diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index b3e893e76a154..d653edea6e674 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -154,9 +154,9 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) { /// Build a constant operation. /// The builder is passed as an argument, so is the state that this method is /// expected to fill in order to build the operation. -void ConstantOp::build(mlir::Builder *builder, mlir::OperationState &state, +void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, double value) { - auto dataType = RankedTensorType::get({}, builder->getF64Type()); + auto dataType = RankedTensorType::get({}, builder.getF64Type()); auto dataAttribute = DenseElementsAttr::get(dataType, value); ConstantOp::build(builder, state, dataType, dataAttribute); } @@ -260,9 +260,9 @@ void ConstantOp::inferShapes() { getResult().setType(value().getType()); } //===----------------------------------------------------------------------===// // AddOp -void AddOp::build(mlir::Builder *builder, mlir::OperationState &state, +void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -280,12 +280,12 @@ void CastOp::inferShapes() { getResult().setType(getOperand().getType()); } //===----------------------------------------------------------------------===// // GenericCallOp -void GenericCallOp::build(mlir::Builder *builder, mlir::OperationState &state, +void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, StringRef callee, ArrayRef arguments) { // Generic call always returns an unranked Tensor initially. - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(arguments); - state.addAttribute("callee", builder->getSymbolRefAttr(callee)); + state.addAttribute("callee", builder.getSymbolRefAttr(callee)); } /// Return the callee of the generic call operation, this is required by the @@ -301,9 +301,9 @@ Operation::operand_range GenericCallOp::getArgOperands() { return inputs(); } //===----------------------------------------------------------------------===// // MulOp -void MulOp::build(mlir::Builder *builder, mlir::OperationState &state, +void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value lhs, mlir::Value rhs) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands({lhs, rhs}); } @@ -352,7 +352,7 @@ static mlir::LogicalResult verify(ReturnOp op) { //===----------------------------------------------------------------------===// // StructAccessOp -void StructAccessOp::build(mlir::Builder *b, mlir::OperationState &state, +void StructAccessOp::build(mlir::OpBuilder &b, mlir::OperationState &state, mlir::Value input, size_t index) { // Extract the result type from the input type. 
StructType structTy = input.getType().cast<StructType>(); @@ -360,7 +360,7 @@ void StructAccessOp::build(mlir::Builder *b, mlir::OperationState &state, mlir::Type resultType = structTy.getElementTypes()[index]; // Call into the auto-generated build method. - build(b, state, resultType, input, b->getI64IntegerAttr(index)); + build(b, state, resultType, input, b.getI64IntegerAttr(index)); } static mlir::LogicalResult verify(StructAccessOp op) { @@ -379,9 +379,9 @@ static mlir::LogicalResult verify(StructAccessOp op) { //===----------------------------------------------------------------------===// // TransposeOp -void TransposeOp::build(mlir::Builder *builder, mlir::OperationState &state, +void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state, mlir::Value value) { - state.addTypes(UnrankedTensorType::get(builder->getF64Type())); + state.addTypes(UnrankedTensorType::get(builder.getF64Type())); state.addOperands(value); } diff --git a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp index 2794768bee7b8..58a153b6127b5 100644 --- a/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp +++ b/mlir/examples/toy/Ch7/mlir/MLIRGen.cpp @@ -15,13 +15,13 @@ #include "toy/AST.h" #include "toy/Dialect.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp index f3e12fb986cf0..a8795d8a35a3b 100644 --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -15,11 +15,11 @@ #include "toy/Parser.h" #include "toy/Passes.h" -#include "mlir/Analysis/Verifier.h" #include "mlir/ExecutionEngine/ExecutionEngine.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" +#include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser.h" #include "mlir/Pass/Pass.h" diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h index 341526fc99645..1722ae628e88c 100644 --- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h +++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -11,11 +11,19 @@ #include <memory> namespace mlir { +class LLVMTypeConverter; +class OwningRewritePatternList; + +template <typename OpT> +class OperationPass; namespace gpu { class GPUModuleOp; } // namespace gpu -template <typename OpT> class OperationPass; + +/// Collect a set of patterns to convert from the GPU dialect to ROCDL. +void populateGpuToROCDLConversionPatterns(LLVMTypeConverter &converter, + OwningRewritePatternList &patterns); /// Creates a pass that lowers GPU dialect operations to ROCDL counterparts. std::unique_ptr<OperationPass<gpu::GPUModuleOp>> createLowerGpuOpsToROCDLOpsPass(); diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h index 5556667533601..2eae578fc966a 100644 --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h @@ -26,6 +26,7 @@ class Type; namespace mlir { +class ComplexType; class LLVMTypeConverter; class UnrankedMemRefType; @@ -139,24 +140,29 @@ class LLVMTypeConverter : public TypeConverter { LLVM::LLVMDialect *llvmDialect; private: - // Convert a function type.
The arguments and results are converted one by - // one. Additionally, if the function returns more than one value, pack the - // results into an LLVM IR structure type so that the converted function type - // returns at most one result. + /// Convert a function type. The arguments and results are converted one by + /// one. Additionally, if the function returns more than one value, pack the + /// results into an LLVM IR structure type so that the converted function type + /// returns at most one result. Type convertFunctionType(FunctionType type); - // Convert the index type. Uses llvmModule data layout to create an integer - // of the pointer bitwidth. + /// Convert the index type. Uses llvmModule data layout to create an integer + /// of the pointer bitwidth. Type convertIndexType(IndexType type); - // Convert an integer type `i*` to `!llvm<"i*">`. + /// Convert an integer type `i*` to `!llvm<"i*">`. Type convertIntegerType(IntegerType type); - // Convert a floating point type: `f16` to `!llvm.half`, `f32` to - // `!llvm.float` and `f64` to `!llvm.double`. `bf16` is not supported - // by LLVM. + /// Convert a floating point type: `f16` to `!llvm.half`, `f32` to + /// `!llvm.float` and `f64` to `!llvm.double`. `bf16` is not supported + /// by LLVM. Type convertFloatType(FloatType type); + /// Convert complex number type: `complex<f16>` to `!llvm<"{ half, half }">`, + /// `complex<f32>` to `!llvm<"{ float, float }">`, and `complex<f64>` to + /// `!llvm<"{ double, double }">`. `complex<bf16>` is not supported. + Type convertComplexType(ComplexType type); + /// Convert a memref type into an LLVM type that captures the relevant data. Type convertMemRefType(MemRefType type); @@ -221,6 +227,25 @@ class StructBuilder { void setPtr(OpBuilder &builder, Location loc, unsigned pos, Value ptr); }; +class ComplexStructBuilder : public StructBuilder { +public: + /// Construct a helper for the given complex number value. + using StructBuilder::StructBuilder; + /// Build IR creating an `undef` value of the complex number type. + static ComplexStructBuilder undef(OpBuilder &builder, Location loc, + Type type); + + // Build IR extracting the real value from the complex number struct. + Value real(OpBuilder &builder, Location loc); + // Build IR inserting the real value into the complex number struct. + void setReal(OpBuilder &builder, Location loc, Value real); + + // Build IR extracting the imaginary value from the complex number struct. + Value imaginary(OpBuilder &builder, Location loc); + // Build IR inserting the imaginary value into the complex number struct. + void setImaginary(OpBuilder &builder, Location loc, Value imaginary); +}; + /// Helper class to produce LLVM dialect operations extracting or inserting /// elements of a MemRef descriptor. Wraps a Value pointing to the descriptor. /// The Value may be null, in which case none of the operations are valid. @@ -476,8 +501,8 @@ class OneToOneConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> { } }; -/// Basic lowering implementation for rewriting from Ops to LLVM Dialect Ops -/// with one result. This supports higher-dimensional vector types. +/// Basic lowering implementation to rewrite Ops with just one result to the +/// LLVM Dialect. This supports higher-dimensional vector types.
template <typename SourceOp, typename TargetOp> class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern<SourceOp> { public: diff --git a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h index e96b7cfe76e54..61d4de77458f1 100644 --- a/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h +++ b/mlir/include/mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h @@ -14,8 +14,8 @@ class MLIRContext; class OwningRewritePatternList; /// Collect a set of patterns to convert from the Vector dialect to loops + std. -void populateVectorToAffineLoopsConversionPatterns( - MLIRContext *context, OwningRewritePatternList &patterns); +void populateVectorToLoopsConversionPatterns(OwningRewritePatternList &patterns, + MLIRContext *context); } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h index 863915aa15bf3..0d9da1fe0dce9 100644 --- a/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/Affine/EDSC/Builders.h @@ -127,34 +127,34 @@ Value TemplatedIndexedValue<Load, Store>::operator^(Value e) { /// Assignment-arithmetic operator overloadings. template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator+=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) { using op::operator+; - return Store(*this + e, getBase(), {indices.begin(), indices.end()}); + return Store(*this + e, getBase(), indices); } template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator-=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) { using op::operator-; - return Store(*this - e, getBase(), {indices.begin(), indices.end()}); + return Store(*this - e, getBase(), indices); } template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator*=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) { using op::operator*; - return Store(*this * e, getBase(), {indices.begin(), indices.end()}); + return Store(*this * e, getBase(), indices); } template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator/=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) { using op::operator/; - return Store(*this / e, getBase(), {indices.begin(), indices.end()}); + return Store(*this / e, getBase(), indices); } template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator%=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) { using op::operator%; - return Store(*this % e, getBase(), {indices.begin(), indices.end()}); + return Store(*this % e, getBase(), indices); } template <typename Load, typename Store> -OperationHandle TemplatedIndexedValue<Load, Store>::operator^=(Value e) { +Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) { using op::operator^; - return Store(*this ^ e, getBase(), {indices.begin(), indices.end()}); + return Store(*this ^ e, getBase(), indices); } /// Logical operator overloadings.
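As a sketch of how the `ComplexStructBuilder` declared in the ConvertStandardToLLVM.h hunk above might be used (an illustrative helper, not from the patch; it assumes `StructBuilder`'s implicit conversion to `Value`, and the function name is invented):

```c++
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"

// Illustrative helper: packs two already-lowered scalar values into the
// {real, imaginary} LLVM struct that convertComplexType produces for the
// corresponding complex type.
static mlir::Value packComplex(mlir::OpBuilder &builder, mlir::Location loc,
                               mlir::Type llvmComplexStructTy, mlir::Value re,
                               mlir::Value im) {
  auto complexVal =
      mlir::ComplexStructBuilder::undef(builder, loc, llvmComplexStructTy);
  complexVal.setReal(builder, loc, re);
  complexVal.setImaginary(builder, loc, im);
  return complexVal; // StructBuilder converts implicitly to Value (assumed).
}
```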
diff --git a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h index 392e2433b9924..04595b7fd8620 100644 --- a/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Affine/EDSC/Intrinsics.h @@ -9,7 +9,6 @@ #define MLIR_DIALECT_AFFINE_EDSC_INTRINSICS_H_ #include "mlir/Dialect/Affine/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" namespace mlir { namespace edsc { diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h index 6ce38bcddddc5..b0bba011eb6b0 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -31,9 +31,10 @@ class AffineTerminatorOp; class FlatAffineConstraints; class OpBuilder; -/// A utility function to check if a value is defined at the top level of a -/// function. A value of index type defined at the top level is always a valid -/// symbol. +/// A utility function to check if a value is defined at the top level of an +/// op with trait `PolyhedralScope` or is a region argument for such an op. A +/// value of index type defined at the top level is always a valid symbol for +/// all its uses. bool isTopLevelValue(Value value); /// AffineDmaStartOp starts a non-blocking DMA operation that transfers data @@ -82,7 +83,7 @@ class AffineDmaStartOp : public Op &results); }; -/// The "affine.load" op reads an element from a memref, where the index -/// for each memref dimension is an affine expression of loop induction -/// variables and symbols. The output of 'affine.load' is a new value with the -/// same type as the elements of the memref. An affine expression of loop IVs -/// and symbols must be specified for each dimension of the memref. The keyword -/// 'symbol' can be used to indicate SSA identifiers which are symbolic. -// -// Example 1: -// -// %1 = affine.load %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> -// -// Example 2: Uses 'symbol' keyword for symbols '%n' and '%m'. -// -// %1 = affine.load %0[%i0 + symbol(%n), %i1 + symbol(%m)] -// : memref<100x100xf32> -// -class AffineLoadOp : public Op<AffineLoadOp, OpTrait::OneResult, - OpTrait::AtLeastNOperands<1>::Impl> { -public: - using Op::Op; - - /// Builds an affine load op with the specified map and operands. - static void build(Builder *builder, OperationState &result, AffineMap map, - ValueRange operands); - /// Builds an affine load op with an identity map and operands. - static void build(Builder *builder, OperationState &result, Value memref, - ValueRange indices = {}); - /// Builds an affine load op with the specified map and its operands. - static void build(Builder *builder, OperationState &result, Value memref, - AffineMap map, ValueRange mapOperands); - - /// Returns the operand index of the memref. - unsigned getMemRefOperandIndex() { return 0; } - - /// Get memref operand. - Value getMemRef() { return getOperand(getMemRefOperandIndex()); } - void setMemRef(Value value) { setOperand(getMemRefOperandIndex(), value); } - MemRefType getMemRefType() { - return getMemRef().getType().cast<MemRefType>(); - } - - /// Get affine map operands. - operand_range getMapOperands() { return llvm::drop_begin(getOperands(), 1); } - - /// Returns the affine map used to index the memref for this operation. - AffineMap getAffineMap() { return getAffineMapAttr().getValue(); } - AffineMapAttr getAffineMapAttr() { - return getAttr(getMapAttrName()).cast<AffineMapAttr>(); - } - - /// Returns the AffineMapAttr associated with 'memref'.
- NamedAttribute getAffineMapAttrForMemRef(Value memref) { - assert(memref == getMemRef()); - return {Identifier::get(getMapAttrName(), getContext()), - getAffineMapAttr()}; - } - - static StringRef getMapAttrName() { return "map"; } - static StringRef getOperationName() { return "affine.load"; } - - // Hooks to customize behavior of this op. - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); - static void getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context); - OpFoldResult fold(ArrayRef operands); -}; - -/// The "affine.store" op writes an element to a memref, where the index -/// for each memref dimension is an affine expression of loop induction -/// variables and symbols. The 'affine.store' op stores a new value which is the -/// same type as the elements of the memref. An affine expression of loop IVs -/// and symbols must be specified for each dimension of the memref. The keyword -/// 'symbol' can be used to indicate SSA identifiers which are symbolic. -// -// Example 1: -// -// affine.store %v0, %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> -// -// Example 2: Uses 'symbol' keyword for symbols '%n' and '%m'. -// -// affine.store %v0, %0[%i0 + symbol(%n), %i1 + symbol(%m)] -// : memref<100x100xf32> -// -class AffineStoreOp : public Op::Impl> { -public: - using Op::Op; - - /// Builds an affine store operation with the provided indices (identity map). - static void build(Builder *builder, OperationState &result, - Value valueToStore, Value memref, ValueRange indices); - /// Builds an affine store operation with the specified map and its operands. - static void build(Builder *builder, OperationState &result, - Value valueToStore, Value memref, AffineMap map, - ValueRange mapOperands); - - /// Get value to be stored by store operation. - Value getValueToStore() { return getOperand(0); } - - /// Returns the operand index of the memref. - unsigned getMemRefOperandIndex() { return 1; } - - /// Get memref operand. - Value getMemRef() { return getOperand(getMemRefOperandIndex()); } - void setMemRef(Value value) { setOperand(getMemRefOperandIndex(), value); } - - MemRefType getMemRefType() { - return getMemRef().getType().cast(); - } - - /// Get affine map operands. - operand_range getMapOperands() { return llvm::drop_begin(getOperands(), 2); } - - /// Returns the affine map used to index the memref for this operation. - AffineMap getAffineMap() { return getAffineMapAttr().getValue(); } - AffineMapAttr getAffineMapAttr() { - return getAttr(getMapAttrName()).cast(); - } - - /// Returns the AffineMapAttr associated with 'memref'. - NamedAttribute getAffineMapAttrForMemRef(Value memref) { - assert(memref == getMemRef()); - return {Identifier::get(getMapAttrName(), getContext()), - getAffineMapAttr()}; - } - - static StringRef getMapAttrName() { return "map"; } - static StringRef getOperationName() { return "affine.store"; } - - // Hooks to customize behavior of this op. - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); - static void getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context); - LogicalResult fold(ArrayRef cstOperands, - SmallVectorImpl &results); -}; - -/// Returns true if the given Value can be used as a dimension id. 
+/// Returns true if the given Value can be used as a dimension id in the region +/// of the closest surrounding op that has the trait `PolyhedralScope`. bool isValidDim(Value value); -/// Returns true if the given Value can be used as a symbol. +/// Returns true if the given Value can be used as a dimension id in `region`, +/// i.e., for all its uses in `region`. +bool isValidDim(Value value, Region *region); + +/// Returns true if the given Value can be used as a symbol in the region of the +/// closest surrounding op that has the trait `PolyhedralScope`. bool isValidSymbol(Value value); +/// Returns true if the given Value can be used as a symbol for `region`, i.e., +/// for all its uses in `region`. +bool isValidSymbol(Value value, Region *region); + +/// Modifies both `map` and `operands` in-place so as to: /// 1. drop duplicate operands /// 2. drop unused dims and symbols from map diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td index 75aaff9468e94..e2b7716fc3341 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td @@ -70,9 +70,9 @@ def AffineApplyOp : Affine_Op<"apply", [NoSideEffect]> { // has a constant builder. That way we wouldn't need to explicitly specify the // result types here. let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "AffineMap map, ValueRange mapOperands", [{ - build(builder, result, builder->getIndexType(), map, mapOperands); + build(builder, result, builder.getIndexType(), map, mapOperands); }]> ]; @@ -83,12 +83,22 @@ def AffineApplyOp : Affine_Op<"apply", [NoSideEffect]> { /// Returns the affine value map computed from this operation. AffineValueMap getAffineValueMap(); - /// Returns true if the result of this operation can be used as dimension id. + /// Returns true if the result of this operation can be used as dimension id + /// in the region of the closest surrounding op with trait PolyhedralScope. bool isValidDim(); - /// Returns true if the result of this operation is a symbol. + /// Returns true if the result of this operation can be used as dimension id + /// within `region`, i.e., for all its uses in `region`. + bool isValidDim(Region *region); + + /// Returns true if the result of this operation is a symbol in the region + /// of the closest surrounding op that has the trait PolyhedralScope. bool isValidSymbol(); + /// Returns true if the result of this operation is a symbol for all its + /// uses in `region`.
+ bool isValidSymbol(Region *region); + operand_range getMapOperands() { return getOperands(); } }]; @@ -167,9 +177,9 @@ def AffineForOp : Affine_Op<"for", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "int64_t lowerBound, int64_t upperBound, int64_t step = 1">, - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "ValueRange lbOperands, AffineMap lbMap, " "ValueRange ubOperands, AffineMap ubMap, " "int64_t step = 1"> @@ -180,11 +190,7 @@ def AffineForOp : Affine_Op<"for", static StringRef getLowerBoundAttrName() { return "lower_bound"; } static StringRef getUpperBoundAttrName() { return "upper_bound"; } - Block *getBody() { return ®ion().front(); } Value getInductionVar() { return getBody()->getArgument(0); } - OpBuilder getBodyBuilder() { - return OpBuilder(getBody(), std::prev(getBody()->end())); - } // TODO: provide iterators for the lower and upper bound operands // if the current access via getLowerBound(), getUpperBound() is too slow. @@ -322,7 +328,7 @@ def AffineIfOp : Affine_Op<"if", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "IntegerSet set, ValueRange args, bool withElseRegion"> ]; @@ -364,16 +370,90 @@ def AffineIfOp : Affine_Op<"if", let hasFolder = 1; } +def AffineLoadOp : Affine_Op<"load", []> { + let summary = "affine load operation"; + let description = [{ + The "affine.load" op reads an element from a memref, where the index + for each memref dimension is an affine expression of loop induction + variables and symbols. The output of 'affine.load' is a new value with the + same type as the elements of the memref. An affine expression of loop IVs + and symbols must be specified for each dimension of the memref. The keyword + 'symbol' can be used to indicate SSA identifiers which are symbolic. + + Example 1: + + ```mlir + %1 = affine.load %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> + ``` + + Example 2: Uses 'symbol' keyword for symbols '%n' and '%m'. + + ```mlir + %1 = affine.load %0[%i0 + symbol(%n), %i1 + symbol(%m)] : memref<100x100xf32> + ``` + }]; + + let arguments = (ins Arg:$memref, + Variadic:$indices); + let results = (outs AnyType:$result); + + let builders = [ + /// Builds an affine load op with the specified map and operands. + OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap map, " + "ValueRange operands">, + /// Builds an affine load op with an identity map and operands. + OpBuilder<"OpBuilder &builder, OperationState &result, Value memref, " + "ValueRange indices = {}">, + /// Builds an affine load op with the specified map and its operands. + OpBuilder<"OpBuilder &builder, OperationState &result, Value memref, " + "AffineMap map, ValueRange mapOperands"> + ]; + + let extraClassDeclaration = [{ + /// Returns the operand index of the memref. + unsigned getMemRefOperandIndex() { return 0; } + + /// Get memref operand. + Value getMemRef() { return getOperand(getMemRefOperandIndex()); } + void setMemRef(Value value) { setOperand(getMemRefOperandIndex(), value); } + MemRefType getMemRefType() { + return getMemRef().getType().cast(); + } + + /// Get affine map operands. + operand_range getMapOperands() { return llvm::drop_begin(getOperands(), 1); } + + /// Returns the affine map used to index the memref for this operation. 
+ AffineMap getAffineMap() { return getAffineMapAttr().getValue(); } + AffineMapAttr getAffineMapAttr() { + return getAttr(getMapAttrName()).cast(); + } + + /// Returns the AffineMapAttr associated with 'memref'. + NamedAttribute getAffineMapAttrForMemRef(Value memref) { + assert(memref == getMemRef()); + return {Identifier::get(getMapAttrName(), getContext()), + getAffineMapAttr()}; + } + + static StringRef getMapAttrName() { return "map"; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + class AffineMinMaxOpBase traits = []> : Op { let arguments = (ins AffineMapAttr:$map, Variadic:$operands); let results = (outs Index); let builders = [ - OpBuilder<"Builder *builder, OperationState &result, AffineMap affineMap, " + OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap affineMap, " "ValueRange mapOperands", [{ - build(builder, result, builder->getIndexType(), affineMap, mapOperands); + build(builder, result, builder.getIndexType(), affineMap, mapOperands); }]> ]; @@ -467,11 +547,11 @@ def AffineParallelOp : Affine_Op<"parallel", [ImplicitAffineTerminator]> { let regions = (region SizedRegion<1>:$region); let builders = [ - OpBuilder<"Builder* builder, OperationState& result," + OpBuilder<"OpBuilder &builder, OperationState &result," "ArrayRef ranges">, - OpBuilder<"Builder* builder, OperationState& result, AffineMap lbMap," + OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap lbMap," "ValueRange lbArgs, AffineMap ubMap, ValueRange ubArgs">, - OpBuilder<"Builder* builder, OperationState& result, AffineMap lbMap," + OpBuilder<"OpBuilder &builder, OperationState &result, AffineMap lbMap," "ValueRange lbArgs, AffineMap ubMap, ValueRange ubArgs," "ArrayRef steps"> ]; @@ -529,15 +609,15 @@ def AffinePrefetchOp : Affine_Op<"prefetch"> { BoolAttr:$isDataCache); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value memref," + "OpBuilder &builder, OperationState &result, Value memref," "AffineMap map, ArrayRef mapOperands, bool isWrite," "unsigned localityHint, bool isDataCache", [{ assert(map.getNumInputs() == mapOperands.size() && "inconsistent index info"); - auto localityHintAttr = builder->getI32IntegerAttr(localityHint); - auto isWriteAttr = builder->getBoolAttr(isWrite); - auto isDataCacheAttr = builder->getBoolAttr(isDataCache); + auto localityHintAttr = builder.getI32IntegerAttr(localityHint); + auto isWriteAttr = builder.getBoolAttr(isWrite); + auto isDataCacheAttr = builder.getBoolAttr(isDataCache); result.addOperands(memref); result.addAttribute(getMapAttrName(), AffineMapAttr::get(map)); result.addOperands(mapOperands); @@ -579,6 +659,81 @@ def AffinePrefetchOp : Affine_Op<"prefetch"> { let hasFolder = 1; } +def AffineStoreOp : Affine_Op<"store", []> { + let summary = "affine store operation"; + let description = [{ + The "affine.store" op writes an element to a memref, where the index + for each memref dimension is an affine expression of loop induction + variables and symbols. The 'affine.store' op stores a new value which is the + same type as the elements of the memref. An affine expression of loop IVs + and symbols must be specified for each dimension of the memref. The keyword + 'symbol' can be used to indicate SSA identifiers which are symbolic. + + Example 1: + + ```mlir + affine.store %v0, %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> + ``` + + Example 2: Uses 'symbol' keyword for symbols '%n' and '%m'. 
+ + ```mlir + affine.store %v0, %0[%i0 + symbol(%n), %i1 + symbol(%m)] : memref<100x100xf32> + ``` + }]; + let arguments = (ins AnyType:$value, + Arg:$memref, + Variadic:$indices); + + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<"OpBuilder &builder, OperationState &result, " + "Value valueToStore, Value memref, ValueRange indices">, + OpBuilder<"OpBuilder &builder, OperationState &result, " + "Value valueToStore, Value memref, AffineMap map, " + "ValueRange mapOperands"> + ]; + + let extraClassDeclaration = [{ + /// Get value to be stored by store operation. + Value getValueToStore() { return getOperand(0); } + + /// Returns the operand index of the memref. + unsigned getMemRefOperandIndex() { return 1; } + + /// Get memref operand. + Value getMemRef() { return getOperand(getMemRefOperandIndex()); } + void setMemRef(Value value) { setOperand(getMemRefOperandIndex(), value); } + + MemRefType getMemRefType() { + return getMemRef().getType().cast(); + } + + /// Get affine map operands. + operand_range getMapOperands() { return llvm::drop_begin(getOperands(), 2); } + + /// Returns the affine map used to index the memref for this operation. + AffineMap getAffineMap() { return getAffineMapAttr().getValue(); } + AffineMapAttr getAffineMapAttr() { + return getAttr(getMapAttrName()).cast(); + } + + /// Returns the AffineMapAttr associated with 'memref'. + NamedAttribute getAffineMapAttrForMemRef(Value memref) { + assert(memref == getMemRef()); + return {Identifier::get(getMapAttrName(), getContext()), + getAffineMapAttr()}; + } + + static StringRef getMapAttrName() { return "map"; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + def AffineTerminatorOp : Affine_Op<"terminator", [NoSideEffect, Terminator]> { let summary = "affine terminator operation"; diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index c65c6c5f44aa0..82d94557f0196 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -15,6 +15,7 @@ include "mlir/Dialect/GPU/GPUBase.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" +include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/SideEffects.td" // Type constraint accepting standard integers, indices and wrapped LLVM integer @@ -153,7 +154,7 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, StringRef name, " + OpBuilder<"OpBuilder &builder, OperationState &result, StringRef name, " "FunctionType type, ArrayRef workgroupAttributions = {}, " "ArrayRef privateAttributions = {}, " "ArrayRef attrs = {}"> @@ -322,11 +323,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, " + OpBuilder<"OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, " "Value gridSizeX, Value gridSizeY, Value gridSizeZ, " "Value blockSizeX, Value blockSizeY, Value blockSizeZ, " "ValueRange kernelOperands">, - OpBuilder<"Builder *builder, OperationState &result, GPUFuncOp kernelFunc, " + OpBuilder<"OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, " "KernelDim3 gridSize, KernelDim3 blockSize, " "ValueRange kernelOperands"> ]; @@ -441,7 +442,7 @@ def GPU_LaunchOp : GPU_Op<"launch">, let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, Value gridSizeX," + OpBuilder<"OpBuilder &builder, 
OperationState &result, Value gridSizeX," "Value gridSizeY, Value gridSizeZ, Value blockSizeX," "Value blockSizeY, Value blockSizeZ"> ]; @@ -488,7 +489,7 @@ def GPU_ReturnOp : GPU_Op<"return", [HasParent<"GPUFuncOp">, NoSideEffect, by an invocation of the `gpu.func`. }]; - let builders = [OpBuilder<"Builder *builder, OperationState &result", " // empty">]; + let builders = [OpBuilder<"OpBuilder &builder, OperationState &result", " // empty">]; let parser = [{ return parseReturnOp(parser, result); }]; let printer = [{ p << getOperationName(); }]; @@ -667,7 +668,7 @@ def GPU_GPUModuleOp : GPU_Op<"module", [ ``` }]; - let builders = [OpBuilder<"Builder *builder, OperationState &result, " + let builders = [OpBuilder<"OpBuilder &builder, OperationState &result, " "StringRef name">]; let parser = [{ return ::parseGPUModuleOp(parser, result); }]; let printer = [{ return ::print(p, *this); }]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index c9ee88c770105..b08278f0c2df3 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -14,6 +14,7 @@ #define LLVMIR_OPS include "mlir/Dialect/LLVMIR/LLVMOpBase.td" +include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/SideEffects.td" @@ -22,7 +23,7 @@ class LLVM_Builder { } def LLVM_OneResultOpBuilder : OpBuilder< - "Builder *, OperationState &result, Type resultType, " + "OpBuilder &, OperationState &result, Type resultType, " "ValueRange operands, ArrayRef attributes = {}", [{ if (resultType) result.addTypes(resultType); @@ -33,7 +34,7 @@ def LLVM_OneResultOpBuilder : OpBuilder< }]>; def LLVM_ZeroResultOpBuilder : OpBuilder< - "Builder *, OperationState &result, ValueRange operands, " + "OpBuilder &, OperationState &result, ValueRange operands, " "ArrayRef attributes = {}", [{ result.addOperands(operands); @@ -55,7 +56,7 @@ class LLVM_OneResultOp traits = []> : // Compatibility builder that takes an instance of wrapped llvm::VoidType // to indicate no result. def LLVM_VoidResultTypeOpBuilder : OpBuilder< - "Builder *builder, OperationState &result, Type resultType, " + "OpBuilder &builder, OperationState &result, Type resultType, " "ValueRange operands, ArrayRef attributes = {}", [{ auto llvmType = resultType.dyn_cast(); (void)llvmType; @@ -73,7 +74,7 @@ class LLVM_ZeroResultOp traits = []> : // Opaque builder used for terminator operations that contain successors. 
def LLVM_TerminatorPassthroughOpBuilder : OpBuilder< - "Builder *, OperationState &result, ValueRange operands, " + "OpBuilder &, OperationState &result, ValueRange operands, " "SuccessorRange destinations, ArrayRef attributes = {}", [{ result.addOperands(operands); @@ -149,11 +150,11 @@ def LLVM_ICmpOp : LLVM_OneResultOp<"icmp", [NoSideEffect]>, $res = builder.CreateICmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, ICmpPredicate predicate, Value lhs, " + "OpBuilder &b, OperationState &result, ICmpPredicate predicate, Value lhs, " "Value rhs", [{ LLVMDialect *dialect = &lhs.getType().cast().getDialect(); build(b, result, LLVMType::getInt1Ty(dialect), - b->getI64IntegerAttr(static_cast(predicate)), lhs, rhs); + b.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; let printer = [{ printICmpOp(p, *this); }]; @@ -196,11 +197,11 @@ def LLVM_FCmpOp : LLVM_OneResultOp<"fcmp", [NoSideEffect]>, $res = builder.CreateFCmp(getLLVMCmpPredicate($predicate), $lhs, $rhs); }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, FCmpPredicate predicate, Value lhs, " + "OpBuilder &b, OperationState &result, FCmpPredicate predicate, Value lhs, " "Value rhs", [{ LLVMDialect *dialect = &lhs.getType().cast().getDialect(); build(b, result, LLVMType::getInt1Ty(dialect), - b->getI64IntegerAttr(static_cast(predicate)), lhs, rhs); + b.getI64IntegerAttr(static_cast(predicate)), lhs, rhs); }]>]; let parser = [{ return parseCmpOp(parser, result); }]; let printer = [{ printFCmpOp(p, *this); }]; @@ -229,12 +230,12 @@ def LLVM_AllocaOp : $res = alloca; }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Type resultType, Value arraySize, " + "OpBuilder &b, OperationState &result, Type resultType, Value arraySize, " "unsigned alignment", [{ if (alignment == 0) return build(b, result, resultType, arraySize, IntegerAttr()); - build(b, result, resultType, arraySize, b->getI64IntegerAttr(alignment)); + build(b, result, resultType, arraySize, b.getI64IntegerAttr(alignment)); }]>]; let parser = [{ return parseAllocaOp(parser, result); }]; let printer = [{ printAllocaOp(p, *this); }]; @@ -257,7 +258,7 @@ def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, def LLVM_LoadOp : LLVM_OneResultOp<"load">, Arguments<(ins LLVM_Type:$addr)>, LLVM_Builder<"$res = builder.CreateLoad($addr);"> { let builders = [OpBuilder< - "Builder *b, OperationState &result, Value addr", + "OpBuilder &b, OperationState &result, Value addr", [{ auto type = addr.getType().cast().getPointerElementTy(); build(b, result, type, addr); @@ -311,7 +312,7 @@ def LLVM_InvokeOp : LLVM_Op<"invoke", [ AnySuccessor:$unwindDest); let builders = [OpBuilder< - "Builder *b, OperationState &result, ArrayRef tys, " + "OpBuilder &b, OperationState &result, ArrayRef tys, " "FlatSymbolRefAttr callee, ValueRange ops, Block* normal, " "ValueRange normalOps, Block* unwind, ValueRange unwindOps", [{ @@ -319,7 +320,7 @@ def LLVM_InvokeOp : LLVM_Op<"invoke", [ build(b, result, tys, ops, normal, normalOps, unwind, unwindOps); }]>, OpBuilder< - "Builder *b, OperationState &result, ArrayRef tys, " + "OpBuilder &b, OperationState &result, ArrayRef tys, " "ValueRange ops, Block* normal, " "ValueRange normalOps, Block* unwind, ValueRange unwindOps", [{ @@ -344,13 +345,13 @@ def LLVM_CallOp : LLVM_Op<"call">, Variadic)>, Results<(outs Variadic)> { let builders = [OpBuilder< - "Builder *builder, OperationState 
&result, LLVMFuncOp func," + "OpBuilder &builder, OperationState &result, LLVMFuncOp func," "ValueRange operands, ArrayRef attributes = {}", [{ LLVMType resultType = func.getType().getFunctionResultType(); if (!resultType.isVoidTy()) result.addTypes(resultType); - result.addAttribute("callee", builder->getSymbolRefAttr(func)); + result.addAttribute("callee", builder.getSymbolRefAttr(func)); result.addAttributes(attributes); result.addOperands(operands); }]>]; @@ -369,7 +370,7 @@ def LLVM_ExtractElementOp : LLVM_OneResultOp<"extractelement", [NoSideEffect]>, $res = builder.CreateExtractElement($vector, $position); }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Value vector, Value position," + "OpBuilder &b, OperationState &result, Value vector, Value position," "ArrayRef attrs = {}">]; let parser = [{ return parseExtractElementOp(parser, result); }]; let printer = [{ printExtractElementOp(p, *this); }]; @@ -400,7 +401,7 @@ def LLVM_InsertValueOp : LLVM_OneResultOp<"insertvalue", [NoSideEffect]>, extractPosition($position)); }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Value container, Value value, " + "OpBuilder &b, OperationState &result, Value container, Value value, " "ArrayAttr position", [{ build(b, result, container.getType(), container, value, position); @@ -417,7 +418,7 @@ def LLVM_ShuffleVectorOp $res = builder.CreateShuffleVector($v1, $v2, mask); }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Value v1, Value v2, " + "OpBuilder &b, OperationState &result, Value v1, Value v2, " "ArrayAttr mask, ArrayRef attrs = {}">]; let verifier = [{ auto wrappedVectorType1 = v1().getType().cast(); @@ -442,7 +443,7 @@ def LLVM_SelectOp LLVM_Builder< "$res = builder.CreateSelect($condition, $trueValue, $falseValue);"> { let builders = [OpBuilder< - "Builder *b, OperationState &result, Value condition, Value lhs, " + "OpBuilder &b, OperationState &result, Value condition, Value lhs, " "Value rhs", [{ build(b, result, lhs.getType(), condition, lhs, rhs); }]>]; @@ -479,13 +480,13 @@ def LLVM_CondBrOp : LLVM_TerminatorOp<"cond_br", }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value condition," + "OpBuilder &builder, OperationState &result, Value condition," "Block *trueDest, ValueRange trueOperands," "Block *falseDest, ValueRange falseOperands", [{ build(builder, result, condition, trueOperands, falseOperands, trueDest, falseDest); }]>, OpBuilder< - "Builder *builder, OperationState &result, Value condition," + "OpBuilder &builder, OperationState &result, Value condition," "Block *trueDest, Block *falseDest, ValueRange falseOperands = {}", [{ build(builder, result, condition, trueDest, ValueRange(), falseDest, falseOperands); @@ -575,13 +576,13 @@ def LLVM_AddressOfOp : LLVM_OneResultOp<"mlir.addressof">, Arguments<(ins FlatSymbolRefAttr:$global_name)> { let builders = [ - OpBuilder<"Builder *builder, OperationState &result, LLVMType resType, " + OpBuilder<"OpBuilder &builder, OperationState &result, LLVMType resType, " "StringRef name, ArrayRef attrs = {}", [{ - result.addAttribute("global_name", builder->getSymbolRefAttr(name)); + result.addAttribute("global_name", builder.getSymbolRefAttr(name)); result.addAttributes(attrs); result.addTypes(resType);}]>, - OpBuilder<"Builder *builder, OperationState &result, GlobalOp global, " + OpBuilder<"OpBuilder &builder, OperationState &result, GlobalOp global, " "ArrayRef attrs = {}", [{ build(builder, result, 
global.getType().getPointerTo(global.addr_space().getZExtValue()), @@ -628,7 +629,7 @@ def LLVM_GlobalOp let regions = (region AnyRegion:$initializer); let builders = [ - OpBuilder<"Builder *builder, OperationState &result, LLVMType type, " + OpBuilder<"OpBuilder &builder, OperationState &result, LLVMType type, " "bool isConstant, Linkage linkage, StringRef name, " "Attribute value, unsigned addrSpace = 0, " "ArrayRef attrs = {}"> @@ -676,10 +677,10 @@ def LLVM_LLVMFuncOp let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, StringRef name, " + OpBuilder<"OpBuilder &builder, OperationState &result, StringRef name, " "LLVMType type, LLVM::Linkage linkage = LLVM::Linkage::External, " "ArrayRef attrs = {}, " - "ArrayRef argAttrs = {}"> + "ArrayRef argAttrs = {}"> ]; let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index b442d48e0e673..58ee259c5c498 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -87,5 +87,19 @@ def ROCDL_GridDimYOp : ROCDL_DeviceFunctionOp<"grid.dim.y", def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z", "__ockl_get_global_size", 2>; +//===----------------------------------------------------------------------===// +// Synchronization primitives + +def ROCDL_BarrierOp : ROCDL_Op<"barrier"> { + string llvmBuilder = [{ + llvm::LLVMContext &llvmContext = builder.getContext(); + builder.CreateFence(llvm::AtomicOrdering::Release, + llvmContext.getOrInsertSyncScopeID("workgroup")); + createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier); + builder.CreateFence(llvm::AtomicOrdering::Acquire, + llvmContext.getOrInsertSyncScopeID("workgroup")); + }]; + let assemblyFormat = "attr-dict"; +} #endif // ROCDLIR_OPS diff --git a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt index 076c2dfbccb5c..66ac74515dddd 100644 --- a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt @@ -1,5 +1,4 @@ add_subdirectory(IR) -add_subdirectory(Transforms) set(LLVM_TARGET_DEFINITIONS Passes.td) mlir_tablegen(Passes.h.inc -gen-pass-decls) diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h index 1608a1ad4ab16..5180c58b64986 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h @@ -18,7 +18,6 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h index 2c506a87dc5ef..3575d55c3d534 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h @@ -22,9 +22,9 @@ struct FoldedValueBuilder { // Builder-based template FoldedValueBuilder(OperationFolder *folder, Args... args) { - value = folder ? folder->create(ScopedContext::getBuilder(), + value = folder ? folder->create(ScopedContext::getBuilderRef(), ScopedContext::getLocation(), args...) 
- : ScopedContext::getBuilder().create( + : ScopedContext::getBuilderRef().create( ScopedContext::getLocation(), args...); } diff --git a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt index 6f25c2049272d..fe67dcb7a6609 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt @@ -29,6 +29,7 @@ add_linalg_ods_gen(LinalgNamedStructuredOpsSpec.tc LinalgNamedStructuredOps) # Provide a short name for all external dependency that needs to # include Linalg in ODS add_custom_target(LinalgOdsGen DEPENDS MLIRLinalgNamedStructuredOpsIncGen) +add_dependencies(mlir-headers LinalgOdsGen) add_mlir_dialect(LinalgOps linalg) @@ -40,8 +41,10 @@ mlir_tablegen(LinalgStructuredOps.h.inc -gen-op-decls) mlir_tablegen(LinalgStructuredOps.cpp.inc -gen-op-defs) add_public_tablegen_target(MLIRLinalgStructuredOpsIncGen) add_dependencies(MLIRLinalgStructuredOpsIncGen LinalgOdsGen) - +add_dependencies(mlir-headers MLIRLinalgStructuredOpsIncGen) + set(LLVM_TARGET_DEFINITIONS LinalgStructuredOpsInterface.td) mlir_tablegen(LinalgStructuredOpsInterfaces.h.inc -gen-op-interface-decls) mlir_tablegen(LinalgStructuredOpsInterfaces.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRLinalgStructuredOpsInterfaceIncGen) +add_dependencies(mlir-headers MLIRLinalgStructuredOpsInterfaceIncGen) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index c1b34cc690cbe..5fe464231d12c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -49,10 +49,10 @@ def Linalg_RangeOp : ```` }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value min, Value max, " + "OpBuilder &builder, OperationState &result, Value min, Value max, " "Value step", [{ - auto rangeType = RangeType::get(builder->getContext()); + auto rangeType = RangeType::get(builder.getContext()); build(builder, result, rangeType, min, max, step); }]>]; @@ -66,12 +66,12 @@ class Linalg_ReshapeLikeOp : let builders = [ // Builder for a contracting reshape whose result type is computed from // `src` and `reassociation`. - OpBuilder<"Builder *b, OperationState &result, Value src, " + OpBuilder<"OpBuilder &b, OperationState &result, Value src, " "ArrayRef> reassociation, " "ArrayRef attrs = {}">, // Builder for a reshape whose result type is passed explicitly. This may be // either a contracting or expanding reshape. 
- OpBuilder<"Builder *b, OperationState &result, Type resultType, Value src," + OpBuilder<"OpBuilder &b, OperationState &result, Type resultType, Value src," "ArrayRef> reassociation, " "ArrayRef attrs = {}">]; @@ -227,7 +227,7 @@ def Linalg_SliceOp : Linalg_Op<"slice", [ }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Value base, " + "OpBuilder &b, OperationState &result, Value base, " "ValueRange indexings">]; let extraClassDeclaration = [{ @@ -271,7 +271,7 @@ def Linalg_TransposeOp : Linalg_Op<"transpose", [NoSideEffect]>, }]; let builders = [OpBuilder< - "Builder *b, OperationState &result, Value view, " + "OpBuilder &b, OperationState &result, Value view, " "AffineMapAttr permutation, ArrayRef attrs = {}">]; let verifier = [{ diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index e7b11df3141ab..074d659778d29 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -117,7 +117,7 @@ def CopyOp : LinalgStructured_Op<"copy", [NInputs<1>, NOutputs<1>]> { // TODO(ntv) this should go away once the usage of OptionalAttr triggers // emission of builders with default arguments left unspecified. let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value input, Value output", [{ + "OpBuilder &builder, OperationState &result, Value input, Value output", [{ return build( builder, result, input, output, AffineMapAttr(), AffineMapAttr()); }]>]; @@ -260,7 +260,7 @@ def MatmulOp : LinalgStructured_Op<"matmul", [NInputs<2>, NOutputs<1>]> { /// OptionalAttr:$strides /// OptionalAttr:$dilations /// OptionalAttr:$padding -/// `stirdes` denotes the step of each window along the dimension. +/// `strides` denotes the step of each window along the dimension. class PoolingBase_Op props> : LinalgStructured_Op { let description = [{ diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h index 1c427faff6933..b7bba5a310113 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h @@ -351,10 +351,11 @@ template class NamedStructuredOpTraits : public OpTrait::TraitBase { public: - llvm::Optional> referenceIterators(); - llvm::Optional> referenceIndexingMaps(); - std::function)> - emitScalarImplementation(); + static SmallVector referenceIterators(TypeRange inputTypes, + TypeRange outputTypes); + + static SmallVector referenceIndexingMaps(TypeRange inputTypes, + TypeRange outputTypes); }; } // namespace linalg diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index 5c4868c4c870e..90f894fccced2 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -50,6 +50,11 @@ std::unique_ptr> createConvertLinalgToParallelLoopsPass(); /// Placeholder for now, this is NYI. std::unique_ptr> createConvertLinalgToAffineLoopsPass(); +/// Create a pass to convert Linalg operations which work on tensors to use +/// buffers instead. +std::unique_ptr> +createConvertLinalgOnTensorsToBuffersPass(); + /// Patterns for fusing linalg operation on tensors. 
void populateLinalgTensorOpsFusionPatterns(MLIRContext *context, OwningRewritePatternList &patterns); diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 9f60b274817e0..850f381dd4efa 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -32,6 +32,12 @@ def LinalgLowerToLoops : FunctionPass<"convert-linalg-to-loops"> { let constructor = "mlir::createConvertLinalgToLoopsPass()"; } +def LinalgOnTensorsToBuffers : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> { + let summary = "Convert the Linalg operations which work on tensor-type " + "operands or results to use buffers instead"; + let constructor = "mlir::createConvertLinalgOnTensorsToBuffersPass()"; +} + def LinalgLowerToParallelLoops : FunctionPass<"convert-linalg-to-parallel-loops"> { let summary = "Lower the operations from the linalg dialect into parallel " diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/Transforms/CMakeLists.txt deleted file mode 100644 index 932a213980f45..0000000000000 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS LinalgTransformPatterns.td) -mlir_tablegen(TestLinalgMatmulToVectorPatterns.h.inc -gen-rewriters) -mlir_tablegen(LinalgTransformPatterns.h.inc -gen-rewriters) -add_public_tablegen_target(MLIRLinalgTransformPatternsIncGen) - -# Including Linalg in TableGen requires to depends on generated files -add_dependencies(MLIRLinalgTransformPatternsIncGen LinalgOdsGen) - diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td deleted file mode 100644 index a51352cd4d0e0..0000000000000 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td +++ /dev/null @@ -1,123 +0,0 @@ -//===- LinalgPatterns.td - Linalg transformation patterns --*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is the pattern definition file for declarative Linalg transformation. -// -//===----------------------------------------------------------------------===// - -#ifndef LINALG_TRANSFORMS -#define LINALG_TRANSFORMS - -include "mlir/Dialect/Linalg/IR/LinalgOps.td" -include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.td" -include "mlir/Dialect/Affine/IR/AffineOps.td" - -def HasNoLinalgTransformMarker : CPred<[{ - !op.getAttrOfType(LinalgTransforms::kLinalgTransformMarker) -}]>; - -class HasLinalgTransformMarker : CPred<[{ - op.getAttrOfType( - LinalgTransforms::kLinalgTransformMarker) && - op.getAttrOfType( - LinalgTransforms::kLinalgTransformMarker).getValue() == "}] # str # [{"}]>; - -class IsProducedByOpOfType : - CPred<"isProducedByOpOfType<" # str # ">(op, $0)">; - -class AffineMapDomainHasDim : CPred<[{ - op.getAttrOfType(getIndexingMapsAttrName()).getValue()[0]. - cast().getValue().getNumDims() ==}] # n # [{}]>; - -class HasOperandsOfType: CPred<[{ - llvm::any_of(op.getOperands(), - [](Value v) { - return dyn_cast_or_null<}] # type # [{>(v.getDefiningOp()); - }) -}]>; - -//===----------------------------------------------------------------------===// -// Linalg fusion patterns. 
-//===----------------------------------------------------------------------===// -// -// In the future, tile sizes should be derived from op properties + machine -// description but we do not need to wait on this to start having useful -// patterns. -class TileAndFuseLinalgOp< - list sizes, list operandIndices, string value> : NativeCodeCall< - "if (failed(tileAndFuseLinalgOpAndSetMarker($_builder, op, {" # - StrJoinInt.result # "}, {" # StrJoinInt.result # "}," # - " \"" # value # "\")))" # - " return failure();">; - -//===----------------------------------------------------------------------===// -// Linalg tiling patterns. -//===----------------------------------------------------------------------===// -// -// In the future, tile sizes should be derived from op properties + machine -// description but we do not need to wait on this to start having useful -// patterns. -// `permutation` is an optional parameter to specify the ordering of the -// tiled loops. If provided, it must be a list of integers with the same number -// of elements as `sizes`. -class TileLinalgOp sizes, string value, list permutation=[]> : - NativeCodeCall< - "if (failed(tileLinalgOpAndSetMarker($_builder, op, {" # - StrJoinInt.result # "}, \"" # value # "\", {" # - StrJoinInt.result # "})))" # - " return failure();">; - -//===----------------------------------------------------------------------===// -// Linalg to loop patterns. -//===----------------------------------------------------------------------===// -class LinalgOpToLoops : NativeCodeCall< - "if (failed(linalgOpToLoops<" # OpType # ">($_builder, op))) " # - " return failure();">; - -class LinalgOpToParallelLoops : NativeCodeCall< - "if (failed(linalgOpToParallelLoops<" # OpType # ">($_builder, op))) " # - " return failure();">; - -class LinalgOpToAffineLoops : NativeCodeCall< - "if (failed(linalgOpToAffineLoops<" # OpType # ">($_builder, op))) " # - " return failure();">; - -//===----------------------------------------------------------------------===// -// Linalg to vector patterns precondition and DRR. -//===----------------------------------------------------------------------===// -def PreconditionVectorizeLinalgOp : CPred< - "succeeded(vectorizeLinalgOpPrecondition(op))">; -def VectorizeLinalgOp : NativeCodeCall< - "vectorizeLinalgOp($_builder, op)">; - - -//===----------------------------------------------------------------------===// -// Linalg generic permutation patterns precondition and DRR. -//===----------------------------------------------------------------------===// -class PreconditionPermuteGenericLinalgOp permutation> : CPred< - "succeeded(permuteGenericLinalgOpPrecondition(op, {" # - StrJoinInt.result # "}))">; -class PermuteGenericLinalgOp permutation, string value> : - NativeCodeCall< - "permuteGenericLinalgOp($_builder, op, {" # StrJoinInt.result # - "}, \"" # value # "\")">; - -//===----------------------------------------------------------------------===// -// Linalg promote subview operands precondition and DRR. 
-//===----------------------------------------------------------------------===// -def PreconditionPromoteSubviewsLinalgOp : CPred< - "succeeded(promoteSubviewsLinalgOpPrecondition(op))">; -def PromoteSubviewsLinalgOp : NativeCodeCall< - "promoteSubviewsLinalgOp($_builder, op)">; - -class PromoteSelectedSubviewsLinalgOp operands, string marker="", - int alignment=0> : - NativeCodeCall<"promoteSelectedSubviewsLinalgOpAndSetMarker($_builder, op, {" # - StrJoinInt.result # "}, \"" # marker # "\", " # alignment # ")">; - -#endif // LINALG_TRANSFORMS diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h deleted file mode 100644 index 78d588aaf00ba..0000000000000 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/LinalgTransforms.h +++ /dev/null @@ -1,137 +0,0 @@ -//===- LinalgTransforms.h - Linalg transformations as patterns --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef DIALECT_LINALG_TRANSFORMS_LINALGTRANSFORMS_H_ -#define DIALECT_LINALG_TRANSFORMS_LINALGTRANSFORMS_H_ - -#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" - -#include "llvm/ADT/STLExtras.h" - -namespace mlir { -namespace linalg { - -// Marker used as attribute name in generated Linalg rewriting transformations. -struct LinalgTransforms { - static const StringLiteral kLinalgTransformMarker; -}; - -namespace detail { -// Implementation detail of isProducedByOpOfType avoids the need for explicit -// template instantiations. -bool isProducedByOpOfTypeImpl(Operation *consumerOp, Value consumedView, - function_ref isaOpType); -} // namespace detail - -// Returns true if the `consumedView` value use in `consumerOp` is produced by -// an op of type `OpTy`. This is used to implement use-def type information on -// buffers. -template -bool isProducedByOpOfType(Operation *consumerOp, Value consumedView) { - return detail::isProducedByOpOfTypeImpl( - consumerOp, consumedView, [](Operation *op) { return isa(op); }); -} - -//////////////////////////////////////////////////////////////////////////////// -// The following Declarative Rewrite Rule (DRR) helpers are used in rewrite -// patterns. As such, they must not call into `rewriter.erase/replace` APIs and -// it is the responsibility of the enclosing PatternRewriter to erase on -// success. -//////////////////////////////////////////////////////////////////////////////// - -/// Tiles `op` by `sizes` permuting the loops according to `permutation` and -/// sets the attribute `kLinalgTransformMarker` to `linalgMarker`. The -/// permutation is expressed as a list of integers that specify the new ordering -/// of the loop nest (using loop.for operations). The length of `permutation` -/// must be equal to the length of `tileSizes`. -/// E.g. the permutation `(i,j,k) -> (j,k,i)` will be expressed with -/// `permutation = [1,2,0]`. All values in `permutation` must be -/// integers, in the range 0..`tileSizes.size()` without duplications -/// (i.e. `[1,1,2]` is an invalid permutation). 
An empty list -/// states for the identity permutation. -LogicalResult tileLinalgOpAndSetMarker(PatternRewriter &rewriter, Operation *op, - ArrayRef sizes, - StringRef linalgMarker, - ArrayRef permutation); - -/// Tiles ops similar to `tileLinalgOpAndSetMarker` but generates loop.parallel -/// operations instead. -LogicalResult tileLinalgOpToParallelLoopsAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - StringRef linalgMarker, ArrayRef permutation); - -/// Tiles `op` by `sizes`, fuses the producers of `operandIndicesToFuse` and -/// sets the attribute `kLinalgTransformMarker` to `linalgMarker`. -LogicalResult tileAndFuseLinalgOpAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - ArrayRef operandIndicesToFuse, StringRef linalgMarker); - -/// Tiles ops similar to `tileAndFuseLinalgOpAndSetMarker` but generates -/// loop.parallel operations instead. -LogicalResult tileAndFuseLinalgOpToParallelLoopsAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - ArrayRef operandIndicesToFuse, StringRef linalgMarker); - -using LinalgLoops = SmallVector; - -/// Emits a loop nest of with the proper body for `op`. -template -Optional linalgLowerOpToLoops(PatternRewriter &rewriter, - Operation *op); - -/// Emits a loop nest of `loop.for` with the proper body for `op`. -template -LogicalResult linalgOpToLoops(PatternRewriter &rewriter, Operation *op); - -/// Emits a loop nest of `loop.parallel` with the proper body for `op`. -template -LogicalResult linalgOpToParallelLoops(PatternRewriter &rewriter, Operation *op); - -/// Emits a loop nest of `affine.for` with the proper body for `op`. -template -LogicalResult linalgOpToAffineLoops(PatternRewriter &rewriter, Operation *op); - -/// Rewrite a linalg.generic into a suitable vector.contraction op. -LogicalResult vectorizeLinalgOpPrecondition(Operation *op); -SmallVector vectorizeLinalgOp(PatternRewriter &rewriter, - Operation *op); - -/// Emits a `generic` or `indexed_generic` operation with the `indexing_maps` -/// and `iterator_types` permutated according to `permutation`. -LogicalResult -permuteGenericLinalgOpPrecondition(Operation *op, - ArrayRef permutation); -SmallVector permuteGenericLinalgOp(PatternRewriter &rewriter, - Operation *op, - ArrayRef permutation, - StringRef linalgMarker); - -/// Promote std.subviews feeding linalg operations. -LogicalResult promoteSubviewsLinalgOpPrecondition(Operation *op); -SmallVector promoteSubviewsLinalgOp(PatternRewriter &rewriter, - Operation *op); - -/// Similar to `promoteSubviewsLinalgOp` but only tries to promote -/// the views corresponding to the operands specified in -/// `operandIndicesToPromote`. Generated allocations are memory-aligned -/// according to the `alignment` parameter. -/// If linalgMarker is specified and the transformation is successfull -/// sets the attribute `kLinalgTransformMarker` to `linalgMarker`. 
-SmallVector promoteSelectedSubviewsLinalgOpAndSetMarker( - PatternRewriter &rewriter, Operation *op, - ArrayRef operandIndicesToPromote, StringRef linalgMarker = "", - int64_t alignment = 0); -} // namespace linalg -} // namespace mlir - -#endif // DIALECT_LINALG_TRANSFORMS_LINALGTRANSFORMS_H_ diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h new file mode 100644 index 0000000000000..b67ff776ea4a9 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -0,0 +1,375 @@ +//===- Transforms.h - Linalg transformations as patterns --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_ +#define DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_ + +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/IR/PatternMatch.h" + +namespace mlir { +namespace linalg { + +//===----------------------------------------------------------------------===// +// Transformations exposed as function calls. +//===----------------------------------------------------------------------===// +using LinalgLoops = SmallVector; + +struct TiledLinalgOp { + LinalgOp op; + SmallVector loops; +}; + +/// Performs standalone tiling of a single LinalgOp by `tileSizes`, +/// and permutes the loop nest according to `interchangeVector`. +/// The permutation is expressed as a list of integers that specify +/// the new ordering of the loop nest. The length of `interchangeVector` +/// must be equal to the length of `tileSizes`. +/// An empty vector is interpreted as the identity permutation and the +/// transformation returns early. +/// +/// When non-null, the optional pointer `folder` is used to call into the +/// `createAndFold` builder method. If `folder` is null, the regular `create` +/// method is called. +/// +/// Returns a struct containing the tiled loops in the specified order +/// and the cloned op if successful, llvm::None otherwise. +/// +/// E.g. the permutation `(i,j,k) -> (j,k,i)` is expressed by +/// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be +/// integers, in the range 0..`tileSizes.size()` without duplications +/// (i.e. `[1,1,2]` is an invalid permutation). +Optional tileLinalgOp(OpBuilder &b, LinalgOp op, + ArrayRef tileSizes, + ArrayRef interchangeVector = {}, + OperationFolder *folder = nullptr); +Optional +tileLinalgOpToParallelLoops(OpBuilder &b, LinalgOp op, + ArrayRef tileSizes, + ArrayRef interchangeVector = {}, + OperationFolder *folder = nullptr); + +/// Performs standalone tiling of a single LinalgOp by constant `tileSizes`. +/// See `tileLinalgOp(... ArrayRef tileSizes,)` for more details. +Optional tileLinalgOp(OpBuilder &b, LinalgOp op, + ArrayRef tileSizes, + ArrayRef interchangeVector = {}, + OperationFolder *folder = nullptr); +Optional +tileLinalgOpToParallelLoops(OpBuilder &b, LinalgOp op, + ArrayRef tileSizes, + ArrayRef interchangeVector = {}, + OperationFolder *folder = nullptr); + +/// Interchanges the `iterator_types` and `indexing_maps` dimensions of `op`. +/// This is an in-place transformation controlled by `interchangeVector`. +/// An empty vector is interpreted as the identity permutation and the +/// transformation returns early. +/// +/// E.g.
the permutation `(i,j,k) -> (j,k,i)` is expressed with +/// `interchangeVector = [1,2,0]`. All values in `interchangeVector` must be +/// integers, in the range 0..`op.rank` without duplications +/// (i.e. `[1,1,2]` is an invalid permutation). +LinalgOp interchange(LinalgOp op, ArrayRef interchangeVector); + +/// Promotes the `subViews` into a new buffer allocated at the insertion point +/// `b`. Promotion occurs in 3 steps: +/// 1. Create a new buffer for a full tile (i.e. not clipped at the boundary). +/// 2. Take a full view on the buffer and `linalg.fill` it with zeros (use +/// float zero for now). +/// 3. Take a partial slice of the full view in step 2 and copy into it. +/// Infers statically sized buffers from subViews unless `dynamicBuffers` is +/// true. +/// +/// Returns a list of PromotionInfo which hold the promoted buffer and the +/// full and partial views indexing into the buffer. +// TODO: revisit dynamicBuffers option. +LinalgOp promoteSubViewOperands(OpBuilder &b, LinalgOp op, + llvm::SetVector subViews, + bool dynamicBuffers = false, + int64_t alignment = 0, + OperationFolder *folder = nullptr); + +/// Emit a suitable vector form for a Linalg op with fully static shape. +void vectorizeLinalgOp(OpBuilder &builder, Operation *op); + +/// Emits a loop nest of `LoopTy` with the proper body for `op`. +template +Optional linalgLowerOpToLoops(OpBuilder &builder, Operation *op); + +/// Emits a loop nest of `loop.for` with the proper body for `op`. +template +LogicalResult linalgOpToLoops(OpBuilder &builder, Operation *op); + +/// Emits a loop nest of `loop.parallel` with the proper body for `op`. +template +LogicalResult linalgOpToParallelLoops(OpBuilder &builder, Operation *op); + +/// Emits a loop nest of `affine.for` with the proper body for `op`. +template +LogicalResult linalgOpToAffineLoops(OpBuilder &builder, Operation *op); + +//===----------------------------------------------------------------------===// +// Preconditions that ensure the corresponding transformation succeeds and can be +// applied as a rewrite pattern. +//===----------------------------------------------------------------------===// +/// Emits a `generic` or `indexed_generic` operation with the `indexing_maps` +/// and `iterator_types` permuted according to `permutation`. +LogicalResult +interchangeGenericLinalgOpPrecondition(Operation *op, + ArrayRef interchangeVector); + +/// Promote std.subviews feeding linalg operations. +LogicalResult promoteSubviewsLinalgOpPrecondition( + Operation *op, Optional> operandIndicesToPromote = None); + +/// Rewrite a linalg.generic into a suitable vector.contraction op. +LogicalResult vectorizeLinalgOpPrecondition(Operation *op); + +//===----------------------------------------------------------------------===// +// Transformations exposed as rewrite patterns. +//===----------------------------------------------------------------------===// +// Marker used as attribute name in generated Linalg rewriting transformations. +struct LinalgTransforms { + static const StringLiteral kLinalgTransformMarker; +}; + +/// Helper class to control common attribute matching and setting behavior.
+struct LinalgMarker { + LinalgMarker(ArrayRef matchDisjunction = {}, + Optional replacement = None); + LinalgMarker(ArrayRef matchDisjunction, StringRef replacement); + LogicalResult checkAndNotify(PatternRewriter &rewriter, Operation *op) const; + void replaceLinalgMarker(PatternRewriter &rewriter, Operation *op) const; + +private: + SmallVector matchDisjunction; + Optional replacement; +}; + +/// +/// Linalg tiling patterns. +/// +/// Apply the `tileLinalgOp` transformation as a pattern. +/// `marker` controls LinalgTransformMarker matching and update when specified. +/// See `tileLinalgOp` for more details. +enum class LinalgTilingLoopType { + Loops = 0, + AffineLoops = 1, + ParallelLoops = 2 +}; +struct LinalgTilingOptions { + /// The tile sizes by which to tile. + SmallVector tileSizes{}; + LinalgTilingOptions &setTileSizes(ArrayRef ts) { + tileSizes.assign(ts.begin(), ts.end()); + return *this; + } + /// The interchange vector to reorder the tiled loops. + SmallVector interchangeVector{}; + LinalgTilingOptions &setInterchange(ArrayRef interchange) { + interchangeVector.assign(interchange.begin(), interchange.end()); + return *this; + } + /// The type of tile loops to generate. + LinalgTilingLoopType loopType{LinalgTilingLoopType::Loops}; + LinalgTilingOptions &setLoopType(LinalgTilingLoopType lt) { + loopType = lt; + return *this; + } +}; + +struct LinalgBaseTilingPattern : public RewritePattern { + LinalgBaseTilingPattern(StringRef opName, MLIRContext *context, + LinalgTilingOptions options, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1); + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + /// LinalgTransformMarker handles special attribute manipulations. + LinalgMarker marker; + /// Options to control tiling; + LinalgTilingOptions options; +}; + +template +struct LinalgTilingPattern : public LinalgBaseTilingPattern { + LinalgTilingPattern(MLIRContext *context, LinalgTilingOptions options, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgBaseTilingPattern(OpTy::getOperationName(), context, options, + marker, benefit) {} +}; + +/// +/// Linalg interchange patterns. +/// +/// Apply the `interchange` transformation as a pattern. +/// `marker` controls LinalgTransformMarker matching and update when specified. +/// See `interchange` for more details. +struct LinalgBaseInterchangePattern : public RewritePattern { + LinalgBaseInterchangePattern(StringRef opName, MLIRContext *context, + ArrayRef interchangeVector, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1); + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + /// LinalgTransformMarker handles special attribute manipulations. + LinalgMarker marker; + /// The interchange vector to reorder the iterators and indexing_maps dims. + SmallVector interchangeVector; +}; + +template +struct LinalgInterchangePattern : public LinalgBaseInterchangePattern { + LinalgInterchangePattern(MLIRContext *context, + ArrayRef interchangeVector, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgBaseInterchangePattern(OpTy::getOperationName(), context, + interchangeVector, marker, benefit) {} +}; + +/// +/// Linalg promotion patterns. +/// +/// Apply the `promoteSubViewOperands` transformation as a pattern. +/// `marker` controls LinalgTransformMarker matching and update when specified. +/// See `promoteSubViewOperands` for more details. 
+struct LinalgBasePromotionPattern : public RewritePattern { + LinalgBasePromotionPattern(StringRef opName, MLIRContext *context, + ArrayRef<unsigned> operandsToPromote = {}, + unsigned alignment = 0, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1); + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + /// LinalgTransformMarker handles special attribute manipulations. + LinalgMarker marker; + /// Indices of subViews to promote. + SmallVector<unsigned, 4> operandsToPromote; + /// Alignment of promoted buffer. + unsigned alignment; +}; + +template <typename OpTy> +struct LinalgPromotionPattern : public LinalgBasePromotionPattern { + LinalgPromotionPattern(MLIRContext *context, + ArrayRef<unsigned> operandsToPromote = {}, + unsigned alignment = 0, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgBasePromotionPattern(OpTy::getOperationName(), context, + operandsToPromote, alignment, marker, + benefit) {} + LinalgPromotionPattern(MLIRContext *context, + ArrayRef<unsigned> operandsToPromote, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgPromotionPattern(context, operandsToPromote, 0, marker, benefit) { + } + LinalgPromotionPattern(MLIRContext *context, unsigned alignment, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgPromotionPattern(context, {}, alignment, marker, benefit) {} + LinalgPromotionPattern(MLIRContext *context, LinalgMarker marker, + PatternBenefit benefit = 1) + : LinalgPromotionPattern(context, {}, 0, marker, benefit) {} +}; + +/// +/// Linalg vectorization patterns. +/// +/// Apply the `vectorizeLinalgOp` transformation as a pattern. +/// `marker` controls LinalgTransformMarker matching and update when specified. +/// See `vectorizeLinalgOp` for more details. +struct LinalgBaseVectorizationPattern : public RewritePattern { + LinalgBaseVectorizationPattern(StringRef opName, MLIRContext *context, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1); + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + /// LinalgTransformMarker handles special attribute manipulations. + LinalgMarker marker; +}; + +template <typename OpTy> +struct LinalgVectorizationPattern : public LinalgBaseVectorizationPattern { + LinalgVectorizationPattern(MLIRContext *context, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : LinalgBaseVectorizationPattern(OpTy::getOperationName(), context, + marker, benefit) {} +}; + +/// +/// Linalg lowering patterns. +/// +/// Apply the `linalgLowerOpToLoops` transformation as a pattern. +/// `marker` controls LinalgTransformMarker matching and update when specified. +/// See `linalgLowerOpToLoops` for more details. +enum class LinalgLoweringType { + LibraryCall = 0, + Loops = 1, + AffineLoops = 2, + ParallelLoops = 3 +}; +template <typename OpTy> +struct LinalgLoweringPattern : public RewritePattern { + LinalgLoweringPattern(MLIRContext *context, LinalgLoweringType loweringType, + LinalgMarker marker = LinalgMarker(), + PatternBenefit benefit = 1) + : RewritePattern(OpTy::getOperationName(), {}, benefit, context), + marker(marker), loweringType(loweringType) {} + // TODO: Move implementation to .cpp once named ops are auto-generated.
+ LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + LinalgOp linalgOp = dyn_cast<LinalgOp>(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + if (failed(promoteSubviewsLinalgOpPrecondition(op))) + return failure(); + + if (loweringType == LinalgLoweringType::LibraryCall) { + // TODO: Move lowering to library calls here. + return failure(); + } else if (loweringType == LinalgLoweringType::Loops) { + if (failed(linalgOpToLoops<OpTy>(rewriter, op))) + return failure(); + } else if (loweringType == LinalgLoweringType::AffineLoops) { + if (failed(linalgOpToAffineLoops<OpTy>(rewriter, op))) + return failure(); + } else if (failed(linalgOpToParallelLoops<OpTy>(rewriter, op))) { + return failure(); + } + rewriter.eraseOp(op); + return success(); + } + +private: + /// LinalgTransformMarker handles special attribute manipulations. + LinalgMarker marker; + /// Controls whether the pattern lowers to library calls, loop.for, affine.for + /// or loop.parallel. + LinalgLoweringType loweringType; +}; + +} // namespace linalg +} // namespace mlir + +#endif // DIALECT_LINALG_TRANSFORMS_TRANSFORMS_H_ diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 7dea577f0a496..1a5b6d888c0c3 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -101,63 +101,6 @@ SmallVector<Value, 4> applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ArrayRef<Value> values, OperationFolder *folder = nullptr); -struct TiledLinalgOp { - LinalgOp op; - SmallVector<Operation *, 8> loops; -}; - -/// Performs standalone tiling of a single LinalgOp by `tileSizes` and -/// permutes the loop nest according to `permutation`. -/// The permutation is expressed as a list of integers that specify -/// the new ordering of the loop nest. The length of `permutation` -/// must be equal to the length of `tileSizes`. -/// E.g. the permutation `(i,j,k) -> (j,k,i)` will be expressed with -/// `permutation = [1,2,0]`. All values in `permutation` must be -/// integers, in the range 0..`tileSizes.size()` without duplications -/// (i.e. `[1,1,2]` is an invalid permutation). An empty list -/// stands for the identity permutation. -/// Returns a struct containing the tiled loops in the specified order -/// and the cloned op if successful, llvm::None otherwise. -/// When non-null, the optional pointer `folder` is used to call into the -/// `createAndFold` builder method. If `folder` is null, the regular `create` -/// method is called. -Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op, - ArrayRef<Value> tileSizes, - ArrayRef<unsigned> permutation = {}, - OperationFolder *folder = nullptr); -Optional<TiledLinalgOp> tileLinalgOpToParallelLoops( - OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizes, - ArrayRef<unsigned> permutation = {}, OperationFolder *folder = nullptr); - -/// Performs standalone tiling of a single LinalgOp by constant `tileSizes` and -/// permutes the loop nest according to `permutation`. -/// The permutation is expressed as a list of integers that specify -/// the new ordering of the loop nest. The length of `permutation` -/// must be equal to the length of `tileSizes`. -/// E.g. the permutation `(i,j,k) -> (j,k,i)` will be expressed with -/// `permutation = [1,2,0]`. All values in `permutation` must be -/// integers, in the range 0..`tileSizes.size()` without duplications -/// (i.e. `[1,1,2]` is an invalid permutation). An empty list -/// stands for the identity permutation.
-/// Returns a struct containing the tiled loops in the specified order -/// and the cloned op if successful, llvm::None otherwise. -/// When non-null, the optional pointer `folder` is used to call into the -/// `createAndFold` builder method. If `folder` is null, the regular `create` -/// method is called. -Optional tileLinalgOp(OpBuilder &b, LinalgOp op, - ArrayRef tileSizes, - ArrayRef permutation = {}, - OperationFolder *folder = nullptr); -Optional tileLinalgOpToParallelLoops( - OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation = {}, OperationFolder *folder = nullptr); - -template -Optional tileLinalgOperation(OpBuilder &b, Operation *op, - Args... args) { - return tileLinalgOp(b, cast(op), args...); -} - struct PromotionInfo { Value buffer; Value fullLocalView; @@ -198,17 +141,6 @@ void applyPermutationToVector(SmallVector &inVec, inVec = auxVec; } -/// Prepares the SubView promotion later performed by `promoteSubViews` -/// (where most of the transformation happens). It arranges the new -/// operands for `LinalgOp op` and deallocates the new buffer(s) -/// It is the entry point for declarative transformation -/// Returns the cloned `LinalgOp` with the new operands -LinalgOp promoteSubViewOperands(OpBuilder &b, LinalgOp op, - llvm::SetVector subViews, - bool dynamicBuffers = false, - int64_t alignment = 0, - OperationFolder *folder = nullptr); - } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt b/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt index 2627cbb542dae..90de6b097a9d6 100644 --- a/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LoopOps/CMakeLists.txt @@ -4,5 +4,6 @@ add_mlir_doc(LoopOps -gen-dialect-doc LoopDialect Dialects/) set(LLVM_TARGET_DEFINITIONS Passes.td) mlir_tablegen(Passes.h.inc -gen-pass-decls) add_public_tablegen_target(MLIRLoopPassIncGen) +add_dependencies(mlir-headers MLIRLoopPassIncGen) add_mlir_doc(Passes -gen-pass-doc LoopPasses ./) diff --git a/mlir/include/mlir/Dialect/LoopOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/LoopOps/EDSC/Intrinsics.h index 21803e2bf13b3..1a1b3a0010b20 100644 --- a/mlir/include/mlir/Dialect/LoopOps/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/LoopOps/EDSC/Intrinsics.h @@ -10,11 +10,11 @@ #define MLIR_DIALECT_LOOPOPS_EDSC_INTRINSICS_H_ #include "mlir/Dialect/LoopOps/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" namespace mlir { namespace edsc { namespace intrinsics { + using loop_yield = OperationBuilder; } // namespace intrinsics diff --git a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td index 3e0004a9642a7..f0005f6ee5d15 100644 --- a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td +++ b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td @@ -135,17 +135,13 @@ def ForOp : Loop_Op<"for", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "Value lowerBound, Value upperBound, Value step, " "ValueRange iterArgs = llvm::None"> ]; let extraClassDeclaration = [{ - Block *getBody() { return ®ion().front(); } Value getInductionVar() { return getBody()->getArgument(0); } - OpBuilder getBodyBuilder() { - return OpBuilder(getBody(), std::prev(getBody()->end())); - } Block::BlockArgListType getRegionIterArgs() { return getBody()->getArguments().drop_front(); } @@ -234,24 +230,22 @@ def IfOp : Loop_Op<"if", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder 
*builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "Value cond, bool withElseRegion">, - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "TypeRange resultTypes, Value cond, bool withElseRegion"> ]; let extraClassDeclaration = [{ OpBuilder getThenBodyBuilder() { - assert(!thenRegion().empty() && "Unexpected empty 'then' region."); - Block &body = thenRegion().front(); - return OpBuilder(&body, - results().empty() ? std::prev(body.end()) : body.end()); + Block* body = getBody(0); + return results().empty() ? OpBuilder::atBlockTerminator(body) + : OpBuilder::atBlockEnd(body); } OpBuilder getElseBodyBuilder() { - assert(!elseRegion().empty() && "Unexpected empty 'else' region."); - Block &body = elseRegion().front(); - return OpBuilder(&body, - results().empty() ? std::prev(body.end()) : body.end()); + Block* body = getBody(1); + return results().empty() ? OpBuilder::atBlockTerminator(body) + : OpBuilder::atBlockEnd(body); } }]; } @@ -316,13 +310,12 @@ def ParallelOp : Loop_Op<"parallel", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "ValueRange lowerBounds, ValueRange upperBounds, " "ValueRange steps, ValueRange initVals = {}">, ]; let extraClassDeclaration = [{ - Block *getBody() { return ®ion().front(); } ValueRange getInductionVars() { return getBody()->getArguments(); } @@ -370,7 +363,7 @@ def ReduceOp : Loop_Op<"reduce", [HasParent<"ParallelOp">]> { let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "Value operand"> ]; @@ -412,7 +405,7 @@ def YieldOp : Loop_Op<"yield", [NoSideEffect, ReturnLike, Terminator]> { let arguments = (ins Variadic:$results); let builders = [ - OpBuilder<"Builder *builder, OperationState &result", + OpBuilder<"OpBuilder &builder, OperationState &result", [{ /* nothing to do */ }]> ]; } diff --git a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt index 15f4a4dfe8478..1e0901f07e91d 100644 --- a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt @@ -5,34 +5,41 @@ set(LLVM_TARGET_DEFINITIONS SPIRVBase.td) mlir_tablegen(SPIRVEnums.h.inc -gen-enum-decls) mlir_tablegen(SPIRVEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRSPIRVEnumsIncGen) +add_dependencies(mlir-headers MLIRSPIRVEnumsIncGen) set(LLVM_TARGET_DEFINITIONS SPIRVBase.td) mlir_tablegen(SPIRVEnumAvailability.h.inc -gen-spirv-enum-avail-decls) mlir_tablegen(SPIRVEnumAvailability.cpp.inc -gen-spirv-enum-avail-defs) mlir_tablegen(SPIRVCapabilityImplication.inc -gen-spirv-capability-implication) add_public_tablegen_target(MLIRSPIRVEnumAvailabilityIncGen) +add_dependencies(mlir-headers MLIRSPIRVEnumAvailabilityIncGen) set(LLVM_TARGET_DEFINITIONS SPIRVOps.td) mlir_tablegen(SPIRVAvailability.h.inc -gen-avail-interface-decls) mlir_tablegen(SPIRVAvailability.cpp.inc -gen-avail-interface-defs) mlir_tablegen(SPIRVOpAvailabilityImpl.inc -gen-spirv-avail-impls) add_public_tablegen_target(MLIRSPIRVAvailabilityIncGen) +add_dependencies(mlir-headers MLIRSPIRVAvailabilityIncGen) set(LLVM_TARGET_DEFINITIONS SPIRVOps.td) mlir_tablegen(SPIRVSerialization.inc -gen-spirv-serialization) add_public_tablegen_target(MLIRSPIRVSerializationGen) +add_dependencies(mlir-headers 
MLIRSPIRVSerializationGen) set(LLVM_TARGET_DEFINITIONS SPIRVBase.td) mlir_tablegen(SPIRVOpUtils.inc -gen-spirv-op-utils) add_public_tablegen_target(MLIRSPIRVOpUtilsGen) +add_dependencies(mlir-headers MLIRSPIRVOpUtilsGen) set(LLVM_TARGET_DEFINITIONS TargetAndABI.td) mlir_tablegen(TargetAndABI.h.inc -gen-struct-attr-decls) mlir_tablegen(TargetAndABI.cpp.inc -gen-struct-attr-defs) add_public_tablegen_target(MLIRSPIRVTargetAndABIIncGen) +add_dependencies(mlir-headers MLIRSPIRVTargetAndABIIncGen) set(LLVM_TARGET_DEFINITIONS Passes.td) mlir_tablegen(Passes.h.inc -gen-pass-decls) add_public_tablegen_target(MLIRSPIRVPassIncGen) +add_dependencies(mlir-headers MLIRSPIRVPassIncGen) add_mlir_doc(Passes -gen-pass-doc SPIRVPasses ./) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td index 13eddf687f5fd..12fae99725924 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td @@ -50,7 +50,7 @@ class SPV_AtomicUpdateWithValueOp traits = []> : let builders = [ OpBuilder< - [{Builder *builder, OperationState &state, Value pointer, + [{OpBuilder &builder, OperationState &state, Value pointer, ::mlir::spirv::Scope scope, ::mlir::spirv::MemorySemantics memory, Value value}], [{build(builder, state, value.getType(), pointer, scope, memory, value);}] diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index d63837c585c9c..64063cb77d011 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -3079,6 +3079,7 @@ def SPV_OC_OpSourceExtension : I32EnumAttrCase<"OpSourceExtension", 4> def SPV_OC_OpName : I32EnumAttrCase<"OpName", 5>; def SPV_OC_OpMemberName : I32EnumAttrCase<"OpMemberName", 6>; def SPV_OC_OpString : I32EnumAttrCase<"OpString", 7>; +def SPV_OC_OpLine : I32EnumAttrCase<"OpLine", 8>; def SPV_OC_OpExtension : I32EnumAttrCase<"OpExtension", 10>; def SPV_OC_OpExtInstImport : I32EnumAttrCase<"OpExtInstImport", 11>; def SPV_OC_OpExtInst : I32EnumAttrCase<"OpExtInst", 12>; @@ -3204,6 +3205,7 @@ def SPV_OC_OpBranchConditional : I32EnumAttrCase<"OpBranchConditional", def SPV_OC_OpReturn : I32EnumAttrCase<"OpReturn", 253>; def SPV_OC_OpReturnValue : I32EnumAttrCase<"OpReturnValue", 254>; def SPV_OC_OpUnreachable : I32EnumAttrCase<"OpUnreachable", 255>; +def SPV_OC_OpNoLine : I32EnumAttrCase<"OpNoLine", 317>; def SPV_OC_OpModuleProcessed : I32EnumAttrCase<"OpModuleProcessed", 330>; def SPV_OC_OpGroupNonUniformElect : I32EnumAttrCase<"OpGroupNonUniformElect", 333>; def SPV_OC_OpGroupNonUniformBallot : I32EnumAttrCase<"OpGroupNonUniformBallot", 339>; @@ -3223,7 +3225,7 @@ def SPV_OpcodeAttr : SPV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", [ SPV_OC_OpNop, SPV_OC_OpUndef, SPV_OC_OpSourceContinued, SPV_OC_OpSource, SPV_OC_OpSourceExtension, SPV_OC_OpName, SPV_OC_OpMemberName, SPV_OC_OpString, - SPV_OC_OpExtension, SPV_OC_OpExtInstImport, SPV_OC_OpExtInst, + SPV_OC_OpLine, SPV_OC_OpExtension, SPV_OC_OpExtInstImport, SPV_OC_OpExtInst, SPV_OC_OpMemoryModel, SPV_OC_OpEntryPoint, SPV_OC_OpExecutionMode, SPV_OC_OpCapability, SPV_OC_OpTypeVoid, SPV_OC_OpTypeBool, SPV_OC_OpTypeInt, SPV_OC_OpTypeFloat, SPV_OC_OpTypeVector, SPV_OC_OpTypeArray, @@ -3262,14 +3264,14 @@ def SPV_OpcodeAttr : SPV_OC_OpAtomicUMax, SPV_OC_OpAtomicAnd, SPV_OC_OpAtomicOr, SPV_OC_OpAtomicXor, SPV_OC_OpPhi, SPV_OC_OpLoopMerge, SPV_OC_OpSelectionMerge, SPV_OC_OpLabel, SPV_OC_OpBranch, SPV_OC_OpBranchConditional, 
SPV_OC_OpReturn, - SPV_OC_OpReturnValue, SPV_OC_OpUnreachable, SPV_OC_OpModuleProcessed, - SPV_OC_OpGroupNonUniformElect, SPV_OC_OpGroupNonUniformBallot, - SPV_OC_OpGroupNonUniformIAdd, SPV_OC_OpGroupNonUniformFAdd, - SPV_OC_OpGroupNonUniformIMul, SPV_OC_OpGroupNonUniformFMul, - SPV_OC_OpGroupNonUniformSMin, SPV_OC_OpGroupNonUniformUMin, - SPV_OC_OpGroupNonUniformFMin, SPV_OC_OpGroupNonUniformSMax, - SPV_OC_OpGroupNonUniformUMax, SPV_OC_OpGroupNonUniformFMax, - SPV_OC_OpSubgroupBallotKHR + SPV_OC_OpReturnValue, SPV_OC_OpUnreachable, SPV_OC_OpNoLine, + SPV_OC_OpModuleProcessed, SPV_OC_OpGroupNonUniformElect, + SPV_OC_OpGroupNonUniformBallot, SPV_OC_OpGroupNonUniformIAdd, + SPV_OC_OpGroupNonUniformFAdd, SPV_OC_OpGroupNonUniformIMul, + SPV_OC_OpGroupNonUniformFMul, SPV_OC_OpGroupNonUniformSMin, + SPV_OC_OpGroupNonUniformUMin, SPV_OC_OpGroupNonUniformFMin, + SPV_OC_OpGroupNonUniformSMax, SPV_OC_OpGroupNonUniformUMax, + SPV_OC_OpGroupNonUniformFMax, SPV_OC_OpSubgroupBallotKHR ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td index 8e83221023c4e..e7f034b715259 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVCompositeOps.td @@ -111,7 +111,7 @@ def SPV_CompositeExtractOp : SPV_Op<"CompositeExtract", [NoSideEffect]> { ); let builders = [ - OpBuilder<[{Builder *builder, OperationState &state, + OpBuilder<[{OpBuilder &builder, OperationState &state, Value composite, ArrayRef indices}]> ]; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td index 864c9a563d325..0e5ed27ed1c55 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td @@ -55,7 +55,7 @@ def SPV_BranchOp : SPV_Op<"Branch", [ let builders = [ OpBuilder< - "Builder *, OperationState &state, " + "OpBuilder &, OperationState &state, " "Block *successor, ValueRange arguments = {}", [{ state.addSuccessors(successor); state.addOperands(arguments); @@ -137,7 +137,7 @@ def SPV_BranchConditionalOp : SPV_Op<"BranchConditional", [ let builders = [ OpBuilder< - "Builder *builder, OperationState &state, Value condition, " + "OpBuilder &builder, OperationState &state, Value condition, " "Block *trueBlock, ValueRange trueArguments, " "Block *falseBlock, ValueRange falseArguments, " "Optional> weights = {}", @@ -145,8 +145,8 @@ def SPV_BranchConditionalOp : SPV_Op<"BranchConditional", [ ArrayAttr weightsAttr; if (weights) { weightsAttr = - builder->getI32ArrayAttr({static_cast(weights->first), - static_cast(weights->second)}); + builder.getI32ArrayAttr({static_cast(weights->first), + static_cast(weights->second)}); } build(builder, state, condition, trueArguments, falseArguments, weightsAttr, trueBlock, falseBlock); @@ -290,7 +290,7 @@ def SPV_LoopOp : SPV_Op<"loop", [InFunctionScope]> { let regions = (region AnyRegion:$body); - let builders = [OpBuilder<"Builder *builder, OperationState &state">]; + let builders = [OpBuilder<"OpBuilder &builder, OperationState &state">]; let extraClassDeclaration = [{ // Returns the entry block. @@ -465,8 +465,8 @@ def SPV_SelectionOp : SPV_Op<"selection", [InFunctionScope]> { /// newly inserted spv.selection op afterwards. 
static SelectionOp createIfThen( Location loc, Value condition, - function_ref thenBody, - OpBuilder *builder); + function_ref thenBody, + OpBuilder &builder); }]; let hasOpcode = 0; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td index 11ebd52439332..24d3c413b5bc7 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td @@ -854,7 +854,7 @@ def SPV_SelectOp : SPV_Op<"Select", SPV_SelectType:$result ); - let builders = [OpBuilder<[{Builder *builder, OperationState &state, + let builders = [OpBuilder<[{OpBuilder &builder, OperationState &state, Value cond, Value trueValue, Value falseValue}]>]; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td index 36b0879669b97..34be336bb2a56 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVNonUniformOps.td @@ -147,7 +147,7 @@ def SPV_GroupNonUniformElectOp : SPV_Op<"GroupNonUniformElect", []> { ); let builders = [ - OpBuilder<[{Builder *builder, OperationState &state, spirv::Scope}]> + OpBuilder<[{OpBuilder &builder, OperationState &state, spirv::Scope}]> ]; let assemblyFormat = "$execution_scope attr-dict `:` type($result)"; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.td index c8932652bdfa8..e935a85685cd0 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVOps.td @@ -93,7 +93,7 @@ def SPV_AccessChainOp : SPV_Op<"AccessChain", [NoSideEffect]> { SPV_AnyPtr:$component_ptr ); - let builders = [OpBuilder<[{Builder *builder, OperationState &state, + let builders = [OpBuilder<[{OpBuilder &builder, OperationState &state, Value basePtr, ValueRange indices}]>]; let hasCanonicalizer = 1; @@ -214,7 +214,7 @@ def SPV_ExecutionModeOp : SPV_Op<"ExecutionMode", [InModuleScope]> { let autogenSerialization = 0; - let builders = [OpBuilder<[{Builder *builder, OperationState &state, + let builders = [OpBuilder<[{OpBuilder &builder, OperationState &state, spirv::FuncOp function, spirv::ExecutionMode executionMode, ArrayRef params}]>]; @@ -269,7 +269,7 @@ def SPV_LoadOp : SPV_Op<"Load", []> { let builders = [ OpBuilder<[{ - Builder *builder, OperationState &state, + OpBuilder &builder, OperationState &state, Value basePtr, IntegerAttr memory_access = {}, IntegerAttr alignment = {} }]> @@ -371,7 +371,7 @@ def SPV_StoreOp : SPV_Op<"Store", []> { let results = (outs); let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "Value ptr, Value value, ArrayRef namedAttrs = {}", [{ state.addOperands(ptr); state.addOperands(value); diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td index c9f27b971bb78..96daa2abec39a 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td @@ -16,6 +16,7 @@ #define SPIRV_STRUCTURE_OPS include "mlir/Dialect/SPIRV/SPIRVBase.td" +include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/CallInterfaces.td" include "mlir/Interfaces/SideEffects.td" @@ -59,7 +60,7 @@ def SPV_AddressOfOp : SPV_Op<"_address_of", [InFunctionScope, NoSideEffect]> { let autogenSerialization = 0; - let builders = [OpBuilder<[{Builder *builder, OperationState &state, + let builders = 
[OpBuilder<[{OpBuilder &builder, OperationState &state, spirv::GlobalVariableOp var}]>]; let assemblyFormat = "$variable attr-dict `:` type($pointer)"; @@ -121,9 +122,9 @@ def SPV_ConstantOp : SPV_Op<"constant", [ConstantLike, NoSideEffect]> { // Creates a constant zero/one of the given `type` at the current insertion // point of `builder` and returns it. static spirv::ConstantOp getZero(Type type, Location loc, - OpBuilder *builder); + OpBuilder &builder); static spirv::ConstantOp getOne(Type type, Location loc, - OpBuilder *builder); + OpBuilder &builder); }]; let hasOpcode = 0; @@ -188,7 +189,7 @@ def SPV_EntryPointOp : SPV_Op<"EntryPoint", [InModuleScope]> { let autogenSerialization = 0; - let builders = [OpBuilder<[{Builder *builder, OperationState &state, + let builders = [OpBuilder<[{OpBuilder &builder, OperationState &state, spirv::ExecutionModel executionModel, spirv::FuncOp function, ArrayRef interfaceVars}]>]; @@ -243,7 +244,7 @@ def SPV_FuncOp : SPV_Op<"func", [ let verifier = [{ return success(); }]; let builders = [OpBuilder<[{ - Builder *, OperationState &state, + OpBuilder &, OperationState &state, StringRef name, FunctionType type, spirv::FunctionControl control = spirv::FunctionControl::None, ArrayRef attrs = {} @@ -332,15 +333,15 @@ def SPV_GlobalVariableOp : SPV_Op<"globalVariable", [InModuleScope, Symbol]> { let results = (outs); let builders = [ - OpBuilder<"Builder *builder, OperationState &state, " + OpBuilder<"OpBuilder &builder, OperationState &state, " "TypeAttr type, ArrayRef namedAttrs", [{ state.addAttribute("type", type); state.addAttributes(namedAttrs); }]>, - OpBuilder<[{Builder *builder, OperationState &state, + OpBuilder<[{OpBuilder &builder, OperationState &state, Type type, StringRef name, unsigned descriptorSet, unsigned binding}]>, - OpBuilder<[{Builder *builder, OperationState &state, + OpBuilder<[{OpBuilder &builder, OperationState &state, Type type, StringRef name, spirv::BuiltIn builtin}]> ]; @@ -416,8 +417,8 @@ def SPV_ModuleOp : SPV_Op<"module", let regions = (region SizedRegion<1>:$body); let builders = [ - OpBuilder<[{Builder *, OperationState &state}]>, - OpBuilder<[{Builder *, OperationState &state, + OpBuilder<[{OpBuilder &, OperationState &state}]>, + OpBuilder<[{OpBuilder &, OperationState &state, spirv::AddressingModel addressing_model, spirv::MemoryModel memory_model}]> ]; diff --git a/mlir/include/mlir/Dialect/SPIRV/Serialization.h b/mlir/include/mlir/Dialect/SPIRV/Serialization.h index ef673472cd777..f6370a1b5ec26 100644 --- a/mlir/include/mlir/Dialect/SPIRV/Serialization.h +++ b/mlir/include/mlir/Dialect/SPIRV/Serialization.h @@ -26,7 +26,8 @@ class ModuleOp; /// Serializes the given SPIR-V `module` and writes to `binary`. On failure, /// reports errors to the error handler registered with the MLIR context for /// `module`. -LogicalResult serialize(ModuleOp module, SmallVectorImpl &binary); +LogicalResult serialize(ModuleOp module, SmallVectorImpl &binary, + bool emitDebugInfo = false); /// Deserializes the given SPIR-V `binary` module and creates a MLIR ModuleOp /// in the given `context`. 
Returns the ModuleOp on success; otherwise, reports diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index fa277f4f89def..4b8ec1d7ed914 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -334,7 +334,7 @@ def Shape_YieldOp : Shape_Op<"yield", [NoSideEffect, Terminator]> { let arguments = (ins Variadic:$operands); let builders = [OpBuilder< - "Builder *b, OperationState &result", [{ build(b, result, llvm::None); }] + "OpBuilder &b, OperationState &result", [{ build(b, result, llvm::None); }] >]; let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; diff --git a/mlir/include/mlir/Dialect/StandardOps/CMakeLists.txt b/mlir/include/mlir/Dialect/StandardOps/CMakeLists.txt index f33061b2d87cf..9f57627c321fb 100644 --- a/mlir/include/mlir/Dialect/StandardOps/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/StandardOps/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h index 8df7021d66278..5f0d6d83df99b 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h @@ -68,6 +68,7 @@ class MemRefBoundsCapture : public BoundsCapture { class VectorBoundsCapture : public BoundsCapture { public: explicit VectorBoundsCapture(Value v); + explicit VectorBoundsCapture(VectorType t); VectorBoundsCapture(const VectorBoundsCapture &) = default; VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default; diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h index ca1a19609490e..f3b9304f0c7f0 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h @@ -9,15 +9,17 @@ #define MLIR_DIALECT_STANDARDOPS_EDSC_INTRINSICS_H_ #include "mlir/Dialect/StandardOps/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" namespace mlir { namespace edsc { namespace intrinsics { +using std_addi = ValueBuilder; using std_addf = ValueBuilder; using std_alloc = ValueBuilder; +using std_alloca = ValueBuilder; using std_call = OperationBuilder; +using std_create_complex = ValueBuilder; using std_constant = ValueBuilder; using std_constant_float = ValueBuilder; using std_constant_index = ValueBuilder; @@ -25,13 +27,16 @@ using std_constant_int = ValueBuilder; using std_dealloc = OperationBuilder; using std_dim = ValueBuilder; using std_extract_element = ValueBuilder; +using std_im = ValueBuilder; using std_index_cast = ValueBuilder; using std_muli = ValueBuilder; using std_mulf = ValueBuilder; using std_memref_cast = ValueBuilder; +using std_re = ValueBuilder; using std_ret = OperationBuilder; using std_select = ValueBuilder; using std_load = ValueBuilder; +using std_splat = ValueBuilder; using std_store = OperationBuilder; using std_subi = ValueBuilder; using std_sub_view = ValueBuilder; @@ -46,7 +51,7 @@ using std_sign_extendi = ValueBuilder; /// /// Prerequisites: /// All Handles have already captured previously constructed IR objects. -OperationHandle std_br(BlockHandle bh, ArrayRef operands); +BranchOp std_br(BlockHandle bh, ValueRange operands); /// Creates a new mlir::Block* and branches to it from the current block. /// Argument types are specified by `operands`. 
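The block of `std_*` aliases added above maps each EDSC intrinsic to exactly one standard op. A minimal sketch of how such aliases are typically used inside a `ScopedContext` (the function setup and names below are illustrative, not part of this patch):

```c++
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/EDSC/Builders.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;

// Builds `(a + b) * b` at the start of `func`'s body; assumes `func` has at
// least two f32 arguments. Each alias creates one std op at the scoped
// insertion point and yields its result Value.
static void buildArithmetic(FuncOp func) {
  OpBuilder builder(func.getBody());
  ScopedContext scope(builder, func.getLoc());
  Value a = func.getArgument(0), b = func.getArgument(1);
  Value sum = std_addf(a, b);
  Value prod = std_mulf(sum, b);
  (void)prod;
}
```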
@@ -61,9 +66,8 @@ OperationHandle std_br(BlockHandle bh, ArrayRef<Value> operands); /// All `operands` have already captured an mlir::Value /// captures.size() == operands.size() /// captures and operands are pairwise of the same type. -OperationHandle std_br(BlockHandle *bh, ArrayRef<Type> types, - MutableArrayRef<Value> captures, - ArrayRef<Value> operands); +BranchOp std_br(BlockHandle *bh, ArrayRef<Type> types, + MutableArrayRef<Value> captures, ValueRange operands); /// Branches into the mlir::Block* captured by BlockHandle `trueBranch` with /// `trueOperands` if `cond` evaluates to `true` (resp. `falseBranch` and @@ -71,10 +75,9 @@ OperationHandle std_br(BlockHandle *bh, ArrayRef<Type> types, /// /// Prerequisites: /// All Handles have captured previously constructed IR objects. -OperationHandle std_cond_br(Value cond, BlockHandle trueBranch, - ArrayRef<Value> trueOperands, - BlockHandle falseBranch, - ArrayRef<Value> falseOperands); +CondBranchOp std_cond_br(Value cond, BlockHandle trueBranch, + ValueRange trueOperands, BlockHandle falseBranch, + ValueRange falseOperands); /// Eagerly creates new mlir::Block* with argument types specified by /// `trueOperands`/`falseOperands`. @@ -92,13 +95,11 @@ OperationHandle std_cond_br(Value cond, BlockHandle trueBranch, /// `falseCaptures`.size() == `falseOperands`.size() /// `trueCaptures` and `trueOperands` are pairwise of the same type /// `falseCaptures` and `falseOperands` are pairwise of the same type. -OperationHandle std_cond_br(Value cond, BlockHandle *trueBranch, - ArrayRef<Type> trueTypes, - MutableArrayRef<Value> trueCaptures, - ArrayRef<Value> trueOperands, - BlockHandle *falseBranch, ArrayRef<Type> falseTypes, - MutableArrayRef<Value> falseCaptures, - ArrayRef<Value> falseOperands); +CondBranchOp +std_cond_br(Value cond, BlockHandle *trueBranch, ArrayRef<Type> trueTypes, + MutableArrayRef<Value> trueCaptures, ValueRange trueOperands, + BlockHandle *falseBranch, ArrayRef<Type> falseTypes, + MutableArrayRef<Value> falseCaptures, ValueRange falseOperands); /// Provide an index notation around std_load and std_store. using StdIndexedValue = TemplatedIndexedValue<intrinsics::std_load, intrinsics::std_store>; diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index d28e22c5b61bc..573f9b7c988f1 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -47,7 +47,7 @@ class ConstantFloatOp : public ConstantOp { using ConstantOp::ConstantOp; /// Builds a constant float op producing a float of the specified type. - static void build(Builder *builder, OperationState &result, + static void build(OpBuilder &builder, OperationState &result, const APFloat &value, FloatType type); APFloat getValue() { return getAttrOfType<FloatAttr>("value").getValue(); } @@ -64,12 +64,12 @@ class ConstantIntOp : public ConstantOp { public: using ConstantOp::ConstantOp; /// Build a constant int op producing an integer of the specified width. - static void build(Builder *builder, OperationState &result, int64_t value, + static void build(OpBuilder &builder, OperationState &result, int64_t value, unsigned width); /// Build a constant int op producing an integer with the specified type, /// which must be an integer type. - static void build(Builder *builder, OperationState &result, int64_t value, + static void build(OpBuilder &builder, OperationState &result, int64_t value, Type type); int64_t getValue() { return getAttrOfType<IntegerAttr>("value").getInt(); } @@ -87,7 +87,7 @@ class ConstantIndexOp : public ConstantOp { using ConstantOp::ConstantOp; /// Build a constant int op producing an index.
- static void build(Builder *builder, OperationState &result, int64_t value); + static void build(OpBuilder &builder, OperationState &result, int64_t value); int64_t getValue() { return getAttrOfType("value").getInt(); } @@ -136,7 +136,7 @@ class DmaStartOp public: using Op::Op; - static void build(Builder *builder, OperationState &result, Value srcMemRef, + static void build(OpBuilder &builder, OperationState &result, Value srcMemRef, ValueRange srcIndices, Value destMemRef, ValueRange destIndices, Value numElements, Value tagMemRef, ValueRange tagIndices, Value stride = nullptr, @@ -260,7 +260,7 @@ class DmaWaitOp public: using Op::Op; - static void build(Builder *builder, OperationState &result, Value tagMemRef, + static void build(OpBuilder &builder, OperationState &result, Value tagMemRef, ValueRange tagIndices, Value numElements); static StringRef getOperationName() { return "std.dma_wait"; } @@ -286,6 +286,7 @@ class DmaWaitOp void print(OpAsmPrinter &p); LogicalResult fold(ArrayRef cstOperands, SmallVectorImpl &results); + LogicalResult verify(); }; /// Prints dimension and symbol list. diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 17b959396303f..efcbdf63983ea 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -47,7 +47,7 @@ class CastOp traits = []> : let results = (outs AnyType); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source, Type destType", [{ + "OpBuilder &builder, OperationState &result, Value source, Type destType", [{ impl::buildCastOp(builder, result, source, destType); }]>]; @@ -129,21 +129,19 @@ class FloatArithmeticOp traits = []> : // // %0 = alloclike(%m)[%s] : memref<8x?xf32, (d0, d1)[s0] -> ((d0 + s0), d1)> // -class AllocLikeOp resultDecorators = [], - list traits = []> : - Std_Op { +class AllocLikeOp traits = []> : + Std_Op])> { let arguments = (ins Variadic:$value, Confined, [IntMinValue<0>]>:$alignment); - let results = (outs Arg); + let results = (outs Res); let builders = [OpBuilder< - "Builder *builder, OperationState &result, MemRefType memrefType", [{ + "OpBuilder &builder, OperationState &result, MemRefType memrefType", [{ result.types.push_back(memrefType); }]>, OpBuilder< - "Builder *builder, OperationState &result, MemRefType memrefType, " # + "OpBuilder &builder, OperationState &result, MemRefType memrefType, " # "ValueRange operands, IntegerAttr alignment = IntegerAttr()", [{ result.addOperands(operands); result.types.push_back(memrefType); @@ -278,7 +276,7 @@ def AddIOp : IntArithmeticOp<"addi", [Commutative]> { // AllocOp //===----------------------------------------------------------------------===// -def AllocOp : AllocLikeOp<"alloc", [MemAlloc], [MemoryEffects<[MemAlloc]>]> { +def AllocOp : AllocLikeOp<"alloc"> { let summary = "memory allocation operation"; let description = [{ The `alloc` operation allocates a region of memory, as specified by its @@ -528,16 +526,11 @@ def GenericAtomicRMWOp : Std_Op<"generic_atomic_rmw", [ let skipDefaultBuilders = 1; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, " + OpBuilder<"OpBuilder &builder, OperationState &result, " "Value memref, ValueRange ivs"> ]; let extraClassDeclaration = [{ - OpBuilder getBodyBuilder() { - assert(!body().empty() && "Unexpected empty 'body' region."); - Block &block = body().front(); - return OpBuilder(&block, block.end()); - } // The value stored in memref[ivs]. 
Value getCurrentValue() { return body().front().getArgument(0); @@ -567,7 +560,8 @@ def AtomicYieldOp : Std_Op<"atomic_yield", [ //===----------------------------------------------------------------------===// def BranchOp : Std_Op<"br", - [DeclareOpInterfaceMethods, NoSideEffect, Terminator]> { + [DeclareOpInterfaceMethods, + NoSideEffect, Terminator]> { let summary = "branch operation"; let description = [{ The `br` operation represents a branch operation in a function. @@ -588,7 +582,7 @@ def BranchOp : Std_Op<"br", let arguments = (ins Variadic:$destOperands); let successors = (successor AnySuccessor:$dest); - let builders = [OpBuilder<"Builder *, OperationState &result, Block *dest, " + let builders = [OpBuilder<"OpBuilder &, OperationState &result, Block *dest, " "ValueRange destOperands = {}", [{ result.addSuccessors(dest); result.addOperands(destOperands); @@ -603,10 +597,6 @@ def BranchOp : Std_Op<"br", /// Erase the operand at 'index' from the operand list. void eraseOperand(unsigned index); - - /// Returns the successor that would be chosen with the given constant - /// operands. Returns nullptr if a single successor could not be chosen. - Block *getSuccessorForOperands(ArrayRef); }]; let hasCanonicalizer = 1; @@ -638,21 +628,21 @@ def CallOp : Std_Op<"call", [CallOpInterface]> { let results = (outs Variadic); let builders = [OpBuilder< - "Builder *builder, OperationState &result, FuncOp callee," + "OpBuilder &builder, OperationState &result, FuncOp callee," "ValueRange operands = {}", [{ result.addOperands(operands); - result.addAttribute("callee", builder->getSymbolRefAttr(callee)); + result.addAttribute("callee", builder.getSymbolRefAttr(callee)); result.addTypes(callee.getType().getResults()); }]>, OpBuilder< - "Builder *builder, OperationState &result, SymbolRefAttr callee," + "OpBuilder &builder, OperationState &result, SymbolRefAttr callee," "ArrayRef results, ValueRange operands = {}", [{ result.addOperands(operands); result.addAttribute("callee", callee); result.addTypes(results); }]>, OpBuilder< - "Builder *builder, OperationState &result, StringRef callee," + "OpBuilder &builder, OperationState &result, StringRef callee," "ArrayRef results, ValueRange operands = {}", [{ - build(builder, result, builder->getSymbolRefAttr(callee), results, + build(builder, result, builder.getSymbolRefAttr(callee), results, operands); }]>]; @@ -714,7 +704,7 @@ def CallIndirectOp : Std_Op<"call_indirect", [ let results = (outs Variadic:$results); let builders = [OpBuilder< - "Builder *, OperationState &result, Value callee," + "OpBuilder &, OperationState &result, Value callee," "ValueRange operands = {}", [{ result.operands.push_back(callee); result.addOperands(operands); @@ -847,7 +837,7 @@ def CmpFOp : Std_Op<"cmpf", let results = (outs BoolLike:$result); let builders = [OpBuilder< - "Builder *builder, OperationState &result, CmpFPredicate predicate," + "OpBuilder &builder, OperationState &result, CmpFPredicate predicate," "Value lhs, Value rhs", [{ ::buildCmpFOp(builder, result, predicate, lhs, rhs); }]>]; @@ -969,7 +959,7 @@ def CmpIOp : Std_Op<"cmpi", let results = (outs BoolLike:$result); let builders = [OpBuilder< - "Builder *builder, OperationState &result, CmpIPredicate predicate," + "OpBuilder &builder, OperationState &result, CmpIPredicate predicate," "Value lhs, Value rhs", [{ ::buildCmpIOp(builder, result, predicate, lhs, rhs); }]>]; @@ -991,12 +981,47 @@ def CmpIOp : Std_Op<"cmpi", let assemblyFormat = "$predicate `,` $lhs `,` $rhs attr-dict `:` type($lhs)"; } 
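A recurring change throughout these TableGen files is the migration of declarative builders from `Builder *` to `OpBuilder &`. A minimal sketch of the C++ `build()` signature this generates, with an illustrative body (not taken from this patch):

```c++
// Sketch of the build() method TableGen generates from
// OpBuilder<"OpBuilder &builder, OperationState &state, ...">. The body is
// illustrative: attribute/type helpers still work as before, and the full
// OpBuilder additionally lets a builder body create auxiliary IR if needed.
static void build(OpBuilder &builder, OperationState &state, Value operand) {
  state.addOperands(operand);
  state.addTypes(builder.getI1Type());
  state.addAttribute("flag", builder.getBoolAttr(true));
}
```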
+//===----------------------------------------------------------------------===// +// CreateComplexOp +//===----------------------------------------------------------------------===// + +def CreateComplexOp : Std_Op<"create_complex", + [NoSideEffect, + AllTypesMatch<["real", "imaginary"]>, + TypesMatchWith<"complex element type matches real operand type", + "complex", "real", + "$_self.cast<ComplexType>().getElementType()">, + TypesMatchWith<"complex element type matches imaginary operand type", + "complex", "imaginary", + "$_self.cast<ComplexType>().getElementType()">]> { + let summary = "creates a complex number"; + let description = [{ + The `create_complex` operation creates a complex number from two + floating-point operands, the real and the imaginary part. + + Example: + + ```mlir + %a = create_complex %b, %c : complex<f32> + ``` + }]; + + let arguments = (ins AnyFloat:$real, AnyFloat:$imaginary); + let results = (outs Complex<AnyFloat>:$complex); + + let assemblyFormat = "$real `,` $imaginary attr-dict `:` type($complex)"; + + // `CreateComplexOp` is fully verified by its traits. + let verifier = ?; +} + +//===----------------------------------------------------------------------===// // CondBranchOp //===----------------------------------------------------------------------===// def CondBranchOp : Std_Op<"cond_br", - [AttrSizedOperandSegments, DeclareOpInterfaceMethods<BranchOpInterface>, + [AttrSizedOperandSegments, + DeclareOpInterfaceMethods<BranchOpInterface>, NoSideEffect, Terminator]> { let summary = "conditional branch operation"; let description = [{ @@ -1032,13 +1057,13 @@ def CondBranchOp : Std_Op<"cond_br", let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value condition," + "OpBuilder &builder, OperationState &result, Value condition," "Block *trueDest, ValueRange trueOperands," "Block *falseDest, ValueRange falseOperands", [{ build(builder, result, condition, trueOperands, falseOperands, trueDest, falseDest); }]>, OpBuilder< - "Builder *builder, OperationState &result, Value condition," + "OpBuilder &builder, OperationState &result, Value condition," "Block *trueDest, Block *falseDest, ValueRange falseOperands = {}", [{ build(builder, result, condition, trueDest, ValueRange(), falseDest, falseOperands); @@ -1081,7 +1106,7 @@ def CondBranchOp : Std_Op<"cond_br", /// Erase the operand at 'index' from the true operand list. void eraseTrueOperand(unsigned index) { - eraseSuccessorOperand(trueIndex, index); + trueDestOperandsMutable().erase(index); } // Accessors for operands to the 'false' destination. @@ -1100,13 +1125,9 @@ def CondBranchOp : Std_Op<"cond_br", /// Erase the operand at 'index' from the false operand list. void eraseFalseOperand(unsigned index) { - eraseSuccessorOperand(falseIndex, index); + falseDestOperandsMutable().erase(index); } - /// Returns the successor that would be chosen with the given constant - /// operands. Returns nullptr if a single successor could not be chosen. - Block *getSuccessorForOperands(ArrayRef<Attribute> operands); - private: /// Get the index of the first true destination operand.
unsigned getTrueDestOperandIndex() { return 1; } @@ -1170,7 +1191,7 @@ def ConstantOp : Std_Op<"constant", let results = (outs AnyType); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Attribute value", + "OpBuilder &builder, OperationState &result, Attribute value", [{ build(builder, result, value.getType(), value); }]>]; let extraClassDeclaration = [{ @@ -1251,6 +1272,36 @@ def CosOp : FloatUnaryOp<"cos"> { }]; } +def SinOp : FloatUnaryOp<"sin"> { + let summary = "sine of the specified value"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `std.sin` ssa-use `:` type + ``` + + The `sin` operation computes the sine of a given value. It takes one + operand and returns one result of the same type. This type may be a float + scalar type, a vector whose element type is float, or a tensor of floats. + It has no standard attributes. + + Example: + + ```mlir + // Scalar sine value. + %a = sin %b : f64 + + // SIMD vector element-wise sine value. + %f = sin %g : vector<4xf32> + + // Tensor element-wise sine value. + %x = sin %y : tensor<4x?xf32> + ``` + }]; +} + + //===----------------------------------------------------------------------===// // DeallocOp //===----------------------------------------------------------------------===// @@ -1319,10 +1370,10 @@ def DimOp : Std_Op<"dim", [NoSideEffect]> { let results = (outs Index); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value memrefOrTensor," + "OpBuilder &builder, OperationState &result, Value memrefOrTensor," "unsigned index", [{ - auto indexType = builder->getIndexType(); - auto indexAttr = builder->getIntegerAttr(indexType, index); + auto indexType = builder.getIndexType(); + auto indexAttr = builder.getIntegerAttr(indexType, index); build(builder, result, indexType, memrefOrTensor, indexAttr); }]>]; @@ -1415,7 +1466,7 @@ def ExtractElementOp : Std_Op<"extract_element", let results = (outs AnyType:$result); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value aggregate," + "OpBuilder &builder, OperationState &result, Value aggregate," "ValueRange indices = {}", [{ auto resType = aggregate.getType().cast<ShapedType>() .getElementType(); @@ -1480,6 +1531,36 @@ def FPTruncOp : CastOp<"fptrunc">, Arguments<(ins AnyType:$in)> { let hasFolder = 0; } +//===----------------------------------------------------------------------===// +// ImOp +//===----------------------------------------------------------------------===// + +def ImOp : Std_Op<"im", + [NoSideEffect, + TypesMatchWith<"complex element type matches result type", + "complex", "imaginary", + "$_self.cast<ComplexType>().getElementType()">]> { + let summary = "extracts the imaginary part of a complex number"; + let description = [{ + The `im` operation takes a single complex number as its operand and extracts + the imaginary part as a floating-point value. + + Example: + + ```mlir + %a = im %b : complex<f32> + ``` + }]; + + let arguments = (ins Complex<AnyFloat>:$complex); + let results = (outs AnyFloat:$imaginary); + + let assemblyFormat = "$complex attr-dict `:` type($complex)"; + + // `ImOp` is fully verified by its traits.
+ let verifier = ?; +} + //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// @@ -1553,7 +1634,7 @@ def LoadOp : Std_Op<"load", let results = (outs AnyType:$result); let builders = [OpBuilder< - "Builder *, OperationState &result, Value memref," + "OpBuilder &, OperationState &result, Value memref," "ValueRange indices = {}", [{ auto memrefType = memref.getType().cast<MemRefType>(); result.addOperands(memref); @@ -1805,12 +1886,12 @@ def PrefetchOp : Std_Op<"prefetch"> { BoolAttr:$isDataCache); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value memref," + "OpBuilder &builder, OperationState &result, Value memref," "ArrayRef<Value> indices, bool isWrite, unsigned hint, bool isData", [{ - auto hintAttr = builder->getI32IntegerAttr(hint); - auto isWriteAttr = builder->getBoolAttr(isWrite); - auto isDataCacheAttr = builder->getBoolAttr(isData); + auto hintAttr = builder.getI32IntegerAttr(hint); + auto isWriteAttr = builder.getBoolAttr(isWrite); + auto isDataCacheAttr = builder.getBoolAttr(isData); result.addOperands(memref); result.addOperands(indices); result.addAttribute("localityHint", hintAttr); @@ -1851,8 +1932,8 @@ def RankOp : Std_Op<"rank", [NoSideEffect]> { let verifier = ?; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value tensor", [{ - auto indexType = builder->getIndexType(); + "OpBuilder &builder, OperationState &result, Value tensor", [{ + auto indexType = builder.getIndexType(); build(builder, result, indexType, tensor); }]>]; @@ -1860,6 +1941,36 @@ def RankOp : Std_Op<"rank", [NoSideEffect]> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } +//===----------------------------------------------------------------------===// +// ReOp +//===----------------------------------------------------------------------===// + +def ReOp : Std_Op<"re", + [NoSideEffect, + TypesMatchWith<"complex element type matches result type", + "complex", "real", + "$_self.cast<ComplexType>().getElementType()">]> { + let summary = "extracts the real part of a complex number"; + let description = [{ + The `re` operation takes a single complex number as its operand and extracts + the real part as a floating-point value. + + Example: + + ```mlir + %a = re %b : complex<f32> + ``` + }]; + + let arguments = (ins Complex<AnyFloat>:$complex); + let results = (outs AnyFloat:$real); + + let assemblyFormat = "$complex attr-dict `:` type($complex)"; + + // `ReOp` is fully verified by its traits.
+ let verifier = ?; +} + //===----------------------------------------------------------------------===// // RemFOp //===----------------------------------------------------------------------===// @@ -1894,7 +2005,7 @@ def ReturnOp : Std_Op<"return", [NoSideEffect, HasParent<"FuncOp">, ReturnLike, let arguments = (ins Variadic:$operands); let builders = [OpBuilder< - "Builder *b, OperationState &result", [{ build(b, result, llvm::None); }] + "OpBuilder &b, OperationState &result", [{ build(b, result, llvm::None); }] >]; let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; @@ -1959,7 +2070,7 @@ def SelectOp : Std_Op<"select", [NoSideEffect, let results = (outs AnyType:$result); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value condition," + "OpBuilder &builder, OperationState &result, Value condition," "Value trueValue, Value falseValue", [{ result.addOperands({condition, trueValue, falseValue}); result.addTypes(trueValue.getType()); @@ -2118,7 +2229,7 @@ def SignExtendIOp : Std_Op<"sexti", let results = (outs SignlessIntegerLike); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value value, Type destType", [{ + "OpBuilder &builder, OperationState &result, Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; @@ -2192,7 +2303,7 @@ def SplatOp : Std_Op<"splat", [NoSideEffect, let results = (outs AnyTypeOf<[AnyVector, AnyStaticShapeTensor]>:$aggregate); let builders = - [OpBuilder<"Builder *builder, OperationState &result, Value element, " + [OpBuilder<"OpBuilder &builder, OperationState &result, Value element, " "Type aggregateType", [{ build(builder, result, aggregateType, element); }]>]; @@ -2270,7 +2381,7 @@ def StoreOp : Std_Op<"store", Variadic:$indices); let builders = [OpBuilder< - "Builder *, OperationState &result, Value valueToStore, Value memref", [{ + "OpBuilder &, OperationState &result, Value valueToStore, Value memref", [{ result.addOperands(valueToStore); result.addOperands(memref); }]>]; @@ -2450,12 +2561,12 @@ def SubViewOp : Std_Op<"subview", [ let builders = [ OpBuilder< - "Builder *b, OperationState &result, Value source, " + "OpBuilder &b, OperationState &result, Value source, " "ValueRange offsets, ValueRange sizes, " "ValueRange strides, Type resultType = Type(), " "ArrayRef attrs = {}">, OpBuilder< - "Builder *builder, OperationState &result, " + "OpBuilder &builder, OperationState &result, " "Type resultType, Value source"> ]; @@ -2608,7 +2719,7 @@ def TensorLoadOp : Std_Op<"tensor_load", let verifier = ?; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value memref", [{ + "OpBuilder &builder, OperationState &result, Value memref", [{ auto memrefType = memref.getType().cast(); auto resultType = RankedTensorType::get(memrefType.getShape(), memrefType.getElementType()); @@ -2684,7 +2795,7 @@ def TruncateIOp : Std_Op<"trunci", [NoSideEffect, SameOperandsAndResultShape]> { let results = (outs SignlessIntegerLike); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value value, Type destType", [{ + "OpBuilder &builder, OperationState &result, Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; @@ -2915,7 +3026,7 @@ def ZeroExtendIOp : Std_Op<"zexti", [NoSideEffect, SameOperandsAndResultShape]> let results = (outs SignlessIntegerLike); let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value value, Type destType", [{ + "OpBuilder &builder, OperationState &result, 
Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt new file mode 100644 index 0000000000000..413c6523a7564 --- /dev/null +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRStandardTransformsIncGen) + +add_mlir_doc(Passes -gen-pass-doc StandardPasses ./) diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h new file mode 100644 index 0000000000000..c0622e529564b --- /dev/null +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h @@ -0,0 +1,29 @@ + +//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes that expose pass constructors in the +// StandardOps transformation library. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ +#define MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ + +#include <memory> + +namespace mlir { + +class Pass; + +/// Creates an instance of the ExpandAtomic pass. +std::unique_ptr<Pass> createExpandAtomicPass(); + +} // end namespace mlir + +#endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td new file mode 100644 index 0000000000000..b65c03d33fc1b --- /dev/null +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td @@ -0,0 +1,19 @@ +//===-- Passes.td - StandardOps pass definition file -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES +#define MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES + +include "mlir/Pass/PassBase.td" + +def ExpandAtomic : FunctionPass<"expand-atomic"> { + let summary = "Expands AtomicRMWOp into GenericAtomicRMWOp."; + let constructor = "mlir::createExpandAtomicPass()"; +} + +#endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Builders.h b/mlir/include/mlir/Dialect/Vector/EDSC/Builders.h index 396053f63213f..65767af61b2df 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Builders.h @@ -16,7 +16,6 @@ #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" diff --git a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h index 87e552c561e22..49d2ee67444bc 100644 --- a/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Vector/EDSC/Intrinsics.h @@ -18,6 +18,8 @@ using vector_broadcast = ValueBuilder; using vector_contract = ValueBuilder; using vector_matmul = ValueBuilder; using vector_print = OperationBuilder; +using vector_transfer_read = ValueBuilder; +using vector_transfer_write = OperationBuilder; using vector_type_cast = ValueBuilder; } // namespace intrinsics diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 0abfbdc9c0da5..94261fa93e4db 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -143,10 +143,10 @@ def Vector_ContractionOp : ``` }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value lhs, Value rhs, " + "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " "Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes">, OpBuilder< - "Builder *builder, OperationState &result, Value lhs, Value rhs, " + "OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " "Value acc, ArrayRef> indexingExprs, " "ArrayRef iteratorTypes">]; let extraClassDeclaration = [{ @@ -304,7 +304,7 @@ def Vector_ShuffleOp : : vector<2xf32>, vector<2xf32> ; yields vector<4xf32> ``` }]; - let builders = [OpBuilder<"Builder *builder, OperationState &result," + let builders = [OpBuilder<"OpBuilder &builder, OperationState &result," "Value v1, Value v2, ArrayRef">]; let extraClassDeclaration = [{ static StringRef getMaskAttrName() { return "mask"; } @@ -372,7 +372,7 @@ def Vector_ExtractOp : ``` }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source," + "OpBuilder &builder, OperationState &result, Value source," "ArrayRef">]; let extraClassDeclaration = [{ static StringRef getPositionAttrName() { return "position"; } @@ -418,7 +418,7 @@ def Vector_ExtractSlicesOp : ``` }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, TupleType tupleType, " # + "OpBuilder &builder, OperationState &result, TupleType tupleType, " # "Value vector, ArrayRef sizes, " # "ArrayRef strides">]; let extraClassDeclaration = [{ @@ -464,7 +464,7 @@ def Vector_FMAOp : let verifier = ?; let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)"; let builders = [OpBuilder< - 
"Builder *b, OperationState &result, Value lhs, Value rhs, Value acc", + "OpBuilder &b, OperationState &result, Value lhs, Value rhs, Value acc", "build(b, result, lhs.getType(), lhs, rhs, acc);">]; let extraClassDeclaration = [{ VectorType getVectorType() { return lhs().getType().cast(); } @@ -535,7 +535,7 @@ def Vector_InsertOp : }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source, " # + "OpBuilder &builder, OperationState &result, Value source, " # "Value dest, ArrayRef">]; let extraClassDeclaration = [{ static StringRef getPositionAttrName() { return "position"; } @@ -638,7 +638,7 @@ def Vector_InsertStridedSliceOp : }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source, Value dest, " # + "OpBuilder &builder, OperationState &result, Value source, Value dest, " # "ArrayRef offsets, ArrayRef strides">]; let extraClassDeclaration = [{ static StringRef getOffsetsAttrName() { return "offsets"; } @@ -849,7 +849,7 @@ def Vector_StridedSliceOp : ``` }]; let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source, " # + "OpBuilder &builder, OperationState &result, Value source, " # "ArrayRef offsets, ArrayRef sizes, " # "ArrayRef strides">]; let extraClassDeclaration = [{ @@ -1131,7 +1131,7 @@ def Vector_ShapeCastOp : def Vector_TypeCastOp : Vector_Op<"type_cast", [NoSideEffect]>, Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>, - Results<(outs AnyMemRef)> { + Results<(outs AnyMemRef:$result)> { let summary = "type_cast op converts a scalar memref to a vector memref"; let description = [{ Performs a conversion from a memref with scalar element to a memref with a @@ -1154,12 +1154,10 @@ def Vector_TypeCastOp : ``` }]; + /// Build the canonical memRefType with a single vector. + /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref>. 
let builders = [OpBuilder< - "Builder *builder, OperationState &result, Value source">]; - - let parser = [{ - return impl::parseCastOp(parser, result); - }]; + "OpBuilder &builder, OperationState &result, Value source">]; let extraClassDeclaration = [{ MemRefType getMemRefType() { @@ -1169,6 +1167,10 @@ def Vector_TypeCastOp : return getResult().getType().cast(); } }]; + + let assemblyFormat = [{ + $memref attr-dict `:` type($memref) `to` type($result) + }]; } def Vector_ConstantMaskOp : @@ -1439,13 +1441,13 @@ def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect, ``` }]; let builders = [ - OpBuilder<"Builder *builder, OperationState &result, Value lhs, Value rhs, " + OpBuilder<"OpBuilder &builder, OperationState &result, Value lhs, Value rhs, " "unsigned lhsRows, unsigned lhsColumns, unsigned rhsColumns", [{ result.addOperands({lhs, rhs}); - result.addAttribute("lhs_rows", builder->getI32IntegerAttr(lhsRows)); - result.addAttribute("lhs_columns", builder->getI32IntegerAttr(lhsColumns)); - result.addAttribute("rhs_columns", builder->getI32IntegerAttr(rhsColumns)); + result.addAttribute("lhs_rows", builder.getI32IntegerAttr(lhsRows)); + result.addAttribute("lhs_columns", builder.getI32IntegerAttr(lhsColumns)); + result.addAttribute("rhs_columns", builder.getI32IntegerAttr(rhsColumns)); result.addTypes(VectorType::get(lhsRows * rhsColumns, lhs.getType().cast().getElementType())); }]>, diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h index aa2a94cdfbb0c..d2858b8d5c20b 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -16,7 +16,7 @@ class MLIRContext; class OwningRewritePatternList; /// Collect a set of patterns to convert from the Vector dialect to itself. -/// Should be merged with populateVectorToAffineLoopsConversionPatterns. +/// Should be merged with populateVectorToLoopsLoweringPattern. void populateVectorToVectorConversionPatterns( MLIRContext *context, OwningRewritePatternList &patterns, ArrayRef coarseVectorShape = {}, diff --git a/mlir/include/mlir/EDSC/Builders.h b/mlir/include/mlir/EDSC/Builders.h index 0ef894930ed8c..c1437892f6f65 100644 --- a/mlir/include/mlir/EDSC/Builders.h +++ b/mlir/include/mlir/EDSC/Builders.h @@ -33,17 +33,17 @@ class NestedBuilder; /// setting and restoring of insertion points. class ScopedContext { public: - ScopedContext(OpBuilder &builder, Location location); + ScopedContext(OpBuilder &b, Location location); /// Sets the insertion point of the builder to 'newInsertPt' for the duration /// of the scope. The existing insertion point of the builder is restored on /// destruction. - ScopedContext(OpBuilder &builder, OpBuilder::InsertPoint newInsertPt, + ScopedContext(OpBuilder &b, OpBuilder::InsertPoint newInsertPt, Location location); ~ScopedContext(); static MLIRContext *getContext(); - static OpBuilder &getBuilder(); + static OpBuilder &getBuilderRef(); static Location getLocation(); private: @@ -59,34 +59,38 @@ class ScopedContext { /// Top level OpBuilder. OpBuilder &builder; - /// The previous insertion point of the builder. - Optional prevBuilderInsertPoint; + /// Guard to the previous insertion point. + OpBuilder::InsertionGuard guard; /// Current location. Location location; /// Parent context we return into. ScopedContext *enclosingScopedContext; - /// Defensively keeps track of the current NestedBuilder to ensure proper - /// scoping usage. 
- NestedBuilder *nestedBuilder; }; template struct ValueBuilder { - // Builder-based template ValueBuilder(Args... args) { - Operation *op = ScopedContext::getBuilder() - .create(ScopedContext::getLocation(), args...) - .getOperation(); - if (op->getNumResults() != 1) - llvm_unreachable("unsupported operation, use OperationBuilder instead"); - value = op->getResult(0); + value = ScopedContext::getBuilderRef() + .create(ScopedContext::getLocation(), args...) + .getResult(); } - operator Value() { return value; } Value value; }; +template +struct OperationBuilder { + template + OperationBuilder(Args... args) { + op = ScopedContext::getBuilderRef().create(ScopedContext::getLocation(), + args...); + } + operator Op() { return op; } + operator Operation *() { return op.getOperation(); } + Op op; +}; + /// A NestedBuilder is a scoping abstraction to create an idiomatic syntax /// embedded in C++ that serves the purpose of building nested MLIR. /// Nesting and compositionality is obtained by using the strict ordering that @@ -113,21 +117,20 @@ class NestedBuilder { /// scoping itself, we use enter/exit pairs of operations. /// As a consequence we must allocate a new OpBuilder + ScopedContext and /// let the escape. - /// Step back "prev" times from the end of the block to set up the insertion - /// point, which is useful for non-empty blocks. - void enter(mlir::Block *block, int prev = 0) { - bodyScope = new ScopedContext( - ScopedContext::getBuilder(), - OpBuilder::InsertPoint(block, std::prev(block->end(), prev)), - ScopedContext::getLocation()); - bodyScope->nestedBuilder = this; + void enter(mlir::Block *block) { + bodyScope = new ScopedContext(ScopedContext::getBuilderRef(), + OpBuilder::InsertPoint(block, block->end()), + ScopedContext::getLocation()); + if (!block->empty()) { + auto &termOp = block->back(); + if (termOp.isKnownTerminator()) + ScopedContext::getBuilderRef().setInsertionPoint(&termOp); + } } /// Exit the current mlir::Block by explicitly deleting the dynamically /// allocated OpBuilder and ScopedContext. void exit() { - // Reclaim now to exit the scope. - bodyScope->nestedBuilder = nullptr; delete bodyScope; bodyScope = nullptr; } @@ -168,15 +171,13 @@ class LoopBuilder : public NestedBuilder { private: LoopBuilder() = default; - friend LoopBuilder makeAffineLoopBuilder(Value *iv, ArrayRef lbHandles, - ArrayRef ubHandles, - int64_t step); + friend LoopBuilder makeAffineLoopBuilder(Value *iv, ArrayRef lbs, + ArrayRef ubs, int64_t step); friend LoopBuilder makeParallelLoopBuilder(MutableArrayRef ivs, - ArrayRef lbHandles, - ArrayRef ubHandles, + ArrayRef lbs, + ArrayRef ubs, ArrayRef steps); - friend LoopBuilder makeLoopBuilder(Value *iv, Value lbHandle, Value ubHandle, - Value stepHandle, + friend LoopBuilder makeLoopBuilder(Value *iv, Value lb, Value ub, Value step, MutableArrayRef iterArgsHandles, ValueRange iterArgsInitValues); Operation *op; @@ -194,7 +195,8 @@ class Append {}; class BlockBuilder : public NestedBuilder { public: /// Enters the mlir::Block* previously captured by `bh` and sets the insertion - /// point to its end. + /// point to its end. If the block already contains a terminator, set the + /// insertion point before the terminator. BlockBuilder(BlockHandle bh, Append); /// Constructs a new mlir::Block with argument types derived from `args`. @@ -229,52 +231,12 @@ class BlockBuilder : public NestedBuilder { BlockBuilder &operator=(BlockBuilder &other) = delete; }; -/// Base class for Value, OperationHandle and BlockHandle. 
-/// Not meant to be used outside of these classes. -class CapturableHandle { -protected: - CapturableHandle() = default; -}; - -/// An OperationHandle can be used in lieu of Value to capture the -/// operation in cases when one does not care about, or cannot extract, a -/// unique Value from the operation. -/// This can be used for capturing zero result operations as well as -/// multi-result operations that are not supported by Value. -/// We do not distinguish further between zero and multi-result operations at -/// this time. -struct OperationHandle : public CapturableHandle { - OperationHandle() : op(nullptr) {} - OperationHandle(Operation *op) : op(op) {} - - OperationHandle(const OperationHandle &) = default; - OperationHandle &operator=(const OperationHandle &) = default; - - /// Generic mlir::Op create. This is the key to being extensible to the whole - /// of MLIR without duplicating the type system or the op definitions. - template - static OperationHandle create(Args... args); - template - static Op createOp(Args... args); - - /// Generic create for a named operation. - static OperationHandle create(StringRef name, ArrayRef operands, - ArrayRef resultTypes, - ArrayRef attributes = {}); - - operator Operation *() { return op; } - Operation *getOperation() const { return op; } - -private: - Operation *op; -}; - /// A BlockHandle represents a (potentially "delayed") Block abstraction. /// This extra abstraction is necessary because an mlir::Block is not an /// mlir::Value. /// A BlockHandle should be captured by pointer but otherwise passed by Value /// everywhere. -class BlockHandle : public CapturableHandle { +class BlockHandle { public: /// A BlockHandle constructed without an mlir::Block* represents a "delayed" /// Block. A delayed Block represents the declaration (in the PL sense) of a @@ -361,22 +323,6 @@ struct StructuredIndexed { SmallVector exprs; }; -template -OperationHandle OperationHandle::create(Args... args) { - return OperationHandle(ScopedContext::getBuilder() - .create(ScopedContext::getLocation(), args...) - .getOperation()); -} - -template -Op OperationHandle::createOp(Args... args) { - return cast( - OperationHandle(ScopedContext::getBuilder() - .create(ScopedContext::getLocation(), args...) - .getOperation()) - .getOperation()); -} - /// A TemplatedIndexedValue brings an index notation over the template Load and /// Store parameters. Assigning to an IndexedValue emits an actual `Store` /// operation, while converting an IndexedValue to a Value emits an actual @@ -404,10 +350,10 @@ class TemplatedIndexedValue { } /// Emits a `store`. - OperationHandle operator=(const TemplatedIndexedValue &rhs) { + Store operator=(const TemplatedIndexedValue &rhs) { return Store(rhs, value, indices); } - OperationHandle operator=(Value rhs) { return Store(rhs, value, indices); } + Store operator=(Value rhs) { return Store(rhs, value, indices); } /// Emits a `load` when converting to a Value. operator Value() const { return Load(value, indices); } @@ -441,28 +387,28 @@ class TemplatedIndexedValue { } /// Assignment-arithmetic operator overloadings. 
- OperationHandle operator+=(Value e); - OperationHandle operator-=(Value e); - OperationHandle operator*=(Value e); - OperationHandle operator/=(Value e); - OperationHandle operator%=(Value e); - OperationHandle operator^=(Value e); - OperationHandle operator+=(TemplatedIndexedValue e) { + Store operator+=(Value e); + Store operator-=(Value e); + Store operator*=(Value e); + Store operator/=(Value e); + Store operator%=(Value e); + Store operator^=(Value e); + Store operator+=(TemplatedIndexedValue e) { return this->operator+=(static_cast(e)); } - OperationHandle operator-=(TemplatedIndexedValue e) { + Store operator-=(TemplatedIndexedValue e) { return this->operator-=(static_cast(e)); } - OperationHandle operator*=(TemplatedIndexedValue e) { + Store operator*=(TemplatedIndexedValue e) { return this->operator*=(static_cast(e)); } - OperationHandle operator/=(TemplatedIndexedValue e) { + Store operator/=(TemplatedIndexedValue e) { return this->operator/=(static_cast(e)); } - OperationHandle operator%=(TemplatedIndexedValue e) { + Store operator%=(TemplatedIndexedValue e) { return this->operator%=(static_cast(e)); } - OperationHandle operator^=(TemplatedIndexedValue e) { + Store operator^=(TemplatedIndexedValue e) { return this->operator^=(static_cast(e)); } diff --git a/mlir/include/mlir/EDSC/Intrinsics.h b/mlir/include/mlir/EDSC/Intrinsics.h deleted file mode 100644 index 14fa16ae06023..0000000000000 --- a/mlir/include/mlir/EDSC/Intrinsics.h +++ /dev/null @@ -1,56 +0,0 @@ -//===- Intrinsics.h - MLIR Operations for Declarative Builders ---*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Provides intuitive composable intrinsics for building snippets of MLIR -// declaratively. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_EDSC_INTRINSICS_H_ -#define MLIR_EDSC_INTRINSICS_H_ - -#include "mlir/EDSC/Builders.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/Support/LLVM.h" - -namespace mlir { - -class MemRefType; -class Type; - -namespace edsc { - -/// Provides a set of first class intrinsics. -/// In the future, most of intrinsics related to Operation that don't contain -/// other operations should be Tablegen'd. -namespace intrinsics { - -template -struct OperationBuilder : public OperationHandle { - template - OperationBuilder(Args... args) - : OperationHandle(OperationHandle::create(args...)) {} - OperationBuilder(ArrayRef vs) - : OperationHandle(OperationHandle::create(vs)) {} - template - OperationBuilder(ArrayRef vs, Args... args) - : OperationHandle(OperationHandle::create(vs, args...)) {} - template - OperationBuilder(T t, ArrayRef vs, Args... args) - : OperationHandle(OperationHandle::create(t, vs, args...)) {} - template - OperationBuilder(T1 t1, T2 t2, ArrayRef vs, Args... 
args) - : OperationHandle(OperationHandle::create(t1, t2, vs, args...)) {} - OperationBuilder() : OperationHandle(OperationHandle::create()) {} -}; - -} // namespace intrinsics -} // namespace edsc -} // namespace mlir - -#endif // MLIR_EDSC_INTRINSICS_H_ diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index 020c66e07e854..8155820d63473 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -167,6 +167,7 @@ struct UnrankedMemRefType { //===----------------------------------------------------------------------===// // Small runtime support "lib" for vector.print lowering during codegen. //===----------------------------------------------------------------------===// +extern "C" MLIR_CRUNNERUTILS_EXPORT void print_i1(bool b); extern "C" MLIR_CRUNNERUTILS_EXPORT void print_i32(int32_t i); extern "C" MLIR_CRUNNERUTILS_EXPORT void print_i64(int64_t l); extern "C" MLIR_CRUNNERUTILS_EXPORT void print_f32(float f); diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 21c39baffeac6..1bd6f14ffb5fe 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -66,6 +66,11 @@ class AffineMap { static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context); + /// Returns an identity affine map (d0, ..., dn) -> (dp, ..., dn) on the most + /// minor dimensions. + static AffineMap getMinorIdentityMap(unsigned dims, unsigned results, + MLIRContext *context); + /// Returns an AffineMap representing a permutation. /// The permutation is expressed as a non-empty vector of integers. /// E.g. the permutation `(i,j,k) -> (j,k,i)` will be expressed with @@ -94,6 +99,10 @@ class AffineMap { /// dimensional identifiers. bool isIdentity() const; + /// Returns true if the map is a minor identity map, i.e. an identity affine + /// map (d0, ..., dn) -> (dp, ..., dn) on the most minor dimensions. + static bool isMinorIdentity(AffineMap map); + /// Returns true if this affine map is an empty map, i.e., () -> (). bool isEmpty() const; diff --git a/mlir/include/mlir/IR/Attributes.h b/mlir/include/mlir/IR/Attributes.h index 4774946619728..6d24cd0876485 100644 --- a/mlir/include/mlir/IR/Attributes.h +++ b/mlir/include/mlir/IR/Attributes.h @@ -296,6 +296,10 @@ class DictionaryAttr Attribute get(StringRef name) const; Attribute get(Identifier name) const; + /// Return the specified named attribute if present, None otherwise. + Optional getNamed(StringRef name) const; + Optional getNamed(Identifier name) const; + /// Support range iteration. using iterator = llvm::ArrayRef::iterator; iterator begin() const; @@ -303,6 +307,11 @@ class DictionaryAttr bool empty() const { return size() == 0; } size_t size() const; + /// Sorts the NamedAttributes in the array ordered by name as expected by + /// getWithSorted. + /// Requires: uniquely named attributes. + static void sort(SmallVectorImpl &array); + /// Methods for supporting type inquiry through isa, cast, and dyn_cast. static bool kindof(unsigned kind) { return kind == StandardAttributes::Dictionary; @@ -694,7 +703,8 @@ class DenseElementsAttr : public ElementsAttr { /// Constructs a dense elements attribute from an array of element values. /// Each element attribute value is expected to be an element of 'type'. - /// 'type' must be a vector or tensor with static shape. + /// 'type' must be a vector or tensor with static shape. 
If the element of + /// `type` is non-integer/index/float it is assumed to be a string type. static DenseElementsAttr get(ShapedType type, ArrayRef values); /// Constructs a dense integer elements attribute from an array of integer @@ -1471,26 +1481,26 @@ inline ::llvm::hash_code hash_value(Attribute arg) { } //===----------------------------------------------------------------------===// -// NamedAttributeList +// MutableDictionaryAttr //===----------------------------------------------------------------------===// -/// A NamedAttributeList is a mutable wrapper around a DictionaryAttr. It +/// A MutableDictionaryAttr is a mutable wrapper around a DictionaryAttr. It /// provides additional interfaces for adding, removing, replacing attributes /// within a DictionaryAttr. /// /// We assume there will be relatively few attributes on a given operation /// (maybe a dozen or so, but not hundreds or thousands) so we use linear /// searches for everything. -class NamedAttributeList { +class MutableDictionaryAttr { public: - NamedAttributeList(DictionaryAttr attrs = nullptr) + MutableDictionaryAttr(DictionaryAttr attrs = nullptr) : attrs((attrs && !attrs.empty()) ? attrs : nullptr) {} - NamedAttributeList(ArrayRef attributes); + MutableDictionaryAttr(ArrayRef attributes); - bool operator!=(const NamedAttributeList &other) const { + bool operator!=(const MutableDictionaryAttr &other) const { return !(*this == other); } - bool operator==(const NamedAttributeList &other) const { + bool operator==(const MutableDictionaryAttr &other) const { return attrs == other.attrs; } @@ -1508,6 +1518,10 @@ class NamedAttributeList { Attribute get(StringRef name) const; Attribute get(Identifier name) const; + /// Return the specified named attribute if present, None otherwise. + Optional getNamed(StringRef name) const; + Optional getNamed(Identifier name) const; + /// If the an attribute exists with the specified name, change it to the new /// value. Otherwise, add a new attribute with the specified name/value. void set(Identifier name, Attribute value); diff --git a/mlir/include/mlir/IR/Block.h b/mlir/include/mlir/IR/Block.h index 12f82f84b52a7..859fa1713ffde 100644 --- a/mlir/include/mlir/IR/Block.h +++ b/mlir/include/mlir/IR/Block.h @@ -156,54 +156,23 @@ class Block : public IRObjectWithUseList, /// Recomputes the ordering of child operations within the block. void recomputeOpOrder(); -private: - /// A utility iterator that filters out operations that are not 'OpT'. - template - class op_filter_iterator - : public llvm::filter_iterator { - static bool filter(Operation &op) { return llvm::isa(op); } - - public: - op_filter_iterator(Block::iterator it, Block::iterator end) - : llvm::filter_iterator( - it, end, &filter) {} - - /// Allow implicit conversion to the underlying block iterator. - operator Block::iterator() const { return this->wrapped(); } - }; - -public: /// This class provides iteration over the held operations of a block for a /// specific operation type. template - class op_iterator : public llvm::mapped_iterator, - OpT (*)(Operation &)> { - static OpT unwrap(Operation &op) { return cast(op); } - - public: - using reference = OpT; - - /// Initializes the iterator to the specified filter iterator. - op_iterator(op_filter_iterator it) - : llvm::mapped_iterator, OpT (*)(Operation &)>( - it, &unwrap) {} - - /// Allow implicit conversion to the underlying block iterator. 
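
The new `AffineMap::getMinorIdentityMap`/`isMinorIdentity` pair declared in AffineMap.h above has a small contract worth spelling out; a sketch:

#include "mlir/IR/AffineMap.h"
#include <cassert>
using namespace mlir;

static void minorIdentityDemo(MLIRContext *ctx) {
  // Keep only the 2 most minor of 3 dims: (d0, d1, d2) -> (d1, d2).
  AffineMap map =
      AffineMap::getMinorIdentityMap(/*dims=*/3, /*results=*/2, ctx);
  assert(AffineMap::isMinorIdentity(map));
}
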
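
`DictionaryAttr::getNamed`, added above, differs from `get` by returning the whole `NamedAttribute` (name plus value); a sketch, with `dict` assumed to be an existing attribute dictionary:

#include "mlir/IR/Attributes.h"
using namespace mlir;

static void lookupNamed(DictionaryAttr dict) {
  if (Optional<NamedAttribute> named = dict.getNamed("alignment")) {
    Identifier name = named->first; // the attribute name survives the lookup
    Attribute value = named->second;
    (void)name;
    (void)value;
  }
}
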
- operator Block::iterator() const { return this->wrapped(); } - }; + using op_iterator = detail::op_iterator; /// Return an iterator range over the operations within this block that are of /// 'OpT'. template iterator_range> getOps() { auto endIt = end(); - return {op_filter_iterator(begin(), endIt), - op_filter_iterator(endIt, endIt)}; + return {detail::op_filter_iterator(begin(), endIt), + detail::op_filter_iterator(endIt, endIt)}; } template op_iterator op_begin() { - return op_filter_iterator(begin(), end()); + return detail::op_filter_iterator(begin(), end()); } template op_iterator op_end() { - return op_filter_iterator(end(), end()); + return detail::op_filter_iterator(end(), end()); } /// Return an iterator range over the operation within this block excluding diff --git a/mlir/include/mlir/IR/BlockSupport.h b/mlir/include/mlir/IR/BlockSupport.h index 3c246749c584c..f3dd6140420e4 100644 --- a/mlir/include/mlir/IR/BlockSupport.h +++ b/mlir/include/mlir/IR/BlockSupport.h @@ -75,10 +75,73 @@ class SuccessorRange final friend RangeBaseT; }; +//===----------------------------------------------------------------------===// +// Operation Iterators +//===----------------------------------------------------------------------===// + +namespace detail { +/// A utility iterator that filters out operations that are not 'OpT'. +template +class op_filter_iterator + : public llvm::filter_iterator { + static bool filter(Operation &op) { return llvm::isa(op); } + +public: + op_filter_iterator(IteratorT it, IteratorT end) + : llvm::filter_iterator(it, end, + &filter) {} + + /// Allow implicit conversion to the underlying iterator. + operator IteratorT() const { return this->wrapped(); } +}; + +/// This class provides iteration over the held operations of a block for a +/// specific operation type. +template +class op_iterator + : public llvm::mapped_iterator, + OpT (*)(Operation &)> { + static OpT unwrap(Operation &op) { return cast(op); } + +public: + using reference = OpT; + + /// Initializes the iterator to the specified filter iterator. + op_iterator(op_filter_iterator it) + : llvm::mapped_iterator, + OpT (*)(Operation &)>(it, &unwrap) {} + + /// Allow implicit conversion to the underlying block iterator. + operator IteratorT() const { return this->wrapped(); } +}; +} // end namespace detail } // end namespace mlir namespace llvm { +/// Provide support for hashing successor ranges. 
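
Moving the iterator machinery into `detail` leaves `Block::getOps` usage unchanged; a usage sketch, with `ConstantOp` standing in for any op type:

#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Block.h"
using namespace mlir;

static void visitConstants(Block *block) {
  for (ConstantOp cst : block->getOps<ConstantOp>())
    (void)cst; // operations that are not ConstantOps are filtered out
}
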
+template <> +struct DenseMapInfo { + static mlir::SuccessorRange getEmptyKey() { + auto *pointer = llvm::DenseMapInfo::getEmptyKey(); + return mlir::SuccessorRange(pointer, 0); + } + static mlir::SuccessorRange getTombstoneKey() { + auto *pointer = llvm::DenseMapInfo::getTombstoneKey(); + return mlir::SuccessorRange(pointer, 0); + } + static unsigned getHashValue(mlir::SuccessorRange value) { + return llvm::hash_combine_range(value.begin(), value.end()); + } + static bool isEqual(mlir::SuccessorRange lhs, mlir::SuccessorRange rhs) { + if (rhs.getBase() == getEmptyKey().getBase()) + return lhs.getBase() == getEmptyKey().getBase(); + if (rhs.getBase() == getTombstoneKey().getBase()) + return lhs.getBase() == getTombstoneKey().getBase(); + return lhs == rhs; + } +}; + //===----------------------------------------------------------------------===// // ilist_traits for Operation //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index bceba806024d8..a11da75f3ebf6 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -171,41 +171,84 @@ class Builder { /// automatically inserted at an insertion point. The builder is copyable. class OpBuilder : public Builder { public: + struct Listener; + /// Create a builder with the given context. - explicit OpBuilder(MLIRContext *ctx) : Builder(ctx) {} + explicit OpBuilder(MLIRContext *ctx, Listener *listener = nullptr) + : Builder(ctx), listener(listener) {} /// Create a builder and set the insertion point to the start of the region. - explicit OpBuilder(Region *region) : Builder(region->getContext()) { + explicit OpBuilder(Region *region, Listener *listener = nullptr) + : OpBuilder(region->getContext(), listener) { if (!region->empty()) setInsertionPoint(®ion->front(), region->front().begin()); } - explicit OpBuilder(Region ®ion) : OpBuilder(®ion) {} - - virtual ~OpBuilder(); + explicit OpBuilder(Region ®ion, Listener *listener = nullptr) + : OpBuilder(®ion, listener) {} /// Create a builder and set insertion point to the given operation, which /// will cause subsequent insertions to go right before it. - explicit OpBuilder(Operation *op) : Builder(op->getContext()) { + explicit OpBuilder(Operation *op, Listener *listener = nullptr) + : OpBuilder(op->getContext(), listener) { setInsertionPoint(op); } - OpBuilder(Block *block, Block::iterator insertPoint) - : OpBuilder(block->getParent()) { + OpBuilder(Block *block, Block::iterator insertPoint, + Listener *listener = nullptr) + : OpBuilder(block->getParent()->getContext(), listener) { setInsertionPoint(block, insertPoint); } /// Create a builder and set the insertion point to before the first operation - /// in the block but still inside th block. - static OpBuilder atBlockBegin(Block *block) { - return OpBuilder(block, block->begin()); + /// in the block but still inside the block. + static OpBuilder atBlockBegin(Block *block, Listener *listener = nullptr) { + return OpBuilder(block, block->begin(), listener); } /// Create a builder and set the insertion point to after the last operation /// in the block but still inside the block. - static OpBuilder atBlockEnd(Block *block) { - return OpBuilder(block, block->end()); + static OpBuilder atBlockEnd(Block *block, Listener *listener = nullptr) { + return OpBuilder(block, block->end(), listener); } + /// Create a builder and set the insertion point to before the block + /// terminator. 
+ static OpBuilder atBlockTerminator(Block *block, + Listener *listener = nullptr) { + auto *terminator = block->getTerminator(); + assert(terminator != nullptr && "the block has no terminator"); + return OpBuilder(block, Block::iterator(terminator), listener); + } + + //===--------------------------------------------------------------------===// + // Listeners + //===--------------------------------------------------------------------===// + + /// This class represents a listener that may be used to hook into various + /// actions within an OpBuilder. + struct Listener { + virtual ~Listener(); + + /// Notification handler for when an operation is inserted into the builder. + /// `op` is the operation that was inserted. + virtual void notifyOperationInserted(Operation *op) {} + + /// Notification handler for when a block is created using the builder. + /// `block` is the block that was created. + virtual void notifyBlockCreated(Block *block) {} + }; + + /// Sets the listener of this builder to the one provided. + void setListener(Listener *newListener) { listener = newListener; } + + /// Returns the current listener of this builder, or nullptr if this builder + /// doesn't have a listener. + Listener *getListener() const { return listener; } + + //===--------------------------------------------------------------------===// + // Insertion Point Management + //===--------------------------------------------------------------------===// + /// This class represents a saved insertion point. class InsertPoint { public: @@ -296,21 +339,29 @@ class OpBuilder : public Builder { /// Returns the current insertion point of the builder. Block::iterator getInsertionPoint() const { return insertPoint; } - /// Insert the given operation at the current insertion point and return it. - virtual Operation *insert(Operation *op); + /// Returns the current block of the builder. + Block *getBlock() const { return block; } + + //===--------------------------------------------------------------------===// + // Block Creation + //===--------------------------------------------------------------------===// /// Add new block with 'argTypes' arguments and set the insertion point to the /// end of it. The block is inserted at the provided insertion point of /// 'parent'. - virtual Block *createBlock(Region *parent, Region::iterator insertPt = {}, - TypeRange argTypes = llvm::None); + Block *createBlock(Region *parent, Region::iterator insertPt = {}, + TypeRange argTypes = llvm::None); /// Add new block with 'argTypes' arguments and set the insertion point to the /// end of it. The block is placed before 'insertBefore'. Block *createBlock(Block *insertBefore, TypeRange argTypes = llvm::None); - /// Returns the current block of the builder. - Block *getBlock() const { return block; } + //===--------------------------------------------------------------------===// + // Operation Creation + //===--------------------------------------------------------------------===// + + /// Insert the given operation at the current insertion point and return it. + Operation *insert(Operation *op); /// Creates an operation given the fields represented as an OperationState. Operation *createOperation(const OperationState &state); @@ -319,10 +370,10 @@ class OpBuilder : public Builder { template OpTy create(Location location, Args &&... 
args) { OperationState state(location, OpTy::getOperationName()); - OpTy::build(this, state, std::forward(args)...); + OpTy::build(*this, state, std::forward(args)...); auto *op = createOperation(state); auto result = dyn_cast(op); - assert(result && "Builder didn't return the right type"); + assert(result && "builder didn't return the right type"); return result; } @@ -335,7 +386,7 @@ class OpBuilder : public Builder { // Create the operation without using 'createOperation' as we don't want to // insert it yet. OperationState state(location, OpTy::getOperationName()); - OpTy::build(this, state, std::forward(args)...); + OpTy::build(*this, state, std::forward(args)...); Operation *op = Operation::create(state); // Fold the operation. If successful destroy it, otherwise insert it. @@ -398,8 +449,13 @@ class OpBuilder : public Builder { } private: + /// The current block this builder is inserting into. Block *block = nullptr; + /// The insertion point within the block that this builder is inserting + /// before. Block::iterator insertPoint; + /// The optional listener for events of this builder. + Listener *listener; }; } // namespace mlir diff --git a/mlir/include/mlir/IR/CMakeLists.txt b/mlir/include/mlir/IR/CMakeLists.txt index 555b16fd29d03..06c177aff3a84 100644 --- a/mlir/include/mlir/IR/CMakeLists.txt +++ b/mlir/include/mlir/IR/CMakeLists.txt @@ -2,3 +2,10 @@ set(LLVM_TARGET_DEFINITIONS OpAsmInterface.td) mlir_tablegen(OpAsmInterface.h.inc -gen-op-interface-decls) mlir_tablegen(OpAsmInterface.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIROpAsmInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIROpAsmInterfacesIncGen) + +set(LLVM_TARGET_DEFINITIONS SymbolInterfaces.td) +mlir_tablegen(SymbolInterfaces.h.inc -gen-op-interface-decls) +mlir_tablegen(SymbolInterfaces.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRSymbolInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIRSymbolInterfacesIncGen) diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h index 292c14db3f1f4..73d55c2260c0f 100644 --- a/mlir/include/mlir/IR/Dialect.h +++ b/mlir/include/mlir/IR/Dialect.h @@ -281,4 +281,14 @@ template struct DialectRegistration { } // namespace mlir +namespace llvm { +/// Provide isa functionality for Dialects. +template +struct isa_impl { + static inline bool doit(const ::mlir::Dialect &dialect) { + return T::getDialectNamespace() == dialect.getNamespace(); + } +}; +} // namespace llvm + #endif diff --git a/mlir/include/mlir/Analysis/Dominance.h b/mlir/include/mlir/IR/Dominance.h similarity index 98% rename from mlir/include/mlir/Analysis/Dominance.h rename to mlir/include/mlir/IR/Dominance.h index 7d6200d833319..27ea3c6948e7b 100644 --- a/mlir/include/mlir/Analysis/Dominance.h +++ b/mlir/include/mlir/IR/Dominance.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_ANALYSIS_DOMINANCE_H -#define MLIR_ANALYSIS_DOMINANCE_H +#ifndef MLIR_IR_DOMINANCE_H +#define MLIR_IR_DOMINANCE_H #include "mlir/IR/RegionGraphTraits.h" #include "llvm/Support/GenericDomTree.h" diff --git a/mlir/include/mlir/IR/Function.h b/mlir/include/mlir/IR/Function.h index 1b2dd19eedab1..e39b88091f2b6 100644 --- a/mlir/include/mlir/IR/Function.h +++ b/mlir/include/mlir/IR/Function.h @@ -32,9 +32,10 @@ namespace mlir { /// symbols referenced by name via a string attribute). 
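
A hedged sketch of the `OpBuilder::Listener` hook introduced above: a listener that records every inserted operation. `TrackingListener` is hypothetical, not part of the patch:

#include "mlir/IR/Builders.h"
#include "llvm/ADT/SmallVector.h"
using namespace mlir;

struct TrackingListener : public OpBuilder::Listener {
  SmallVector<Operation *, 4> created;
  void notifyOperationInserted(Operation *op) override {
    created.push_back(op);
  }
};

static void buildWithTracking(MLIRContext *ctx) {
  TrackingListener listener;
  OpBuilder b(ctx, &listener); // every insert() now calls back into `listener`
  // ... ops created via b.create<...>(...) land in listener.created ...
}
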
class FuncOp : public Op { + OpTrait::PolyhedralScope, CallableOpInterface::Trait, + SymbolOpInterface::Trait> { public: using Op::Op; using Op::print; @@ -47,13 +48,13 @@ class FuncOp iterator_range attrs); static FuncOp create(Location location, StringRef name, FunctionType type, ArrayRef attrs, - ArrayRef argAttrs); + ArrayRef argAttrs); - static void build(Builder *builder, OperationState &result, StringRef name, + static void build(OpBuilder &builder, OperationState &result, StringRef name, FunctionType type, ArrayRef attrs); - static void build(Builder *builder, OperationState &result, StringRef name, + static void build(OpBuilder &builder, OperationState &result, StringRef name, FunctionType type, ArrayRef attrs, - ArrayRef argAttrs); + ArrayRef argAttrs); /// Operation hooks. static ParseResult parse(OpAsmParser &parser, OperationState &result); diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index 0787aad534d73..53c513023e294 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -243,7 +243,7 @@ class FunctionLike : public OpTrait::TraitBase { } /// Return all argument attributes of this function. - void getAllArgAttrs(SmallVectorImpl &result) { + void getAllArgAttrs(SmallVectorImpl &result) { for (unsigned i = 0, e = getNumArguments(); i != e; ++i) result.emplace_back(getArgAttrDict(i)); } @@ -270,8 +270,8 @@ class FunctionLike : public OpTrait::TraitBase { /// Set the attributes held by the argument at 'index'. void setArgAttrs(unsigned index, ArrayRef attributes); - void setArgAttrs(unsigned index, NamedAttributeList attributes); - void setAllArgAttrs(ArrayRef attributes) { + void setArgAttrs(unsigned index, MutableDictionaryAttr attributes); + void setAllArgAttrs(ArrayRef attributes) { assert(attributes.size() == getNumArguments()); for (unsigned i = 0, e = attributes.size(); i != e; ++i) setArgAttrs(i, attributes[i]); @@ -286,8 +286,8 @@ class FunctionLike : public OpTrait::TraitBase { } /// Remove the attribute 'name' from the argument at 'index'. - NamedAttributeList::RemoveResult removeArgAttr(unsigned index, - Identifier name); + MutableDictionaryAttr::RemoveResult removeArgAttr(unsigned index, + Identifier name); //===--------------------------------------------------------------------===// // Result Attributes @@ -306,7 +306,7 @@ class FunctionLike : public OpTrait::TraitBase { } /// Return all result attributes of this function. - void getAllResultAttrs(SmallVectorImpl &result) { + void getAllResultAttrs(SmallVectorImpl &result) { for (unsigned i = 0, e = getNumResults(); i != e; ++i) result.emplace_back(getResultAttrDict(i)); } @@ -333,8 +333,8 @@ class FunctionLike : public OpTrait::TraitBase { /// Set the attributes held by the result at 'index'. void setResultAttrs(unsigned index, ArrayRef attributes); - void setResultAttrs(unsigned index, NamedAttributeList attributes); - void setAllResultAttrs(ArrayRef attributes) { + void setResultAttrs(unsigned index, MutableDictionaryAttr attributes); + void setAllResultAttrs(ArrayRef attributes) { assert(attributes.size() == getNumResults()); for (unsigned i = 0, e = attributes.size(); i != e; ++i) setResultAttrs(i, attributes[i]); @@ -350,8 +350,8 @@ class FunctionLike : public OpTrait::TraitBase { } /// Remove the attribute 'name' from the result at 'index'. 
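
A round-trip sketch of the renamed argument-attribute API above, assuming an existing `FuncOp`; the attribute name "my.tag" is illustrative:

#include "mlir/IR/Function.h"
#include <cassert>
using namespace mlir;

static void tagFirstArg(FuncOp func, MLIRContext *ctx) {
  auto name = Identifier::get("my.tag", ctx);
  func.setArgAttr(0, name, UnitAttr::get(ctx));
  // Removal reports whether anything was actually erased.
  auto removed = func.removeArgAttr(0, name);
  assert(removed == MutableDictionaryAttr::RemoveResult::Removed);
}
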
- NamedAttributeList::RemoveResult removeResultAttr(unsigned index, - Identifier name); + MutableDictionaryAttr::RemoveResult removeResultAttr(unsigned index, + Identifier name); protected: /// Returns the attribute entry name for the set of argument attributes at @@ -514,7 +514,7 @@ void FunctionLike::setArgAttrs( template void FunctionLike::setArgAttrs(unsigned index, - NamedAttributeList attributes) { + MutableDictionaryAttr attributes) { assert(index < getNumArguments() && "invalid argument number"); SmallString<8> nameOut; if (auto newAttr = attributes.getDictionary()) @@ -529,25 +529,25 @@ template void FunctionLike::setArgAttr(unsigned index, Identifier name, Attribute value) { auto curAttr = getArgAttrDict(index); - NamedAttributeList attrList(curAttr); - attrList.set(name, value); + MutableDictionaryAttr attrDict(curAttr); + attrDict.set(name, value); // If the attribute changed, then set the new arg attribute list. - if (curAttr != attrList.getDictionary()) - setArgAttrs(index, attrList); + if (curAttr != attrDict.getDictionary()) + setArgAttrs(index, attrDict); } /// Remove the attribute 'name' from the argument at 'index'. template -NamedAttributeList::RemoveResult +MutableDictionaryAttr::RemoveResult FunctionLike::removeArgAttr(unsigned index, Identifier name) { // Build an attribute list and remove the attribute at 'name'. - NamedAttributeList attrList(getArgAttrDict(index)); - auto result = attrList.remove(name); + MutableDictionaryAttr attrDict(getArgAttrDict(index)); + auto result = attrDict.remove(name); // If the attribute was removed, then update the argument dictionary. - if (result == NamedAttributeList::RemoveResult::Removed) - setArgAttrs(index, attrList); + if (result == MutableDictionaryAttr::RemoveResult::Removed) + setArgAttrs(index, attrDict); return result; } @@ -570,8 +570,8 @@ void FunctionLike::setResultAttrs( } template -void FunctionLike::setResultAttrs(unsigned index, - NamedAttributeList attributes) { +void FunctionLike::setResultAttrs( + unsigned index, MutableDictionaryAttr attributes) { assert(index < getNumResults() && "invalid result number"); SmallString<8> nameOut; if (auto newAttr = attributes.getDictionary()) @@ -587,25 +587,25 @@ template void FunctionLike::setResultAttr(unsigned index, Identifier name, Attribute value) { auto curAttr = getResultAttrDict(index); - NamedAttributeList attrList(curAttr); - attrList.set(name, value); + MutableDictionaryAttr attrDict(curAttr); + attrDict.set(name, value); // If the attribute changed, then set the new arg attribute list. - if (curAttr != attrList.getDictionary()) - setResultAttrs(index, attrList); + if (curAttr != attrDict.getDictionary()) + setResultAttrs(index, attrDict); } /// Remove the attribute 'name' from the result at 'index'. template -NamedAttributeList::RemoveResult +MutableDictionaryAttr::RemoveResult FunctionLike::removeResultAttr(unsigned index, Identifier name) { // Build an attribute list and remove the attribute at 'name'. - NamedAttributeList attrList(getResultAttrDict(index)); - auto result = attrList.remove(name); + MutableDictionaryAttr attrDict(getResultAttrDict(index)); + auto result = attrDict.remove(name); // If the attribute was removed, then update the result dictionary. 
- if (result == NamedAttributeList::RemoveResult::Removed) - setResultAttrs(index, attrList); + if (result == MutableDictionaryAttr::RemoveResult::Removed) + setResultAttrs(index, attrDict); return result; } diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index e1213889fe730..da0b0bd826ced 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -55,6 +55,15 @@ class MLIRContext { /// Enables creating operations in unregistered dialects. void allowUnregisteredDialects(bool allow = true); + /// Return true if multi-threading is enabled by the context. + bool isMultithreadingEnabled(); + + /// Set the flag specifying if multi-threading is disabled by the context. + void disableMultithreading(bool disable = true); + void enableMultithreading(bool enable = true) { + disableMultithreading(!enable); + } + /// Return true if we should attach the operation to diagnostics emitted via /// Operation::emit. bool shouldPrintOpOnDiagnostic(); diff --git a/mlir/include/mlir/IR/Module.h b/mlir/include/mlir/IR/Module.h index b02d10472b0da..e724dd034030d 100644 --- a/mlir/include/mlir/IR/Module.h +++ b/mlir/include/mlir/IR/Module.h @@ -30,15 +30,17 @@ class ModuleTerminatorOp; class ModuleOp : public Op< ModuleOp, OpTrait::ZeroOperands, OpTrait::ZeroResult, - OpTrait::IsIsolatedFromAbove, OpTrait::SymbolTable, - OpTrait::SingleBlockImplicitTerminator::Impl> { + OpTrait::IsIsolatedFromAbove, OpTrait::PolyhedralScope, + OpTrait::SymbolTable, + OpTrait::SingleBlockImplicitTerminator::Impl, + SymbolOpInterface::Trait> { public: using Op::Op; using Op::print; static StringRef getOperationName() { return "module"; } - static void build(Builder *builder, OperationState &result, + static void build(OpBuilder &builder, OperationState &result, Optional name = llvm::None); /// Construct a module from the given location with an optional name. @@ -95,6 +97,13 @@ class ModuleOp insertPt = Block::iterator(body->getTerminator()); body->getOperations().insert(insertPt, op); } + + //===--------------------------------------------------------------------===// + // SymbolOpInterface Methods + //===--------------------------------------------------------------------===// + + /// A ModuleOp may optionally define a symbol. + bool isOptionalSymbol() { return true; } }; /// The ModuleTerminatorOp is a special terminator operation for the body of a @@ -109,7 +118,7 @@ class ModuleTerminatorOp public: using Op::Op; static StringRef getOperationName() { return "module_terminator"; } - static void build(Builder *, OperationState &) {} + static void build(OpBuilder &, OperationState &) {} }; /// This class acts as an owning reference to a module, and will automatically diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 7679d8b1008e3..c6f144edb94df 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1188,15 +1188,16 @@ class BitEnumAttr()">, - "dictionary of named attribute values"> { +class DictionaryAttrBase : + Attr { let storageType = [{ DictionaryAttr }]; let returnType = [{ DictionaryAttr }]; let valueType = NoneType; let convertFromStorage = "$_self"; } -def DictionaryAttr : DictionaryAttrBase; +def DictionaryAttr : DictionaryAttrBase()">, + "dictionary of named attribute values">; class ElementsAttrBase : Attr { @@ -1380,7 +1381,11 @@ class StructFieldAttr { // validation method and set of accessors for a fixed set of fields. 
This is // useful when representing data that would normally be in a structure. class StructAttr attributes> : DictionaryAttrBase { + list attributes> : + DictionaryAttrBase()">, + "DictionaryAttr with field(s): " # + StrJoin.result # + " (each field having its own constraints)"> { // Name for this StructAttr. string className = name; @@ -1435,7 +1440,7 @@ class DerivedAttr : let returnType = ret; code body = b; - // Specify how to convert from the derived attribute to an attibute. + // Specify how to convert from the derived attribute to an attribute. // // ## Special placeholders // @@ -1643,6 +1648,8 @@ def ConstantLike : NativeOpTrait<"ConstantLike">; def FunctionLike : NativeOpTrait<"FunctionLike">; // Op is isolated from above. def IsolatedFromAbove : NativeOpTrait<"IsIsolatedFromAbove">; +// Op defines a polyhedral scope. +def PolyhedralScope : NativeOpTrait<"PolyhedralScope">; // Op results are float or vectors/tensors thereof. def ResultsAreFloatLike : NativeOpTrait<"ResultsAreFloatLike">; // Op has the same operand type. @@ -1658,10 +1665,6 @@ def SameOperandsElementType : NativeOpTrait<"SameOperandsElementType">; // Op has the same operand and result element type (or type itself, if scalar). def SameOperandsAndResultElementType : NativeOpTrait<"SameOperandsAndResultElementType">; -// Op is a symbol. -def Symbol : NativeOpTrait<"Symbol">; -// Op defines a symbol table. -def SymbolTable : NativeOpTrait<"SymbolTable">; // Op is a terminator. def Terminator : NativeOpTrait<"IsTerminator">; @@ -1721,6 +1724,10 @@ class OpInterfaceTrait : NativeOpTrait<""> // Specify the body of the verification function. `$_op` will be replaced with // the operation being verified. code verify = verifyBody; + + // An optional code block containing extra declarations to place in the + // interface trait declaration. + code extraTraitClassDeclaration = ""; } // This class represents a single, optionally static, interface method. @@ -1773,12 +1780,20 @@ class OpInterface : OpInterfaceTrait { // Whether to declare the op interface methods in the op's header. This class // simply wraps an OpInterface but is used to indicate that the method -// declarations should be generated. -class DeclareOpInterfaceMethods : - OpInterface { +// declarations should be generated. This class takes an optional set of methods +// that should have declarations generated even if the method has a default +// implementation. +class DeclareOpInterfaceMethods overridenMethods = []> + : OpInterface { let description = interface.description; let cppClassName = interface.cppClassName; let methods = interface.methods; + + // This field contains a set of method names that should always have their + // declarations generated. This allows for generating declarations for + // methods with default implementations that need to be overridden. + list alwaysOverriddenMethods = overridenMethods; } //===----------------------------------------------------------------------===// @@ -1803,7 +1818,7 @@ def successor; // The signature of the builder is always // // ```c++ -// static void build(Builder *builder, OperationState &state, +// static void build(OpBuilder &builder, OperationState &state, // ...) { // ... 
// } @@ -1883,7 +1898,7 @@ class Op props = []> { // following signatures: // // ```c++ - // static void build(Builder *, OperationState &odsState, + // static void build(OpBuilder &, OperationState &odsState, // Type , Type , ..., // Value , Value , ..., // Attribute , Attribute , ...); @@ -1891,7 +1906,7 @@ class Op props = []> { // * where the attributes follow the same declaration order as in the op. // // ```c++ - // static void build(Builder *, OperationState &odsState, + // static void build(OpBuilder &, OperationState &odsState, // ArrayRef resultTypes, // ArrayRef operands, // ArrayRef attributes); diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index f2b174c067ffe..b1830de88eeff 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -25,6 +25,7 @@ namespace mlir { class Builder; +class OpBuilder; namespace OpTrait { template class OneResult; @@ -77,14 +78,15 @@ namespace impl { /// region's only block if it does not have a terminator already. If the region /// is empty, insert a new block first. `buildTerminatorOp` should return the /// terminator operation to insert. -void ensureRegionTerminator(Region ®ion, Location loc, - function_ref buildTerminatorOp); +void ensureRegionTerminator( + Region ®ion, Location loc, + function_ref buildTerminatorOp); /// Templated version that fills the generates the provided operation type. template void ensureRegionTerminator(Region ®ion, Builder &builder, Location loc) { - ensureRegionTerminator(region, loc, [&] { + ensureRegionTerminator(region, loc, [&](OpBuilder &b) { OperationState state(loc, OpTy::getOperationName()); - OpTy::build(&builder, state); + OpTy::build(b, state); return Operation::create(state); }); } @@ -176,7 +178,7 @@ class OpState { void setAttrs(ArrayRef attributes) { state->setAttrs(attributes); } - void setAttrs(NamedAttributeList newAttrs) { state->setAttrs(newAttrs); } + void setAttrs(MutableDictionaryAttr newAttrs) { state->setAttrs(newAttrs); } /// Set the dialect attributes for this operation, and preserve all dependent. template void setDialectAttrs(DialectAttrs &&attrs) { @@ -185,10 +187,10 @@ class OpState { /// Remove the attribute with the specified name if it exists. The return /// value indicates whether the attribute was present or not. - NamedAttributeList::RemoveResult removeAttr(Identifier name) { + MutableDictionaryAttr::RemoveResult removeAttr(Identifier name) { return state->removeAttr(name); } - NamedAttributeList::RemoveResult removeAttr(StringRef name) { + MutableDictionaryAttr::RemoveResult removeAttr(StringRef name) { return state->removeAttr(Identifier::get(name, getContext())); } @@ -581,6 +583,13 @@ class OneRegion : public TraitBase { public: Region &getRegion() { return this->getOperation()->getRegion(0); } + /// Returns a range of operations within the region of this operation. + auto getOps() { return getRegion().getOps(); } + template + auto getOps() { + return getRegion().template getOps(); + } + static LogicalResult verifyTrait(Operation *op) { return impl::verifyOneRegion(op); } @@ -1032,6 +1041,21 @@ class IsIsolatedFromAbove } }; +/// A trait of region holding operations that defines a new scope for polyhedral +/// optimization purposes. Any SSA values of 'index' type that either dominate +/// such an operation or are used at the top-level of such an operation +/// automatically become valid symbols for the polyhedral scope defined by that +/// operation. For more details, see `Traits.md#PolyhedralScope`. 
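
A sketch of how a region-holding op opts into the new trait, mirroring the `FuncOp`/`ModuleOp` changes elsewhere in this patch; `MyScopeOp` is hypothetical:

#include "mlir/IR/OpDefinition.h"
using namespace mlir;

class MyScopeOp : public Op<MyScopeOp, OpTrait::OneRegion,
                            OpTrait::PolyhedralScope> {
public:
  using Op::Op;
  static StringRef getOperationName() { return "mydialect.scope"; }
};
// 'index'-typed values dominating a MyScopeOp, or used at its top level,
// become valid polyhedral symbols within its region.
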
+template +class PolyhedralScope : public TraitBase { +public: + static LogicalResult verifyTrait(Operation *op) { + static_assert(!ConcreteType::template hasTrait(), + "expected operation to have one or more regions"); + return success(); + } +}; + /// A trait of region holding operations that define a new scope for automatic /// allocations, i.e., allocations that are freed when control is transferred /// back from the operation's region. Any operations performing such allocations @@ -1093,6 +1117,12 @@ template struct SingleBlockImplicitTerminator { ::mlir::impl::template ensureRegionTerminator( region, builder, loc); } + + Block *getBody(unsigned idx = 0) { + Region ®ion = this->getOperation()->getRegion(idx); + assert(!region.empty() && "unexpected empty region"); + return ®ion.front(); + } }; }; @@ -1353,6 +1383,7 @@ class OpInterface : public Op { public: using Concept = typename Traits::Concept; template using Model = typename Traits::template Model; + using Base = OpInterface; OpInterface(Operation *op = nullptr) : Op(op), impl(op ? getInterfaceFor(op) : nullptr) { @@ -1415,7 +1446,7 @@ namespace impl { ParseResult parseOneResultOneOperandTypeOp(OpAsmParser &parser, OperationState &result); -void buildBinaryOp(Builder *builder, OperationState &result, Value lhs, +void buildBinaryOp(OpBuilder &builder, OperationState &result, Value lhs, Value rhs); ParseResult parseOneResultSameOperandTypeOp(OpAsmParser &parser, OperationState &result); @@ -1429,7 +1460,7 @@ void printOneResultOp(Operation *op, OpAsmPrinter &p); // These functions are out-of-line implementations of the methods in CastOp, // which avoids them being template instantiated/duplicated. namespace impl { -void buildCastOp(Builder *builder, OperationState &result, Value source, +void buildCastOp(OpBuilder &builder, OperationState &result, Value source, Type destType); ParseResult parseCastOp(OpAsmParser &parser, OperationState &result); void printCastOp(Operation *op, OpAsmPrinter &p); diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 1dc0e787aa4f3..fcde73efd5661 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -36,11 +36,11 @@ class Operation final ArrayRef attributes, ArrayRef successors, unsigned numRegions); - /// Overload of create that takes an existing NamedAttributeList to avoid + /// Overload of create that takes an existing MutableDictionaryAttr to avoid /// unnecessarily uniquing a list of attributes. static Operation *create(Location location, OperationName name, ArrayRef resultTypes, ArrayRef operands, - NamedAttributeList attributes, + MutableDictionaryAttr attributes, ArrayRef successors, unsigned numRegions); /// Create a new Operation from the fields stored in `state`. @@ -49,7 +49,7 @@ class Operation final /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, ArrayRef resultTypes, ArrayRef operands, - NamedAttributeList attributes, + MutableDictionaryAttr attributes, ArrayRef successors = {}, RegionRange regions = {}); @@ -205,6 +205,14 @@ class Operation final /// 'operands'. void setOperands(ValueRange operands); + /// Replace the operands beginning at 'start' and ending at 'start' + 'length' + /// with the ones provided in 'operands'. 'operands' may be smaller or larger + /// than the range pointed to by 'start'+'length'. 
+ void setOperands(unsigned start, unsigned length, ValueRange operands); + + /// Insert the given operands into the operand list at the given 'index'. + void insertOperands(unsigned index, ValueRange operands); + unsigned getNumOperands() { return LLVM_LIKELY(hasOperandStorage) ? getOperandStorage().size() : 0; } @@ -214,6 +222,15 @@ class Operation final return getOpOperand(idx).set(value); } + /// Erase the operand at position `idx`. + void eraseOperand(unsigned idx) { eraseOperands(idx); } + + /// Erase the operands starting at position `idx` and ending at position + /// 'idx'+'length'. + void eraseOperands(unsigned idx, unsigned length = 1) { + getOperandStorage().eraseOperands(idx, length); + } + // Support operand iteration. using operand_range = OperandRange; using operand_iterator = operand_range::iterator; @@ -221,12 +238,9 @@ class Operation final operand_iterator operand_begin() { return getOperands().begin(); } operand_iterator operand_end() { return getOperands().end(); } - /// Returns an iterator on the underlying Value's (Value ). + /// Returns an iterator on the underlying Value's. operand_range getOperands() { return operand_range(this); } - /// Erase the operand at position `idx`. - void eraseOperand(unsigned idx) { getOperandStorage().eraseOperand(idx); } - MutableArrayRef getOpOperands() { return LLVM_LIKELY(hasOperandStorage) ? getOperandStorage().getOperands() : MutableArrayRef(); @@ -280,13 +294,13 @@ class Operation final /// Return all of the attributes on this operation. ArrayRef getAttrs() { return attrs.getAttrs(); } - /// Return the internal attribute list on this operation. - NamedAttributeList &getAttrList() { return attrs; } + /// Return mutable container of all the attributes on this operation. + MutableDictionaryAttr &getMutableAttrDict() { return attrs; } - /// Set the attribute list on this operation. - /// Using a NamedAttributeList is more efficient as it does not require new + /// Set the attribute dictionary on this operation. + /// Using a MutableDictionaryAttr is more efficient as it does not require new /// uniquing in the MLIRContext. - void setAttrs(NamedAttributeList newAttrs) { attrs = newAttrs; } + void setAttrs(MutableDictionaryAttr newAttrs) { attrs = newAttrs; } /// Return the specified attribute if present, null otherwise. Attribute getAttr(Identifier name) { return attrs.get(name); } @@ -309,7 +323,7 @@ class Operation final /// Remove the attribute with the specified name if it exists. The return /// value indicates whether the attribute was present or not. - NamedAttributeList::RemoveResult removeAttr(Identifier name) { + MutableDictionaryAttr::RemoveResult removeAttr(Identifier name) { return attrs.remove(name); } @@ -540,6 +554,14 @@ class Operation final [](OpResult result) { return result.use_empty(); }); } + /// Returns true if the results of this operation are used outside of the + /// given block. 
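
The new splice-style operand API above composes as follows; a sketch with `op` and values `v0`..`v2` assumed to exist:

#include "mlir/IR/Operation.h"
using namespace mlir;

static void editOperands(Operation *op, Value v0, Value v1, Value v2) {
  op->insertOperands(/*index=*/0, {v0, v1});        // grow the operand list
  op->setOperands(/*start=*/1, /*length=*/1, {v2}); // splice in a replacement
  op->eraseOperands(/*idx=*/0, /*length=*/2);       // shrink it again
}
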
+ bool isUsedOutsideOfBlock(Block *block) { + return llvm::any_of(getOpResults(), [block](OpResult result) { + return result.isUsedOutsideOfBlock(block); + }); + } + //===--------------------------------------------------------------------===// // Users //===--------------------------------------------------------------------===// @@ -596,7 +618,7 @@ class Operation final private: Operation(Location location, OperationName name, ArrayRef resultTypes, unsigned numSuccessors, unsigned numRegions, - const NamedAttributeList &attributes, bool hasOperandStorage); + const MutableDictionaryAttr &attributes, bool hasOperandStorage); // Operations are deleted through the destroy() member because they are // allocated with malloc. @@ -658,7 +680,7 @@ class Operation final OperationName name; /// This holds general named attributes for the operation. - NamedAttributeList attrs; + MutableDictionaryAttr attrs; // allow ilist_traits access to 'block' field. friend struct llvm::ilist_traits; diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 852404f4e0712..8c0a3f12d4260 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -20,6 +20,7 @@ #include "mlir/IR/Types.h" #include "mlir/IR/Value.h" #include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/TrailingObjects.h" @@ -369,8 +370,14 @@ class OperandStorage final /// 'values'. void setOperands(Operation *owner, ValueRange values); - /// Erase an operand held by the storage. - void eraseOperand(unsigned index); + /// Replace the operands beginning at 'start' and ending at 'start' + 'length' + /// with the ones provided in 'operands'. 'operands' may be smaller or larger + /// than the range pointed to by 'start'+'length'. + void setOperands(Operation *owner, unsigned start, unsigned length, + ValueRange operands); + + /// Erase the operands held by the storage within the given range. + void eraseOperands(unsigned start, unsigned length); /// Get the operation operands held by the storage. MutableArrayRef getOperands() { @@ -611,6 +618,17 @@ class ValueTypeRange final ValueTypeIterator>::iterator_range; template ValueTypeRange(Container &&c) : ValueTypeRange(c.begin(), c.end()) {} + + /// Compare this range with another. + template + bool operator==(const OtherT &other) const { + return llvm::size(*this) == llvm::size(other) && + std::equal(this->begin(), this->end(), other.begin()); + } + template + bool operator!=(const OtherT &other) const { + return !(*this == other); + } }; template @@ -653,6 +671,69 @@ class OperandRange final : public llvm::detail::indexed_accessor_range_base< friend RangeBaseT; }; +//===----------------------------------------------------------------------===// +// MutableOperandRange + +/// This class provides a mutable adaptor for a range of operands. It allows for +/// setting, inserting, and erasing operands from the given range. +class MutableOperandRange { +public: + /// A pair of a named attribute corresponding to an operand segment attribute, + /// and the index within that attribute. The attribute should correspond to an + /// i32 DenseElementsAttr. + using OperandSegment = std::pair; + + /// Construct a new mutable range from the given operand, operand start index, + /// and range length. `operandSegments` is an optional set of operand segments + /// to be updated when mutating the operand list. 
+ MutableOperandRange(Operation *owner, unsigned start, unsigned length, + ArrayRef<OperandSegment> operandSegments = llvm::None); + MutableOperandRange(Operation *owner); + + /// Slice this range into a sub-range, with the additional operand segment. + MutableOperandRange slice(unsigned subStart, unsigned subLen, + Optional<OperandSegment> segment = llvm::None); + + /// Append the given values to the range. + void append(ValueRange values); + + /// Assign this range to the given values. + void assign(ValueRange values); + + /// Assign the range to the given value. + void assign(Value value); + + /// Erase the operands within the given sub-range. + void erase(unsigned subStart, unsigned subLen = 1); + + /// Clear this range and erase all of the operands. + void clear(); + + /// Returns the current size of the range. + unsigned size() const { return length; } + + /// Allow implicit conversion to an OperandRange. + operator OperandRange() const; + + /// Returns the owning operation. + Operation *getOwner() const { return owner; } + +private: + /// Update the length of this range to the one provided. + void updateLength(unsigned newLength); + + /// The owning operation of this range. + Operation *owner; + + /// The start index of the operand range within the owner operand list, and + /// the length starting from `start`. + unsigned start, length; + + /// Optional set of operand segments that should be updated when mutating the + /// length of this range. + SmallVector<OperandSegment, 1> operandSegments; +}; + +//===----------------------------------------------------------------------===// +// ResultRange @@ -752,6 +833,37 @@ class ValueRange final /// Allow access to `offset_base` and `dereference_iterator`. friend RangeBaseT; }; + +//===----------------------------------------------------------------------===// +// Operation Equivalency +//===----------------------------------------------------------------------===// + +/// This class provides utilities for computing if two operations are +/// equivalent. +struct OperationEquivalence { + enum Flags { + None = 0, + + /// This flag signals that operands should not be considered when checking + /// for equivalence. This allows users to implement their own + /// equivalence schemes for operand values. The number of operands is still + /// checked, just not the operands themselves. + IgnoreOperands = 1, + + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ IgnoreOperands) + }; + + /// Compute a hash for the given operation. + static llvm::hash_code computeHash(Operation *op, Flags flags = Flags::None); + + /// Compare two operations and return if they are equivalent. + static bool isEquivalentTo(Operation *lhs, Operation *rhs, + Flags flags = Flags::None); +}; + +/// Enable Bitmask enums for OperationEquivalence::Flags. +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + } // end namespace mlir namespace llvm { diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 6dbc5b9664d02..0d125b3d81482 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -211,14 +211,14 @@ template struct OpRewritePattern : public RewritePattern { /// to apply patterns and observe their effects (e.g. to keep worklists or /// other data structures up to date). /// -class PatternRewriter : public OpBuilder { +class PatternRewriter : public OpBuilder, public OpBuilder::Listener { public: /// Create operation of specific op type at the current insertion point /// without verifying to see if it is valid. template <typename OpTy, typename... Args> OpTy create(Location location, Args...
args) { OperationState state(location, OpTy::getOperationName()); - OpTy::build(this, state, args...); + OpTy::build(*this, state, args...); auto *op = createOperation(state); auto result = dyn_cast(op); assert(result && "Builder didn't return the right type"); @@ -231,7 +231,7 @@ class PatternRewriter : public OpBuilder { template OpTy createChecked(Location location, Args... args) { OperationState state(location, OpTy::getOperationName()); - OpTy::build(this, state, args...); + OpTy::build(*this, state, args...); auto *op = createOperation(state); // If the Operation we produce is valid, return it. @@ -247,10 +247,6 @@ class PatternRewriter : public OpBuilder { return OpTy(); } - /// This is implemented to insert the specified operation and serves as a - /// notification hook for rewriters that want to know about new operations. - virtual Operation *insert(Operation *op) = 0; - /// Move the blocks that belong to "region" before the given position in /// another region "parent". The two regions must be different. The caller /// is responsible for creating or updating the operation transferring flow @@ -349,11 +345,13 @@ class PatternRewriter : public OpBuilder { } protected: - explicit PatternRewriter(MLIRContext *ctx) : OpBuilder(ctx) {} - virtual ~PatternRewriter(); + /// Initialize the builder with this rewriter as the listener. + explicit PatternRewriter(MLIRContext *ctx) + : OpBuilder(ctx, /*listener=*/this) {} + ~PatternRewriter() override; - // These are the callback methods that subclasses can choose to implement if - // they would like to be notified about certain types of mutations. + /// These are the callback methods that subclasses can choose to implement if + /// they would like to be notified about certain types of mutations. /// Notify the pattern rewriter that the specified operation is about to be /// replaced with another set of operations. This is called before the uses diff --git a/mlir/include/mlir/IR/Region.h b/mlir/include/mlir/IR/Region.h index f824b65c646f7..0efa58ca4c0a0 100644 --- a/mlir/include/mlir/IR/Region.h +++ b/mlir/include/mlir/IR/Region.h @@ -34,6 +34,10 @@ class Region { /// parent container. The region must have a valid parent container. Location getLoc(); + //===--------------------------------------------------------------------===// + // Block list management + //===--------------------------------------------------------------------===// + using BlockListType = llvm::iplist; BlockListType &getBlocks() { return blocks; } @@ -58,6 +62,72 @@ class Region { return &Region::blocks; } + //===--------------------------------------------------------------------===// + // Operation list utilities + //===--------------------------------------------------------------------===// + + /// This class provides iteration over the held operations of blocks directly + /// within a region. + class OpIterator final + : public llvm::iterator_facade_base { + public: + /// Initialize OpIterator for a region, specify `end` to return the iterator + /// to last operation. + explicit OpIterator(Region *region, bool end = false); + + using llvm::iterator_facade_base::operator++; + OpIterator &operator++(); + Operation *operator->() const { return &*operation; } + Operation &operator*() const { return *operation; } + + /// Compare this iterator with another. 
+ bool operator==(const OpIterator &rhs) const { + return operation == rhs.operation; + } + bool operator!=(const OpIterator &rhs) const { return !(*this == rhs); } + + private: + void skipOverBlocksWithNoOps(); + + /// The region whose operations are being iterated over. + Region *region; + /// The block of 'region' whose operations are being iterated over. + Region::iterator block; + /// The current operation within 'block'. + Block::iterator operation; + }; + + /// This class provides iteration over the held operations of a region for a + /// specific operation type. + template + using op_iterator = detail::op_iterator; + + /// Return iterators that walk the operations nested directly within this + /// region. + OpIterator op_begin() { return OpIterator(this); } + OpIterator op_end() { return OpIterator(this, /*end=*/true); } + iterator_range getOps() { return {op_begin(), op_end()}; } + + /// Return iterators that walk operations of type 'T' nested directly within + /// this region. + template op_iterator op_begin() { + return detail::op_filter_iterator(op_begin(), op_end()); + } + template op_iterator op_end() { + return detail::op_filter_iterator(op_end(), op_end()); + } + template iterator_range> getOps() { + auto endIt = op_end(); + return {detail::op_filter_iterator(op_begin(), endIt), + detail::op_filter_iterator(endIt, endIt)}; + } + + //===--------------------------------------------------------------------===// + // Misc. utilities + //===--------------------------------------------------------------------===// + /// Return the region containing this region or nullptr if the region is /// attached to a top-level operation. Region *getParentRegion(); @@ -120,6 +190,10 @@ class Region { /// they are to be deleted. void dropAllReferences(); + //===--------------------------------------------------------------------===// + // Operation Walkers + //===--------------------------------------------------------------------===// + /// Walk the operations in this region in postorder, calling the callback for /// each operation. This method is invoked for void-returning callbacks. /// See Operation::walk for more details. @@ -142,6 +216,10 @@ class Region { return WalkResult::advance(); } + //===--------------------------------------------------------------------===// + // CFG view utilities + //===--------------------------------------------------------------------===// + /// Displays the CFG in a window. This is for use from the debugger and /// depends on Graphviz to generate the graph. /// This function is defined in ViewRegionGraph and only works with that diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h index 3241a70f26752..752f2a45f07dd 100644 --- a/mlir/include/mlir/IR/StandardTypes.h +++ b/mlir/include/mlir/IR/StandardTypes.h @@ -65,6 +65,41 @@ enum Kind { } // namespace StandardTypes +//===----------------------------------------------------------------------===// +// ComplexType +//===----------------------------------------------------------------------===// + +/// The 'complex' type represents a complex number with a parameterized element +/// type, which is composed of a real and imaginary value of that element type. +/// +/// The element must be a floating point or integer scalar type. +/// +class ComplexType + : public Type::TypeBase { +public: + using Base::Base; + + /// Get or create a ComplexType with the provided element type. 
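// A small sketch of the creation path documented above, assuming a live
// MLIRContext* (FloatType::getF32 is the existing f32 accessor):
#include "mlir/IR/StandardTypes.h"
inline mlir::ComplexType makeComplexF32(mlir::MLIRContext *ctx) {
  // The element type must be a floating point or integer scalar type.
  return mlir::ComplexType::get(mlir::FloatType::getF32(ctx));
}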
+ static ComplexType get(Type elementType); + + /// Get or create a ComplexType with the provided element type. This emits + /// an error at the specified location and returns null if the element type + /// isn't supported. + static ComplexType getChecked(Type elementType, Location location); + + /// Verify the construction of a complex type. + static LogicalResult verifyConstructionInvariants(Location loc, + Type elementType); + + Type getElementType(); + + static bool kindof(unsigned kind) { return kind == StandardTypes::Complex; } +}; + +//===----------------------------------------------------------------------===// +// IndexType +//===----------------------------------------------------------------------===// + /// Index is a special integer-like type with unknown platform-dependent bit /// width. class IndexType : public Type::TypeBase { @@ -81,6 +116,10 @@ class IndexType : public Type::TypeBase { static constexpr unsigned kInternalStorageBitWidth = 64; }; +//===----------------------------------------------------------------------===// +// IntegerType +//===----------------------------------------------------------------------===// + /// Integer types can have arbitrary bitwidth up to a large fixed limit. class IntegerType : public Type::TypeBase { @@ -145,6 +184,10 @@ class IntegerType static constexpr unsigned kMaxWidth = 4096; }; +//===----------------------------------------------------------------------===// +// FloatType +//===----------------------------------------------------------------------===// + class FloatType : public Type::TypeBase { public: using Base::Base; @@ -178,33 +221,26 @@ class FloatType : public Type::TypeBase { const llvm::fltSemantics &getFloatSemantics(); }; -/// The 'complex' type represents a complex number with a parameterized element -/// type, which is composed of a real and imaginary value of that element type. -/// -/// The element must be a floating point or integer scalar type. -/// -class ComplexType - : public Type::TypeBase { +//===----------------------------------------------------------------------===// +// NoneType +//===----------------------------------------------------------------------===// + +/// NoneType is a unit type, i.e. a type with exactly one possible value, where +/// its value does not have a defined dynamic representation. +class NoneType : public Type::TypeBase { public: using Base::Base; - /// Get or create a ComplexType with the provided element type. - static ComplexType get(Type elementType); - - /// Get or create a ComplexType with the provided element type. This emits - /// and error at the specified location and returns null if the element type - /// isn't supported. - static ComplexType getChecked(Type elementType, Location location); - - /// Verify the construction of an integer type. - static LogicalResult verifyConstructionInvariants(Location loc, - Type elementType); - - Type getElementType(); + /// Get an instance of the NoneType. + static NoneType get(MLIRContext *context); - static bool kindof(unsigned kind) { return kind == StandardTypes::Complex; } + static bool kindof(unsigned kind) { return kind == StandardTypes::None; } }; +//===----------------------------------------------------------------------===// +// ShapedType +//===----------------------------------------------------------------------===// + /// This is a common base class between Vector, UnrankedTensor, RankedTensor, /// and MemRef types because they share behavior and semantics around shape, /// rank, and fixed element type.
Any type with these semantics should inherit @@ -291,6 +327,10 @@ class ShapedType : public Type { } }; +//===----------------------------------------------------------------------===// +// VectorType +//===----------------------------------------------------------------------===// + /// Vector types represent multi-dimensional SIMD vectors, and have a fixed /// known constant shape with one or more dimension. class VectorType @@ -326,6 +366,10 @@ class VectorType static bool kindof(unsigned kind) { return kind == StandardTypes::Vector; } }; +//===----------------------------------------------------------------------===// +// TensorType +//===----------------------------------------------------------------------===// + /// Tensor types represent multi-dimensional arrays, and have two variants: /// RankedTensorType and UnrankedTensorType. class TensorType : public ShapedType { @@ -350,6 +394,9 @@ class TensorType : public ShapedType { } }; +//===----------------------------------------------------------------------===// +// RankedTensorType + /// Ranked tensor types represent multi-dimensional arrays that have a shape /// with a fixed number of dimensions. Each shape element can be a non-negative /// integer or unknown (represented by -1). @@ -382,6 +429,9 @@ class RankedTensorType } }; +//===----------------------------------------------------------------------===// +// UnrankedTensorType + /// Unranked tensor types represent multi-dimensional arrays that have an /// unknown shape. class UnrankedTensorType @@ -411,6 +461,10 @@ class UnrankedTensorType } }; +//===----------------------------------------------------------------------===// +// BaseMemRefType +//===----------------------------------------------------------------------===// + /// Base MemRef for Ranked and Unranked variants class BaseMemRefType : public ShapedType { public: @@ -423,6 +477,9 @@ class BaseMemRefType : public ShapedType { } }; +//===----------------------------------------------------------------------===// +// MemRefType + /// MemRef types represent a region of memory that have a shape with a fixed /// number of dimensions. Each shape element can be a non-negative integer or /// unknown (represented by -1). MemRef types also have an affine map @@ -525,6 +582,9 @@ class MemRefType : public Type::TypeBase { -public: - using Base::Base; - - /// Get an instance of the NoneType. - static NoneType get(MLIRContext *context); - - static bool kindof(unsigned kind) { return kind == StandardTypes::None; } -}; +//===----------------------------------------------------------------------===// +// Type Utilities +//===----------------------------------------------------------------------===// /// Returns the strides of the MemRef if the layout map is in strided form. /// MemRefs with layout maps in strided form include: diff --git a/mlir/include/mlir/IR/SymbolInterfaces.td b/mlir/include/mlir/IR/SymbolInterfaces.td new file mode 100644 index 0000000000000..219ea6048f02d --- /dev/null +++ b/mlir/include/mlir/IR/SymbolInterfaces.td @@ -0,0 +1,155 @@ +//===- SymbolInterfaces.td - Interfaces for symbol ops -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a set of interfaces and traits that can be used to define +// properties of symbol and symbol table operations. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_SYMBOLINTERFACES +#define MLIR_IR_SYMBOLINTERFACES + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// SymbolOpInterface +//===----------------------------------------------------------------------===// + +def Symbol : OpInterface<"SymbolOpInterface"> { + let description = [{ + This interface describes an operation that may define a `Symbol`. A `Symbol` + operation resides immediately within a region that defines a `SymbolTable`. + See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more details + and constraints on `Symbol` operations. + }]; + + let methods = [ + InterfaceMethod<"Returns the name of this symbol.", + "StringRef", "getName", (ins), [{ + // Don't rely on the trait implementation as optional symbol operations + // may override this. + return mlir::SymbolTable::getSymbolName(op); + }], /*defaultImplementation=*/[{ + return mlir::SymbolTable::getSymbolName(this->getOperation()); + }] + >, + InterfaceMethod<"Sets the name of this symbol.", + "void", "setName", (ins "StringRef":$name), [{}], + /*defaultImplementation=*/[{ + this->getOperation()->setAttr( + mlir::SymbolTable::getSymbolAttrName(), + StringAttr::get(name, this->getOperation()->getContext())); + }] + >, + InterfaceMethod<"Gets the visibility of this symbol.", + "mlir::SymbolTable::Visibility", "getVisibility", (ins), [{}], + /*defaultImplementation=*/[{ + return mlir::SymbolTable::getSymbolVisibility(this->getOperation()); + }] + >, + InterfaceMethod<"Sets the visibility of this symbol.", + "void", "setVisibility", (ins "mlir::SymbolTable::Visibility":$vis), [{}], + /*defaultImplementation=*/[{ + mlir::SymbolTable::setSymbolVisibility(this->getOperation(), vis); + }] + >, + InterfaceMethod<[{ + Get all of the uses of the current symbol that are nested within the + given operation 'from'. + Note: See mlir::SymbolTable::getSymbolUses for more details. + }], + "Optional<::mlir::SymbolTable::UseRange>", "getSymbolUses", + (ins "Operation *":$from), [{}], + /*defaultImplementation=*/[{ + return ::mlir::SymbolTable::getSymbolUses(this->getOperation(), from); + }] + >, + InterfaceMethod<[{ + Return if the current symbol is known to have no uses that are nested + within the given operation 'from'. + Note: See mlir::SymbolTable::symbolKnownUseEmpty for more details. + }], + "bool", "symbolKnownUseEmpty", (ins "Operation *":$from), [{}], + /*defaultImplementation=*/[{ + return ::mlir::SymbolTable::symbolKnownUseEmpty(this->getOperation(), + from); + }] + >, + InterfaceMethod<[{ + Attempt to replace all uses of the current symbol with the provided + symbol 'newSymbol' that are nested within the given operation 'from'. + Note: See mlir::SymbolTable::replaceAllSymbolUses for more details. + }], + "LogicalResult", "replaceAllSymbolUses", (ins "StringRef":$newSymbol, + "Operation *":$from), [{}], + /*defaultImplementation=*/[{ + return ::mlir::SymbolTable::replaceAllSymbolUses(this->getOperation(), + newSymbol, from); + }] + >, + InterfaceMethod<[{ + Returns true if this operation optionally defines a symbol based on the + presence of the symbol name. 
+ }], + "bool", "isOptionalSymbol", (ins), [{}], + /*defaultImplementation=*/[{ return false; }] + >, + InterfaceMethod<[{ + Returns true if this operation can be discarded if it has no remaining + symbol uses. + }], + "bool", "canDiscardOnUseEmpty", (ins), [{}], + /*defaultImplementation=*/[{ + // By default, base this on the visibility alone. A symbol can be + // discarded as long as it is not public. Only public symbols may be + // visible from outside of the IR. + return getVisibility() != ::mlir::SymbolTable::Visibility::Public; + }] + >, + ]; + + let verify = [{ + // If this is an optional symbol, bail out early if possible. + auto concreteOp = cast($_op); + if (concreteOp.isOptionalSymbol()) { + if(!concreteOp.getAttr(::mlir::SymbolTable::getSymbolAttrName())) + return success(); + } + return ::mlir::detail::verifySymbol($_op); + }]; + + let extraClassDeclaration = [{ + using Visibility = mlir::SymbolTable::Visibility; + + /// Custom classof that handles the case where the symbol is optional. + static bool classof(Operation *op) { + return Base::classof(op) + && op->getAttr(::mlir::SymbolTable::getSymbolAttrName()); + } + + /// Returns true if this symbol has nested visibility. + bool isNested() { return getVisibility() == Visibility::Nested; } + /// Returns true if this symbol has private visibility. + bool isPrivate() { return getVisibility() == Visibility::Private; } + /// Returns true if this symbol has public visibility. + bool isPublic() { return getVisibility() == Visibility::Public; } + }]; + + let extraTraitClassDeclaration = [{ + using Visibility = mlir::SymbolTable::Visibility; + }]; +} + +//===----------------------------------------------------------------------===// +// Symbol Traits +//===----------------------------------------------------------------------===// + +// Op defines a symbol table. +def SymbolTable : NativeOpTrait<"SymbolTable">; + +#endif // MLIR_IR_SYMBOLINTERFACES diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h index c61efb066e394..0b035836ec61d 100644 --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -72,9 +72,6 @@ class SymbolTable { Nested, }; - /// Returns true if the given operation defines a symbol. - static bool isSymbol(Operation *op); - /// Returns the name of the given symbol operation. static StringRef getSymbolName(Operation *symbol); /// Sets the name of the given symbol operation. @@ -89,6 +86,15 @@ class SymbolTable { /// nullptr if no valid parent symbol table could be found. static Operation *getNearestSymbolTable(Operation *from); + /// Walks all symbol table operations nested within, and including, `op`. For + /// each symbol table operation, the provided callback is invoked with the op + /// and a boolean signifying if the symbols within that symbol table can be + /// treated as if all uses within the IR are visible to the caller. + /// `allSymUsesVisible` identifies whether all of the symbol uses of symbols + /// within `op` are visible. + static void walkSymbolTables(Operation *op, bool allSymUsesVisible, + function_ref callback); + /// Returns the operation registered with the given symbol name with the /// regions of 'symbolTableOp'. 'symbolTableOp' is required to be an operation /// with the 'OpTrait::SymbolTable' trait. 
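// A hedged usage sketch for the generated SymbolOpInterface above; assumes
// `symbol` implements the interface and `module` is the operation defining
// the enclosing symbol table (helper name is hypothetical):
#include "mlir/IR/SymbolTable.h"
inline mlir::LogicalResult renameSymbol(mlir::SymbolOpInterface symbol,
                                        mlir::Operation *module,
                                        llvm::StringRef newName) {
  // Rewrite all nested references first; fail if any use cannot be updated.
  if (mlir::failed(symbol.replaceAllSymbolUses(newName, module)))
    return mlir::failure();
  symbol.setName(newName);
  return mlir::success();
}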
@@ -207,12 +213,12 @@ class SymbolTable { // SymbolTable Trait Types //===----------------------------------------------------------------------===// -namespace OpTrait { -namespace impl { +namespace detail { LogicalResult verifySymbolTable(Operation *op); LogicalResult verifySymbol(Operation *op); -} // namespace impl +} // namespace detail +namespace OpTrait { /// A trait used to provide symbol table functionalities to a region operation. /// This operation must hold exactly 1 region. Once attached, all operations /// that are directly within the region, i.e not including those within child @@ -224,7 +230,7 @@ template class SymbolTable : public TraitBase { public: static LogicalResult verifyTrait(Operation *op) { - return impl::verifySymbolTable(op); + return ::mlir::detail::verifySymbolTable(op); } /// Look up a symbol with the specified name, returning null if no such @@ -245,68 +251,11 @@ class SymbolTable : public TraitBase { } }; -/// A trait used to define a symbol that can be used on operations within a -/// symbol table. Operations using this trait must adhere to the following: -/// * Have a StringAttr attribute named 'SymbolTable::getSymbolAttrName()'. -template -class Symbol : public TraitBase { -public: - using Visibility = mlir::SymbolTable::Visibility; - - static LogicalResult verifyTrait(Operation *op) { - return impl::verifySymbol(op); - } - - /// Returns the name of this symbol. - StringRef getName() { - return this->getOperation() - ->template getAttrOfType( - mlir::SymbolTable::getSymbolAttrName()) - .getValue(); - } - - /// Set the name of this symbol. - void setName(StringRef name) { - this->getOperation()->setAttr( - mlir::SymbolTable::getSymbolAttrName(), - StringAttr::get(name, this->getOperation()->getContext())); - } - - /// Returns the visibility of the current symbol. - Visibility getVisibility() { - return mlir::SymbolTable::getSymbolVisibility(this->getOperation()); - } - - /// Sets the visibility of the current symbol. - void setVisibility(Visibility vis) { - mlir::SymbolTable::setSymbolVisibility(this->getOperation(), vis); - } - - /// Get all of the uses of the current symbol that are nested within the given - /// operation 'from'. - /// Note: See mlir::SymbolTable::getSymbolUses for more details. - Optional<::mlir::SymbolTable::UseRange> getSymbolUses(Operation *from) { - return ::mlir::SymbolTable::getSymbolUses(this->getOperation(), from); - } - - /// Return if the current symbol is known to have no uses that are nested - /// within the given operation 'from'. - /// Note: See mlir::SymbolTable::symbolKnownUseEmpty for more details. - bool symbolKnownUseEmpty(Operation *from) { - return ::mlir::SymbolTable::symbolKnownUseEmpty(this->getOperation(), from); - } +} // end namespace OpTrait - /// Attempt to replace all uses of the current symbol with the provided symbol - /// 'newSymbol' that are nested within the given operation 'from'. - /// Note: See mlir::SymbolTable::replaceAllSymbolUses for more details. - LLVM_NODISCARD LogicalResult replaceAllSymbolUses(StringRef newSymbol, - Operation *from) { - return ::mlir::SymbolTable::replaceAllSymbolUses(this->getOperation(), - newSymbol, from); - } -}; +/// Include the generated symbol interfaces. 
+#include "mlir/IR/SymbolInterfaces.h.inc" -} // end namespace OpTrait } // end namespace mlir #endif // MLIR_IR_SYMBOLTABLE_H diff --git a/mlir/include/mlir/IR/UseDefLists.h b/mlir/include/mlir/IR/UseDefLists.h index 06df9edbfa8b5..1a6319d2b2e8d 100644 --- a/mlir/include/mlir/IR/UseDefLists.h +++ b/mlir/include/mlir/IR/UseDefLists.h @@ -164,7 +164,8 @@ template class IROperand { other.back = nullptr; nextUse = nullptr; back = nullptr; - insertIntoCurrent(); + if (value) + insertIntoCurrent(); return *this; } diff --git a/mlir/include/mlir/IR/Value.h b/mlir/include/mlir/IR/Value.h index 95def76867929..78517309468dc 100644 --- a/mlir/include/mlir/IR/Value.h +++ b/mlir/include/mlir/IR/Value.h @@ -123,6 +123,9 @@ class Value { /// Return the Region in which this Value is defined. Region *getParentRegion(); + /// Return the Block in which this Value is defined. + Block *getParentBlock(); + //===--------------------------------------------------------------------===// // UseLists //===--------------------------------------------------------------------===// @@ -150,6 +153,9 @@ class Value { void replaceUsesWithIf(Value newValue, function_ref shouldReplace); + /// Returns true if the value is used outside of the given block. + bool isUsedOutsideOfBlock(Block *block); + //===--------------------------------------------------------------------===// // Uses diff --git a/mlir/include/mlir/Analysis/Verifier.h b/mlir/include/mlir/IR/Verifier.h similarity index 91% rename from mlir/include/mlir/Analysis/Verifier.h rename to mlir/include/mlir/IR/Verifier.h index aa37fe3327f04..d9d99dd09df04 100644 --- a/mlir/include/mlir/Analysis/Verifier.h +++ b/mlir/include/mlir/IR/Verifier.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_ANALYSIS_VERIFIER_H -#define MLIR_ANALYSIS_VERIFIER_H +#ifndef MLIR_IR_VERIFIER_H +#define MLIR_IR_VERIFIER_H namespace mlir { struct LogicalResult; diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index 3647a31fd950b..1acfc99053846 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -34,6 +34,7 @@ #include "mlir/Dialect/LoopOps/Passes.h" #include "mlir/Dialect/Quant/Passes.h" #include "mlir/Dialect/SPIRV/Passes.h" +#include "mlir/Dialect/StandardOps/Transforms/Passes.h" #include "mlir/Transforms/LocationSnapshot.h" #include "mlir/Transforms/Passes.h" #include "mlir/Transforms/ViewOpGraph.h" @@ -86,6 +87,10 @@ inline void registerAllPasses() { // SPIR-V #define GEN_PASS_REGISTRATION #include "mlir/Dialect/SPIRV/Passes.h.inc" + + // Standard +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/StandardOps/Transforms/Passes.h.inc" } } // namespace mlir diff --git a/mlir/include/mlir/InitAllTranslations.h b/mlir/include/mlir/InitAllTranslations.h index 4952713c38286..8efae67df2052 100644 --- a/mlir/include/mlir/InitAllTranslations.h +++ b/mlir/include/mlir/InitAllTranslations.h @@ -17,6 +17,7 @@ namespace mlir { void registerFromLLVMIRTranslation(); +void registerFromSPIRVTranslation(); void registerToLLVMIRTranslation(); void registerToSPIRVTranslation(); void registerToNVVMIRTranslation(); @@ -29,6 +30,7 @@ void registerAVX512ToLLVMIRTranslation(); inline void registerAllTranslations() { static bool init_once = []() { registerFromLLVMIRTranslation(); + registerFromSPIRVTranslation(); registerToLLVMIRTranslation(); registerToSPIRVTranslation(); registerToNVVMIRTranslation(); diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt 
b/mlir/include/mlir/Interfaces/CMakeLists.txt index fea8ffb2fc0ab..18efa97ab1009 100644 --- a/mlir/include/mlir/Interfaces/CMakeLists.txt +++ b/mlir/include/mlir/Interfaces/CMakeLists.txt @@ -2,33 +2,40 @@ set(LLVM_TARGET_DEFINITIONS CallInterfaces.td) mlir_tablegen(CallInterfaces.h.inc -gen-op-interface-decls) mlir_tablegen(CallInterfaces.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRCallInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIRCallInterfacesIncGen) set(LLVM_TARGET_DEFINITIONS ControlFlowInterfaces.td) mlir_tablegen(ControlFlowInterfaces.h.inc -gen-op-interface-decls) mlir_tablegen(ControlFlowInterfaces.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRControlFlowInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIRControlFlowInterfacesIncGen) set(LLVM_TARGET_DEFINITIONS DerivedAttributeOpInterface.td) mlir_tablegen(DerivedAttributeOpInterface.h.inc -gen-op-interface-decls) mlir_tablegen(DerivedAttributeOpInterface.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRDerivedAttributeOpInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRDerivedAttributeOpInterfaceIncGen) set(LLVM_TARGET_DEFINITIONS InferTypeOpInterface.td) mlir_tablegen(InferTypeOpInterface.h.inc -gen-op-interface-decls) mlir_tablegen(InferTypeOpInterface.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRInferTypeOpInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRInferTypeOpInterfaceIncGen) set(LLVM_TARGET_DEFINITIONS LoopLikeInterface.td) mlir_tablegen(LoopLikeInterface.h.inc -gen-op-interface-decls) mlir_tablegen(LoopLikeInterface.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRLoopLikeInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRLoopLikeInterfaceIncGen) set(LLVM_TARGET_DEFINITIONS SideEffects.td) mlir_tablegen(SideEffectInterfaces.h.inc -gen-op-interface-decls) mlir_tablegen(SideEffectInterfaces.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRSideEffectOpInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIRSideEffectOpInterfacesIncGen) set(LLVM_TARGET_DEFINITIONS ViewLikeInterface.td) mlir_tablegen(ViewLikeInterface.h.inc -gen-op-interface-decls) mlir_tablegen(ViewLikeInterface.cpp.inc -gen-op-interface-defs) add_public_tablegen_target(MLIRViewLikeInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRViewLikeInterfaceIncGen) diff --git a/mlir/include/mlir/Interfaces/CallInterfaces.td b/mlir/include/mlir/Interfaces/CallInterfaces.td index 0ff189de68000..81ab52f197aaf 100644 --- a/mlir/include/mlir/Interfaces/CallInterfaces.td +++ b/mlir/include/mlir/Interfaces/CallInterfaces.td @@ -34,7 +34,8 @@ def CallOpInterface : OpInterface<"CallOpInterface"> { InterfaceMethod<[{ Returns the callee of this call-like operation. A `callee` is either a reference to a symbol, via SymbolRefAttr, or a reference to a defined - SSA value. + SSA value. If the reference is an SSA value, the SSA value corresponds + to a region of a lambda-like operation. 
}], "CallInterfaceCallable", "getCallableForCallee" >, diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h index e22454538343c..e18c46f745a2f 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h @@ -24,11 +24,6 @@ class BranchOpInterface; //===----------------------------------------------------------------------===// namespace detail { -/// Erase an operand from a branch operation that is used as a successor -/// operand. `operandIndex` is the operand within `operands` to be erased. -void eraseBranchSuccessorOperand(OperandRange operands, unsigned operandIndex, - Operation *op); - /// Return the `BlockArgument` corresponding to operand `operandIndex` in some /// successor if `operandIndex` is within the range of `operands`, or None if /// `operandIndex` isn't a successor operand index. diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td index 5c02482394b7a..591ca11830e96 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td @@ -27,29 +27,25 @@ def BranchOpInterface : OpInterface<"BranchOpInterface"> { }]; let methods = [ InterfaceMethod<[{ - Returns a set of values that correspond to the arguments to the + Returns a mutable range of operands that correspond to the arguments of successor at the given index. Returns None if the operands to the successor are non-materialized values, i.e. they are internal to the operation. }], - "Optional", "getSuccessorOperands", (ins "unsigned":$index) + "Optional", "getMutableSuccessorOperands", + (ins "unsigned":$index) >, InterfaceMethod<[{ - Return true if this operation can erase an operand to a successor block. - }], - "bool", "canEraseSuccessorOperand" - >, - InterfaceMethod<[{ - Erase the operand at `operandIndex` from the `index`-th successor. This - should only be called if `canEraseSuccessorOperand` returns true. + Returns a range of operands that correspond to the arguments of + successor at the given index. Returns None if the operands to the + successor are non-materialized values, i.e. they are internal to the + operation. }], - "void", "eraseSuccessorOperand", - (ins "unsigned":$index, "unsigned":$operandIndex), [{}], - /*defaultImplementation=*/[{ + "Optional", "getSuccessorOperands", + (ins "unsigned":$index), [{}], [{ ConcreteOp *op = static_cast(this); - Optional operands = op->getSuccessorOperands(index); - assert(operands && "unable to query operands for successor"); - detail::eraseBranchSuccessorOperand(*operands, operandIndex, *op); + auto operands = op->getMutableSuccessorOperands(index); + return operands ? Optional(*operands) : llvm::None; }] >, InterfaceMethod<[{ diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h index 5e00984587553..7c0f9bd958a10 100644 --- a/mlir/include/mlir/Pass/Pass.h +++ b/mlir/include/mlir/Pass/Pass.h @@ -252,6 +252,9 @@ class Pass { /// Allow access to 'clone' and 'run'. friend class OpPassManager; + /// Allow access to 'run'. + friend class PassManager; + /// Allow access to 'passOptions'. 
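// A sketch of the replacement workflow for the erased hooks above: erasing a
// successor operand now goes through the mutable range (assumes `branch`
// implements BranchOpInterface; helper name is hypothetical):
#include "mlir/Interfaces/ControlFlowInterfaces.h"
inline void eraseSuccessorOperand(mlir::BranchOpInterface branch,
                                  unsigned succIndex, unsigned operandIndex) {
  if (auto operands = branch.getMutableSuccessorOperands(succIndex))
    operands->erase(operandIndex);
}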
friend class PassInfo; }; diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index 3117d0f33dc6b..be5c5d5bcc22c 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -99,13 +99,16 @@ class OpPassManager { void mergeStatisticsInto(OpPassManager &other); private: - OpPassManager(OperationName name, bool disableThreads, bool verifyPasses); + OpPassManager(OperationName name, bool verifyPasses); /// A pointer to an internal implementation instance. std::unique_ptr impl; /// Allow access to the constructor. friend class PassManager; + + /// Allow access. + friend detail::OpPassManagerImpl; }; //===----------------------------------------------------------------------===// @@ -136,17 +139,13 @@ class PassManager : public OpPassManager { LLVM_NODISCARD LogicalResult run(ModuleOp module); - /// Disable support for multi-threading within the pass manager. - void disableMultithreading(bool disable = true); - - /// Return true if the pass manager is configured with multi-threading - /// enabled. - bool isMultithreadingEnabled(); - /// Enable support for the pass manager to generate a reproducer on the event /// of a crash or a pass failure. `outputFile` is a .mlir filename used to - /// write the generated reproducer. - void enableCrashReproducerGeneration(StringRef outputFile); + /// write the generated reproducer. If `genLocalReproducer` is true, the pass + /// manager will attempt to generate a local reproducer that contains the + /// smallest pipeline. + void enableCrashReproducerGeneration(StringRef outputFile, + bool genLocalReproducer = false); //===--------------------------------------------------------------------===// // Instrumentations @@ -229,12 +228,38 @@ class PassManager : public OpPassManager { //===--------------------------------------------------------------------===// // Pass Timing + /// A configuration struct provided to the pass timing feature. + class PassTimingConfig { + public: + using PrintCallbackFn = function_ref; + + /// Initialize the configuration. + /// * 'displayMode' switch between list or pipeline display (see the + /// `PassDisplayMode` enum documentation). + explicit PassTimingConfig( + PassDisplayMode displayMode = PassDisplayMode::Pipeline) + : displayMode(displayMode) {} + + virtual ~PassTimingConfig(); + + /// A hook that may be overridden by a derived config to control the + /// printing. The callback is supplied by the framework and the config is + /// responsible to call it back with a stream for the output. + virtual void printTiming(PrintCallbackFn printCallback); + + /// Return the `PassDisplayMode` this config was created with. + PassDisplayMode getDisplayMode() { return displayMode; } + + private: + PassDisplayMode displayMode; + }; + /// Add an instrumentation to time the execution of passes and the computation /// of analyses. /// Note: Timing should be enabled after all other instrumentations to avoid /// any potential "ghost" timing from other instrumentations being /// unintentionally included in the timing results. - void enableTiming(PassDisplayMode displayMode = PassDisplayMode::Pipeline); + void enableTiming(std::unique_ptr config = nullptr); /// Prompts the pass manager to print the statistics collected for each of the /// held passes after each call to 'run'. @@ -245,8 +270,12 @@ class PassManager : public OpPassManager { /// Dump the statistics of the passes within this pass manager. void dumpStatistics(); - /// Flag that specifies if pass timing is enabled. 
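// A sketch of opting into the new timing configuration (assumes `pm` is a
// configured PassManager and that PassTimingConfig is scoped as declared
// above; List mode prints a flat per-pass summary):
#include "mlir/Pass/PassManager.h"
#include <memory>
inline void enableListTiming(mlir::PassManager &pm) {
  pm.enableTiming(std::make_unique<mlir::PassManager::PassTimingConfig>(
      mlir::PassDisplayMode::List));
}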
- bool passTiming : 1; + /// Run the pass manager with crash recovery enabled. + LogicalResult runWithCrashRecovery(ModuleOp module, AnalysisManager am); + /// Run the given passes with crash recovery enabled. + LogicalResult + runWithCrashRecovery(MutableArrayRef<std::unique_ptr<Pass>> passes, + ModuleOp module, AnalysisManager am); /// Flag that specifies if pass statistics should be dumped. Optional<PassDisplayMode> passStatisticsMode; @@ -256,6 +285,12 @@ /// An optional filename to use when generating a crash reproducer if valid. Optional<std::string> crashReproducerFileName; + + /// Flag that specifies if pass timing is enabled. + bool passTiming : 1; + + /// Flag that specifies if the generated crash reproducer should be local. + bool localReproducer : 1; }; /// Register a set of useful command-line options that can be used to configure diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h index 1887188bf3e2f..17e020442eb48 100644 --- a/mlir/include/mlir/Support/LLVM.h +++ b/mlir/include/mlir/Support/LLVM.h @@ -25,38 +25,54 @@ // Forward declarations. namespace llvm { -// Containers. +// String types +template +class SmallString; class StringRef; class StringLiteral; class Twine; -template class SmallPtrSetImpl; -template class SmallPtrSet; -template class SmallVectorImpl; -template class SmallVector; -template class SmallString; + +// Containers. template class ArrayRef; -template class MutableArrayRef; -template class TinyPtrVector; -template class Optional; -template class PointerUnion; namespace detail { template struct DenseMapPair; -} -template struct DenseMapInfo; -template class DenseSet; +} // namespace detail template class DenseMap; -template class function_ref; -template class iterator_range; -template class TypeSwitch; +template +struct DenseMapInfo; +template +class DenseSet; class MallocAllocator; +template +class MutableArrayRef; +template +class Optional; +template +class PointerUnion; +template +class SmallPtrSet; +template +class SmallPtrSetImpl; +template +class SmallVector; +template +class SmallVectorImpl; template class StringSet; +template +class TinyPtrVector; +template +class TypeSwitch; // Other common classes. -class raw_ostream; class APInt; class APFloat; +template +class function_ref; +template +class iterator_range; +class raw_ostream; } // end namespace llvm namespace mlir { @@ -68,6 +84,14 @@ using llvm::dyn_cast_or_null; using llvm::isa; using llvm::isa_and_nonnull; +// String types +using llvm::SmallString; +using llvm::StringLiteral; +using llvm::StringRef; +using llvm::Twine; + +// Container Related types +// +// Containers. using llvm::ArrayRef; using llvm::DenseMapInfo; @@ -79,27 +103,24 @@ template > using DenseSet = llvm::DenseSet; template using StringSet = llvm::StringSet; -template using function_ref = llvm::function_ref; -using llvm::iterator_range; using llvm::MutableArrayRef; using llvm::None; using llvm::Optional; using llvm::PointerUnion; using llvm::SmallPtrSet; using llvm::SmallPtrSetImpl; -using llvm::SmallString; using llvm::SmallVector; using llvm::SmallVectorImpl; -using llvm::StringLiteral; -using llvm::StringRef; using llvm::TinyPtrVector; -using llvm::Twine; template using TypeSwitch = llvm::TypeSwitch; // Other common classes.
using llvm::APFloat; using llvm::APInt; +template +using function_ref = llvm::function_ref; +using llvm::iterator_range; using llvm::raw_ostream; } // namespace mlir diff --git a/mlir/include/mlir/Support/StorageUniquer.h b/mlir/include/mlir/Support/StorageUniquer.h index 62a43ff6d1fed..f13a2fef9d50e 100644 --- a/mlir/include/mlir/Support/StorageUniquer.h +++ b/mlir/include/mlir/Support/StorageUniquer.h @@ -65,6 +65,9 @@ class StorageUniquer { StorageUniquer(); ~StorageUniquer(); + /// Set the flag specifying if multi-threading is disabled within the uniquer. + void disableMultithreading(bool disable = true); + /// This class acts as the base storage that all storage classes must derived /// from. class BaseStorage { diff --git a/mlir/include/mlir/TableGen/OpInterfaces.h b/mlir/include/mlir/TableGen/OpInterfaces.h index 2e1a63cf66362..0e1b943ce382e 100644 --- a/mlir/include/mlir/TableGen/OpInterfaces.h +++ b/mlir/include/mlir/TableGen/OpInterfaces.h @@ -89,6 +89,9 @@ class OpInterface { // Return the interfaces extra class declaration code. llvm::Optional getExtraClassDeclaration() const; + // Return the traits extra class declaration code. + llvm::Optional getExtraTraitClassDeclaration() const; + // Return the verify method body if it has one. llvm::Optional getVerify() const; diff --git a/mlir/include/mlir/TableGen/OpTrait.h b/mlir/include/mlir/TableGen/OpTrait.h index 2d212f7a9d7c8..269c6393e4341 100644 --- a/mlir/include/mlir/TableGen/OpTrait.h +++ b/mlir/include/mlir/TableGen/OpTrait.h @@ -15,6 +15,7 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringRef.h" +#include namespace llvm { class Init; @@ -105,6 +106,10 @@ class InterfaceOpTrait : public OpTrait { // Whether the declaration of methods for this trait should be emitted. bool shouldDeclareMethods() const; + + // Returns the methods that should always be declared if this interface is + // emitting declarations. + std::vector getAlwaysDeclaredMethods() const; }; } // end namespace tblgen diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h new file mode 100644 index 0000000000000..013a55f81f60f --- /dev/null +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -0,0 +1,149 @@ +//===- BufferPlacement.h - Buffer Assignment Utilities ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines buffer assignment helper methods to compute correct +// and valid positions for placing Alloc and Dealloc operations. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORMS_BUFFERPLACEMENT_H +#define MLIR_TRANSFORMS_BUFFERPLACEMENT_H + +#include "mlir/Analysis/Liveness.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/Operation.h" +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { + +/// Prepares a buffer placement phase. It can place (user-defined) alloc +/// nodes. This simplifies the integration of the actual buffer-placement +/// pass. 
Sample usage: +/// BufferAssignmentPlacer baHelper(regionOp); +/// -> determine alloc positions +/// auto allocPosition = baHelper.computeAllocPosition(value); +/// -> place alloc +/// allocBuilder.setInsertionPoint(positions.getAllocPosition()); +/// +/// Note: this class is intended to be used during legalization. In order +/// to move alloc and dealloc nodes into the right places you can use the +/// createBufferPlacementPass() function. +class BufferAssignmentPlacer { +public: + /// Creates a new assignment builder. + explicit BufferAssignmentPlacer(Operation *op); + + /// Returns the operation this analysis was constructed from. + Operation *getOperation() const { return operation; } + + /// Computes the actual position to place allocs for the given result. + OpBuilder::InsertPoint computeAllocPosition(OpResult result); + +private: + /// The operation this analysis was constructed from. + Operation *operation; +}; + +/// Helper conversion pattern that encapsulates a BufferAssignmentPlacer +/// instance. Sample usage: +/// class CustomConversionPattern : public +/// BufferAssignmentOpConversionPattern +/// { +/// ... matchAndRewrite(...) { +/// -> Access stored BufferAssignmentPlacer +/// bufferAssignment->computeAllocPosition(resultOp); +/// } +/// }; +template <typename SourceOp> +class BufferAssignmentOpConversionPattern + : public OpConversionPattern<SourceOp> { +public: + explicit BufferAssignmentOpConversionPattern( + MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, + TypeConverter *converter = nullptr, PatternBenefit benefit = 1) + : OpConversionPattern<SourceOp>(context, benefit), + bufferAssignment(bufferAssignment), converter(converter) {} + +protected: + BufferAssignmentPlacer *bufferAssignment; + TypeConverter *converter; +}; + +/// This conversion adds an extra argument for each function result which makes +/// the converted function a void function. A type converter must be provided +/// for this conversion to convert a non-shaped type to memref. +/// BufferAssignmentTypeConverter is a helper TypeConverter for this +/// purpose. All the non-shaped types of the input function will be converted to +/// memref. +class FunctionAndBlockSignatureConverter + : public BufferAssignmentOpConversionPattern<FuncOp> { +public: + using BufferAssignmentOpConversionPattern< + FuncOp>::BufferAssignmentOpConversionPattern; + + /// Performs the actual signature rewriting step. + LogicalResult + matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const final; +}; + +/// This pattern converter transforms a non-void ReturnOpSourceTy into a void +/// return of type ReturnOpTargetTy. It uses a copy operation of type CopyOpTy +/// to copy the results to the output buffer. +template <typename ReturnOpSourceTy, typename ReturnOpTargetTy, + typename CopyOpTy> +class NonVoidToVoidReturnOpConverter + : public BufferAssignmentOpConversionPattern<ReturnOpSourceTy> { +public: + using BufferAssignmentOpConversionPattern< + ReturnOpSourceTy>::BufferAssignmentOpConversionPattern; + + /// Performs the actual return-op conversion step. + LogicalResult + matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef<Value> operands, + ConversionPatternRewriter &rewriter) const final { + unsigned numReturnValues = returnOp.getNumOperands(); + Block &entryBlock = returnOp.getParentRegion()->front(); + unsigned numFuncArgs = entryBlock.getNumArguments(); + Location loc = returnOp.getLoc(); + + // Find the corresponding output buffer for each operand.
+ assert(numReturnValues <= numFuncArgs && + "The number of operands of the return operation is more than the " + "number of function arguments."); + unsigned firstReturnParameter = numFuncArgs - numReturnValues; + for (auto operand : llvm::enumerate(operands)) { + unsigned returnArgNumber = firstReturnParameter + operand.index(); + BlockArgument dstBuffer = entryBlock.getArgument(returnArgNumber); + if (dstBuffer == operand.value()) + continue; + + // Insert the copy operation to copy before the return. + rewriter.setInsertionPoint(returnOp); + rewriter.create<CopyOpTy>(loc, operand.value(), + entryBlock.getArgument(returnArgNumber)); + } + // Insert the new target return operation. + rewriter.replaceOpWithNewOp<ReturnOpTargetTy>(returnOp); + return success(); + } +}; + +/// A helper type converter class for use inside Buffer Assignment operation +/// conversion patterns. The default constructor keeps all the types intact +/// except for the ranked-tensor types, which are converted to memref types. +class BufferAssignmentTypeConverter : public TypeConverter { +public: + BufferAssignmentTypeConverter(); +}; + +} // end namespace mlir + +#endif // MLIR_TRANSFORMS_BUFFERPLACEMENT_H diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 2298b3bb3c732..31b5e04c9dbd7 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -348,9 +348,8 @@ class ConversionPatternRewriter final : public PatternRewriter { /// implemented for dialect conversion. void eraseBlock(Block *block) override; - /// PatternRewriter hook for creating a new block with the given arguments. - Block *createBlock(Region *parent, Region::iterator insertPt = {}, - TypeRange argTypes = llvm::None) override; + /// PatternRewriter hook creating a new block. + void notifyBlockCreated(Block *block) override; /// PatternRewriter hook for splitting a block into two parts. Block *splitBlock(Block *block, Block::iterator before) override; @@ -373,7 +372,7 @@ class ConversionPatternRewriter final : public PatternRewriter { using PatternRewriter::cloneRegionBefore; /// PatternRewriter hook for inserting a new operation. - Operation *insert(Operation *op) override; + void notifyOperationInserted(Operation *op) override; /// PatternRewriter hook for updating the root operation in-place. /// Note: These methods only track updates to the top-level operation itself, @@ -660,20 +659,25 @@ class ConversionTarget { /// ConversionPatternRewriter, to see what additional constraints are imposed on /// the use of the PatternRewriter. -/// Apply a partial conversion on the given operations, and all nested +/// Apply a partial conversion on the given operations and all nested /// operations. This method converts as many operations to the target as /// possible, ignoring operations that failed to legalize. This method only -/// returns failure if there are unreachable blocks in any of the regions nested -/// within 'ops'. If 'converter' is provided, the signatures of blocks and -/// regions are also converted. +/// returns failure if there are ops explicitly marked as illegal. If `converter` is +/// provided, the signatures of blocks and regions are also converted. +/// If an `unconvertedOps` set is provided, all operations that are found not +/// to be legalizable to the given `target` are placed within that set.
LLVM_NODISCARD LogicalResult
applyPartialConversion(ArrayRef<Operation *> ops, ConversionTarget &target,
                       const OwningRewritePatternList &patterns,
-                       TypeConverter *converter = nullptr);
+                       TypeConverter *converter = nullptr,
+                       DenseSet<Operation *> *unconvertedOps = nullptr);
LLVM_NODISCARD LogicalResult
applyPartialConversion(Operation *op, ConversionTarget &target,
                       const OwningRewritePatternList &patterns,
-                       TypeConverter *converter = nullptr);
+                       TypeConverter *converter = nullptr,
+                       DenseSet<Operation *> *unconvertedOps = nullptr);

/// Apply a complete conversion on the given operations, and all nested
/// operations. This method returns failure if the conversion of any operation
diff --git a/mlir/include/mlir/Transforms/FoldUtils.h b/mlir/include/mlir/Transforms/FoldUtils.h
index 4155533b122ec..f8c678d11b6a9 100644
--- a/mlir/include/mlir/Transforms/FoldUtils.h
+++ b/mlir/include/mlir/Transforms/FoldUtils.h
@@ -81,7 +81,7 @@ class OperationFolder {
    // fold). Using create methods of the builder will insert the op, so not
    // using it here.
    OperationState state(location, OpTy::getOperationName());
-    OpTy::build(&builder, state, std::forward<Args>(args)...);
+    OpTy::build(builder, state, std::forward<Args>(args)...);
    Operation *op = Operation::create(state);

    if (failed(tryToFold(builder, op, results)) || op->getNumResults() == 0) {
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 2f38a24236e3a..7a07b6db23fce 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -38,8 +38,9 @@ LogicalResult loopUnrollFull(AffineForOp forOp);

/// Unrolls this for operation by the specified unroll factor. Returns failure
/// if the loop cannot be unrolled either due to restrictions or due to invalid
-/// unroll factors.
+/// unroll factors. Requires positive loop bounds and step.
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor);
+LogicalResult loopUnrollByFactor(loop::ForOp forOp, uint64_t unrollFactor);

/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
@@ -68,9 +69,10 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
                                      uint64_t unrollJamFactor);

-/// Promotes the loop body of a AffineForOp to its containing block if the
-/// AffineForOp was known to have a single iteration.
+/// Promotes the loop body of an AffineForOp/loop::ForOp to its containing
+/// block if the loop was known to have a single iteration.
LogicalResult promoteIfSingleIteration(AffineForOp forOp);
+LogicalResult promoteIfSingleIteration(loop::ForOp forOp);

/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
/// their body into the containing Block.
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 70ac4abcb9a21..f54333df2dd62 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -26,6 +26,9 @@ class ModuleOp;
class Pass;
template <typename T> class OperationPass;

+/// Creates an instance of the BufferPlacement pass.
+std::unique_ptr<Pass> createBufferPlacementPass();
+
/// Creates an instance of the Canonicalizer pass.
std::unique_ptr createCanonicalizerPass(); diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td index 8b1671b13a8fb..4cc7d631fb94a 100644 --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -102,6 +102,68 @@ def AffinePipelineDataTransfer let constructor = "mlir::createPipelineDataTransferPass()"; } +def BufferPlacement : Pass<"buffer-placement"> { + let summary = "Optimizes placement of alloc and dealloc operations"; + let description = [{ + This pass implements an algorithm to optimize the placement of alloc and + dealloc operations. This pass also inserts missing dealloc operations + automatically to reclaim memory. + + + Input + + ```mlir + #map0 = affine_map<(d0) -> (d0)> + module { + func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2 + ^bb1: + br ^bb3(%arg1 : memref<2xf32>) + ^bb2: + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + br ^bb3(%0 : memref<2xf32>) + ^bb3(%1: memref<2xf32>): + "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return + } + } + + ``` + + Output + + ```mlir + #map0 = affine_map<(d0) -> (d0)> + module { + func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + cond_br %arg0, ^bb1, ^bb2 + ^bb1: // pred: ^bb0 + br ^bb3(%arg1 : memref<2xf32>) + ^bb2: // pred: ^bb0 + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%arg3: f32, %arg4: f32): // no predecessors + %2 = exp %arg3 : f32 + linalg.yield %2 : f32 + }: memref<2xf32>, memref<2xf32> + br ^bb3(%0 : memref<2xf32>) + ^bb3(%1: memref<2xf32>): // 2 preds: ^bb1, ^bb2 + linalg.copy(%1, %arg2) : memref<2xf32>, memref<2xf32> + dealloc %0 : memref<2xf32> + return + } + } + ``` + + }]; + let constructor = "mlir::createBufferPlacementPass()"; +} + def Canonicalizer : Pass<"canonicalize"> { let summary = "Canonicalize operations"; let description = [{ diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 17e380d60b059..b43cd6bd7be6c 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -2075,7 +2075,7 @@ Optional FlatAffineConstraints::getConstantBoundOnDimSize( /*num=*/getNumDimIds()); Optional minDiff = None; - unsigned minLbPosition, minUbPosition; + unsigned minLbPosition = 0, minUbPosition = 0; for (auto ubPos : ubIndices) { for (auto lbPos : lbIndices) { // Look for a lower bound and an upper bound that only differ by a diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index f9c0236e3d4ec..63ddcdaee3d36 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -2,28 +2,22 @@ set(LLVM_OPTIONAL_SOURCES AffineAnalysis.cpp AffineStructures.cpp CallGraph.cpp - Dominance.cpp Liveness.cpp LoopAnalysis.cpp NestedMatcher.cpp SliceAnalysis.cpp Utils.cpp - Verifier.cpp ) add_mlir_library(MLIRAnalysis CallGraph.cpp Liveness.cpp SliceAnalysis.cpp - Dominance.cpp - Verifier.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Analysis - ) -target_link_libraries(MLIRAnalysis - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIRCallInterfaces 
MLIRControlFlowInterfaces @@ -40,12 +34,11 @@ add_mlir_library(MLIRLoopAnalysis ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Analysis - ) -target_link_libraries(MLIRLoopAnalysis - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface - MLIRLoopOps) + MLIRLoopOps + ) diff --git a/mlir/lib/Analysis/CallGraph.cpp b/mlir/lib/Analysis/CallGraph.cpp index e31641a87e054..94965c7a623df 100644 --- a/mlir/lib/Analysis/CallGraph.cpp +++ b/mlir/lib/Analysis/CallGraph.cpp @@ -87,9 +87,8 @@ static void computeCallGraph(Operation *op, CallGraph &cg, } for (Region ®ion : op->getRegions()) - for (Block &block : region) - for (Operation &nested : block) - computeCallGraph(&nested, cg, parentNode, resolveCalls); + for (Operation &nested : region.getOps()) + computeCallGraph(&nested, cg, parentNode, resolveCalls); } CallGraph::CallGraph(Operation *op) : externalNode(/*callableRegion=*/nullptr) { @@ -179,7 +178,7 @@ void CallGraph::print(raw_ostream &os) const { auto *parentOp = callableRegion->getParentOp(); os << "'" << callableRegion->getParentOp()->getName() << "' - Region #" << callableRegion->getRegionNumber(); - if (auto attrs = parentOp->getAttrList().getDictionary()) + if (auto attrs = parentOp->getMutableAttrDict().getDictionary()) os << " : " << attrs; }; diff --git a/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt b/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt index 11397b9e105e5..d3257b136cf19 100644 --- a/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt @@ -6,15 +6,14 @@ add_mlir_conversion_library(MLIRAVX512ToLLVM DEPENDS MLIRConversionPassIncGen -) -target_link_libraries(MLIRAVX512ToLLVM - PUBLIC + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRAVX512 MLIRLLVMAVX512 MLIRLLVMIR MLIRStandardToLLVM MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.cpp b/mlir/lib/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.cpp index 33fd169c6fdac..0753e388f72e9 100644 --- a/mlir/lib/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.cpp +++ b/mlir/lib/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.cpp @@ -17,14 +17,11 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" -#include "mlir/EDSC/Intrinsics.h" #include "mlir/IR/Module.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" using namespace mlir; -using namespace mlir::edsc; -using namespace mlir::edsc::intrinsics; using namespace mlir::vector; using namespace mlir::avx512; diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt index 9324f7b21c807..6b54324aa0d30 100644 --- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt @@ -6,16 +6,15 @@ add_mlir_conversion_library(MLIRAffineToStandard DEPENDS MLIRConversionPassIncGen -) -target_link_libraries( - MLIRAffineToStandard - PUBLIC + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRAffineOps MLIRLoopOps MLIRPass MLIRStandardOps MLIRTransforms MLIRIR - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h new file mode 100644 index 0000000000000..4a1fe1afffe6b --- /dev/null +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -0,0 +1,172 @@ +//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ +#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ + +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Builders.h" +#include "llvm/Support/FormatVariadic.h" + +namespace mlir { + +template +struct GPUFuncOpLowering : ConvertToLLVMPattern { + explicit GPUFuncOpLowering(LLVMTypeConverter &typeConverter) + : ConvertToLLVMPattern(gpu::GPUFuncOp::getOperationName(), + typeConverter.getDialect()->getContext(), + typeConverter) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + assert(operands.empty() && "func op is not expected to have operands"); + auto gpuFuncOp = cast(op); + Location loc = gpuFuncOp.getLoc(); + + SmallVector workgroupBuffers; + workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); + for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { + Value attribution = en.value(); + + auto type = attribution.getType().dyn_cast(); + assert(type && type.hasStaticShape() && "unexpected type in attribution"); + + uint64_t numElements = type.getNumElements(); + + auto elementType = typeConverter.convertType(type.getElementType()) + .template cast(); + auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements); + std::string name = std::string( + llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index())); + auto globalOp = rewriter.create( + gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, + LLVM::Linkage::Internal, name, /*value=*/Attribute(), + gpu::GPUDialect::getWorkgroupAddressSpace()); + workgroupBuffers.push_back(globalOp); + } + + // Rewrite the original GPU function to an LLVM function. + auto funcType = typeConverter.convertType(gpuFuncOp.getType()) + .template cast() + .getPointerElementTy(); + + // Remap proper input types. + TypeConverter::SignatureConversion signatureConversion( + gpuFuncOp.front().getNumArguments()); + typeConverter.convertFunctionSignature( + gpuFuncOp.getType(), /*isVariadic=*/false, signatureConversion); + + // Create the new function operation. Only copy those attributes that are + // not specific to function modeling. + SmallVector attributes; + for (const auto &attr : gpuFuncOp.getAttrs()) { + if (attr.first == SymbolTable::getSymbolAttrName() || + attr.first == impl::getTypeAttrName() || + attr.first == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName()) + continue; + attributes.push_back(attr); + } + auto llvmFuncOp = rewriter.create( + gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, + LLVM::Linkage::External, attributes); + + { + // Insert operations that correspond to converted workgroup and private + // memory attributions to the body of the function. This must operate on + // the original function, before the body region is inlined in the new + // function to maintain the relation between block arguments and the + // parent operation that assigns their semantics. + OpBuilder::InsertionGuard guard(rewriter); + + // Rewrite workgroup memory attributions to addresses of global buffers. 
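+      // Each workgroup attribution was materialized as an LLVM global above;
+      // here we take its address, GEP to the first element, and wrap the
+      // pointer in a memref descriptor so that the existing memref lowering
+      // machinery can be reused unchanged.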
+ rewriter.setInsertionPointToStart(&gpuFuncOp.front()); + unsigned numProperArguments = gpuFuncOp.getNumArguments(); + auto i32Type = LLVM::LLVMType::getInt32Ty(typeConverter.getDialect()); + + Value zero = nullptr; + if (!workgroupBuffers.empty()) + zero = rewriter.create(loc, i32Type, + rewriter.getI32IntegerAttr(0)); + for (auto en : llvm::enumerate(workgroupBuffers)) { + LLVM::GlobalOp global = en.value(); + Value address = rewriter.create(loc, global); + auto elementType = global.getType().getArrayElementType(); + Value memory = rewriter.create( + loc, elementType.getPointerTo(global.addr_space().getZExtValue()), + address, ArrayRef{zero, zero}); + + // Build a memref descriptor pointing to the buffer to plug with the + // existing memref infrastructure. This may use more registers than + // otherwise necessary given that memref sizes are fixed, but we can try + // and canonicalize that away later. + Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()]; + auto type = attribution.getType().cast(); + auto descr = MemRefDescriptor::fromStaticShape( + rewriter, loc, typeConverter, type, memory); + signatureConversion.remapInput(numProperArguments + en.index(), descr); + } + + // Rewrite private memory attributions to alloca'ed buffers. + unsigned numWorkgroupAttributions = + gpuFuncOp.getNumWorkgroupAttributions(); + auto int64Ty = LLVM::LLVMType::getInt64Ty(typeConverter.getDialect()); + for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { + Value attribution = en.value(); + auto type = attribution.getType().cast(); + assert(type && type.hasStaticShape() && + "unexpected type in attribution"); + + // Explicitly drop memory space when lowering private memory + // attributions since NVVM models it as `alloca`s in the default + // memory space and does not support `alloca`s with addrspace(5). + auto ptrType = typeConverter.convertType(type.getElementType()) + .template cast() + .getPointerTo(AllocaAddrSpace); + Value numElements = rewriter.create( + gpuFuncOp.getLoc(), int64Ty, + rewriter.getI64IntegerAttr(type.getNumElements())); + Value allocated = rewriter.create( + gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0); + auto descr = MemRefDescriptor::fromStaticShape( + rewriter, loc, typeConverter, type, allocated); + signatureConversion.remapInput( + numProperArguments + numWorkgroupAttributions + en.index(), descr); + } + } + + // Move the region to the new function, update the entry block signature. 
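+    // applySignatureConversion below also replaces the remapped attribution
+    // block arguments with the descriptors computed above.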
+ rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(), + llvmFuncOp.end()); + rewriter.applySignatureConversion(&llvmFuncOp.getBody(), + signatureConversion); + + rewriter.eraseOp(gpuFuncOp); + return success(); + } +}; + +struct GPUReturnOpLowering : public ConvertToLLVMPattern { + GPUReturnOpLowering(LLVMTypeConverter &typeConverter) + : ConvertToLLVMPattern(gpu::ReturnOp::getOperationName(), + typeConverter.getDialect()->getContext(), + typeConverter) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, operands); + return success(); + } +}; + +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ diff --git a/mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt b/mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt index a4c98e555322b..4696dd65fa62a 100644 --- a/mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToCUDA/CMakeLists.txt @@ -9,10 +9,12 @@ set(SOURCES if (MLIR_CUDA_CONVERSIONS_ENABLED) list(APPEND SOURCES "ConvertKernelFuncToCubin.cpp") set(NVPTX_LIBS - LLVMNVPTXCodeGen - LLVMNVPTXDesc - LLVMNVPTXInfo + MC + NVPTXCodeGen + NVPTXDesc + NVPTXInfo ) + endif() add_mlir_conversion_library(MLIRGPUtoCUDATransforms @@ -20,13 +22,13 @@ add_mlir_conversion_library(MLIRGPUtoCUDATransforms DEPENDS MLIRConversionPassIncGen -) -target_link_libraries(MLIRGPUtoCUDATransforms - PUBLIC + intrinsics_gen + + LINK_COMPONENTS + Core ${NVPTX_LIBS} - LLVMCore - LLVMMC - LLVMSupport + + LINK_LIBS PUBLIC MLIRGPU MLIRIR MLIRLLVMIR diff --git a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt index b7c583d571699..fe8502f0061c3 100644 --- a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt @@ -8,11 +8,8 @@ add_mlir_conversion_library(MLIRGPUtoNVVMTransforms DEPENDS MLIRConversionPassIncGen MLIRGPUToNVVMIncGen - ) -target_link_libraries(MLIRGPUtoNVVMTransforms - PUBLIC - LLVMSupport + LINK_LIBS PUBLIC MLIRGPU MLIRLLVMIR MLIRNVVMIR diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 66a0a94a499ee..afc06c5727a2d 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -21,6 +21,7 @@ #include "mlir/Transforms/DialectConversion.h" #include "llvm/Support/FormatVariadic.h" +#include "../GPUCommon/GPUOpsLowering.h" #include "../GPUCommon/IndexIntrinsicsOpLowering.h" #include "../GPUCommon/OpToFuncCallLowering.h" #include "../PassDetail.h" @@ -88,155 +89,6 @@ struct GPUShuffleOpLowering : public ConvertToLLVMPattern { } }; -struct GPUFuncOpLowering : ConvertToLLVMPattern { - explicit GPUFuncOpLowering(LLVMTypeConverter &typeConverter) - : ConvertToLLVMPattern(gpu::GPUFuncOp::getOperationName(), - typeConverter.getDialect()->getContext(), - typeConverter) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - assert(operands.empty() && "func op is not expected to have operands"); - auto gpuFuncOp = cast(op); - Location loc = gpuFuncOp.getLoc(); - - SmallVector workgroupBuffers; - workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); - for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { - Value attribution = en.value(); - - auto type = attribution.getType().dyn_cast(); - assert(type && type.hasStaticShape() && 
"unexpected type in attribution"); - - uint64_t numElements = type.getNumElements(); - - auto elementType = typeConverter.convertType(type.getElementType()) - .cast(); - auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements); - std::string name = std::string( - llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index())); - auto globalOp = rewriter.create( - gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, - LLVM::Linkage::Internal, name, /*value=*/Attribute(), - gpu::GPUDialect::getWorkgroupAddressSpace()); - workgroupBuffers.push_back(globalOp); - } - - // Rewrite the original GPU function to an LLVM function. - auto funcType = typeConverter.convertType(gpuFuncOp.getType()) - .cast() - .getPointerElementTy(); - - // Remap proper input types. - TypeConverter::SignatureConversion signatureConversion( - gpuFuncOp.front().getNumArguments()); - typeConverter.convertFunctionSignature( - gpuFuncOp.getType(), /*isVariadic=*/false, signatureConversion); - - // Create the new function operation. Only copy those attributes that are - // not specific to function modeling. - SmallVector attributes; - for (const auto &attr : gpuFuncOp.getAttrs()) { - if (attr.first == SymbolTable::getSymbolAttrName() || - attr.first == impl::getTypeAttrName() || - attr.first == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName()) - continue; - attributes.push_back(attr); - } - auto llvmFuncOp = rewriter.create( - gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, - LLVM::Linkage::External, attributes); - - { - // Insert operations that correspond to converted workgroup and private - // memory attributions to the body of the function. This must operate on - // the original function, before the body region is inlined in the new - // function to maintain the relation between block arguments and the - // parent operation that assigns their semantics. - OpBuilder::InsertionGuard guard(rewriter); - - // Rewrite workgroup memory attributions to addresses of global buffers. - rewriter.setInsertionPointToStart(&gpuFuncOp.front()); - unsigned numProperArguments = gpuFuncOp.getNumArguments(); - auto i32Type = LLVM::LLVMType::getInt32Ty(typeConverter.getDialect()); - - Value zero = nullptr; - if (!workgroupBuffers.empty()) - zero = rewriter.create(loc, i32Type, - rewriter.getI32IntegerAttr(0)); - for (auto en : llvm::enumerate(workgroupBuffers)) { - LLVM::GlobalOp global = en.value(); - Value address = rewriter.create(loc, global); - auto elementType = global.getType().getArrayElementType(); - Value memory = rewriter.create( - loc, elementType.getPointerTo(global.addr_space().getZExtValue()), - address, ArrayRef{zero, zero}); - - // Build a memref descriptor pointing to the buffer to plug with the - // existing memref infrastructure. This may use more registers than - // otherwise necessary given that memref sizes are fixed, but we can try - // and canonicalize that away later. - Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()]; - auto type = attribution.getType().cast(); - auto descr = MemRefDescriptor::fromStaticShape( - rewriter, loc, typeConverter, type, memory); - signatureConversion.remapInput(numProperArguments + en.index(), descr); - } - - // Rewrite private memory attributions to alloca'ed buffers. 
- unsigned numWorkgroupAttributions = - gpuFuncOp.getNumWorkgroupAttributions(); - auto int64Ty = LLVM::LLVMType::getInt64Ty(typeConverter.getDialect()); - for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { - Value attribution = en.value(); - auto type = attribution.getType().cast(); - assert(type && type.hasStaticShape() && - "unexpected type in attribution"); - - // Explicitly drop memory space when lowering private memory - // attributions since NVVM models it as `alloca`s in the default - // memory space and does not support `alloca`s with addrspace(5). - auto ptrType = typeConverter.convertType(type.getElementType()) - .cast() - .getPointerTo(); - Value numElements = rewriter.create( - gpuFuncOp.getLoc(), int64Ty, - rewriter.getI64IntegerAttr(type.getNumElements())); - Value allocated = rewriter.create( - gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0); - auto descr = MemRefDescriptor::fromStaticShape( - rewriter, loc, typeConverter, type, allocated); - signatureConversion.remapInput( - numProperArguments + numWorkgroupAttributions + en.index(), descr); - } - } - - // Move the region to the new function, update the entry block signature. - rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(), - llvmFuncOp.end()); - rewriter.applySignatureConversion(&llvmFuncOp.getBody(), - signatureConversion); - - rewriter.eraseOp(gpuFuncOp); - return success(); - } -}; - -struct GPUReturnOpLowering : public ConvertToLLVMPattern { - GPUReturnOpLowering(LLVMTypeConverter &typeConverter) - : ConvertToLLVMPattern(gpu::ReturnOp::getOperationName(), - typeConverter.getDialect()->getContext(), - typeConverter) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, operands); - return success(); - } -}; - /// Import the GPU Ops to NVVM Patterns. #include "GPUToNVVM.cpp.inc" @@ -300,8 +152,11 @@ void mlir::populateGpuToNVVMConversionPatterns( NVVM::BlockIdYOp, NVVM::BlockIdZOp>, GPUIndexIntrinsicOpLowering, - GPUShuffleOpLowering, GPUFuncOpLowering, GPUReturnOpLowering>( - converter); + GPUShuffleOpLowering, GPUReturnOpLowering, + // Explicitly drop memory space when lowering private memory + // attributions since NVVM models it as `alloca`s in the default + // memory space and does not support `alloca`s with addrspace(5). 
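+      // (The GPUFuncOpLowering template argument is the address space used
+      // for such `alloca`s; the ROCDL lowering passes 5 instead.)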
+ GPUFuncOpLowering<0>>(converter); patterns.insert>(converter, "__nv_fabsf", "__nv_fabs"); patterns.insert>(converter, "__nv_ceilf", diff --git a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt index 142734c3ef89b..38b6e1a5ea4fe 100644 --- a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt @@ -1,12 +1,15 @@ +set(LLVM_TARGET_DEFINITIONS GPUToROCDL.td) +mlir_tablegen(GPUToROCDL.cpp.inc -gen-rewriters) +add_public_tablegen_target(MLIRGPUToROCDLIncGen) + add_mlir_conversion_library(MLIRGPUtoROCDLTransforms LowerGpuOpsToROCDLOps.cpp DEPENDS MLIRConversionPassIncGen - ) -target_link_libraries(MLIRGPUtoROCDLTransforms - PUBLIC - LLVMSupport + MLIRGPUToROCDLIncGen + + LINK_LIBS PUBLIC MLIRGPU MLIRLLVMIR MLIRROCDLIR diff --git a/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td new file mode 100644 index 0000000000000..3e81093bf46e1 --- /dev/null +++ b/mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td @@ -0,0 +1,21 @@ +//==-- GPUToROCDL.td - GPU Ops to ROCDL Patterns -------------*- tablegen -*==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Patterns to lower GPU ops to ROCDL. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUTOROCDL_TD +#define MLIR_CONVERSION_GPUTOROCDL_TD + +include "mlir/Dialect/GPU/GPUOps.td" +include "mlir/Dialect/LLVMIR/ROCDLOps.td" + +def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>; + +#endif // MLIR_CONVERSION_GPUTOROCDL_TD diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index ed78bcfb1e76d..f81c382152d7c 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -14,11 +14,16 @@ #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" +#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" +#include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/Support/FormatVariadic.h" +#include "../GPUCommon/GPUOpsLowering.h" #include "../GPUCommon/IndexIntrinsicsOpLowering.h" #include "../GPUCommon/OpToFuncCallLowering.h" #include "../PassDetail.h" @@ -27,6 +32,9 @@ using namespace mlir; namespace { +/// Import the GPU Ops to ROCDL Patterns. +#include "GPUToROCDL.cpp.inc" + // A pass that replaces all occurrences of GPU device operations with their // corresponding ROCDL equivalent. 
// @@ -38,41 +46,25 @@ class LowerGpuOpsToROCDLOpsPass void runOnOperation() override { gpu::GPUModuleOp m = getOperation(); - OwningRewritePatternList patterns; LLVMTypeConverter converter(m.getContext()); - populateStdToLLVMConversionPatterns(converter, patterns); - patterns.insert< - GPUIndexIntrinsicOpLowering, - GPUIndexIntrinsicOpLowering, - GPUIndexIntrinsicOpLowering, - GPUIndexIntrinsicOpLowering>( - converter); - patterns.insert>(converter, "__ocml_fabs_f32", - "__ocml_fabs_f64"); - patterns.insert>(converter, "__ocml_ceil_f32", - "__ocml_ceil_f64"); - patterns.insert>(converter, "__ocml_cos_f32", - "__ocml_cos_f64"); - patterns.insert>(converter, "__ocml_exp_f32", - "__ocml_exp_f64"); - patterns.insert>(converter, "__ocml_log_f32", - "__ocml_log_f64"); - patterns.insert>( - converter, "__ocml_log10_f32", "__ocml_log10_f64"); - patterns.insert>(converter, "__ocml_log2_f32", - "__ocml_log2_f64"); - patterns.insert>(converter, "__ocml_tanh_f32", - "__ocml_tanh_f64"); - ConversionTarget target(getContext()); - target.addLegalDialect(); + OwningRewritePatternList patterns; + + populateGpuRewritePatterns(m.getContext(), patterns); + applyPatternsAndFoldGreedily(m, patterns); + patterns.clear(); + + populateVectorToLLVMConversionPatterns(converter, patterns); + populateStdToLLVMConversionPatterns(converter, patterns); + populateGpuToROCDLConversionPatterns(converter, patterns); + LLVMConversionTarget target(getContext()); + target.addIllegalDialect(); target.addIllegalOp(); target.addIllegalOp(); + target.addLegalDialect(); + // TODO(whchung): Remove once we support replacing non-root ops. + target.addLegalOp(); if (failed(applyPartialConversion(m, target, patterns, &converter))) signalPassFailure(); } @@ -80,6 +72,37 @@ class LowerGpuOpsToROCDLOpsPass } // anonymous namespace +void mlir::populateGpuToROCDLConversionPatterns( + LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { + populateWithGenerated(converter.getDialect()->getContext(), &patterns); + patterns.insert< + GPUIndexIntrinsicOpLowering, + GPUIndexIntrinsicOpLowering, + GPUIndexIntrinsicOpLowering, + GPUIndexIntrinsicOpLowering, + GPUFuncOpLowering<5>, GPUReturnOpLowering>(converter); + patterns.insert>(converter, "__ocml_fabs_f32", + "__ocml_fabs_f64"); + patterns.insert>(converter, "__ocml_ceil_f32", + "__ocml_ceil_f64"); + patterns.insert>(converter, "__ocml_cos_f32", + "__ocml_cos_f64"); + patterns.insert>(converter, "__ocml_exp_f32", + "__ocml_exp_f64"); + patterns.insert>(converter, "__ocml_log_f32", + "__ocml_log_f64"); + patterns.insert>(converter, "__ocml_log10_f32", + "__ocml_log10_f64"); + patterns.insert>(converter, "__ocml_log2_f32", + "__ocml_log2_f64"); + patterns.insert>(converter, "__ocml_tanh_f32", + "__ocml_tanh_f64"); +} + std::unique_ptr> mlir::createLowerGpuOpsToROCDLOpsPass() { return std::make_unique(); diff --git a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt index f473a3e04e518..aa202a34c556c 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt @@ -9,10 +9,8 @@ add_mlir_conversion_library(MLIRGPUtoSPIRVTransforms DEPENDS MLIRConversionPassIncGen MLIRGPUToSPIRVIncGen - ) -target_link_libraries(MLIRGPUtoSPIRVTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRGPU MLIRIR MLIRPass diff --git a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt index ecfc2d75d5f36..c0265096d73bb 100644 --- a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt +++ 
b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt @@ -4,10 +4,8 @@ add_mlir_conversion_library(MLIRGPUtoVulkanTransforms DEPENDS MLIRConversionPassIncGen - ) -target_link_libraries(MLIRGPUtoVulkanTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRGPU MLIRIR MLIRLLVMIR @@ -18,5 +16,4 @@ target_link_libraries(MLIRGPUtoVulkanTransforms MLIRSupport MLIRTransforms MLIRTranslation - LLVMSupport ) diff --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt index 8519566e4fb28..45b60c7ecd864 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt @@ -6,10 +6,12 @@ add_mlir_conversion_library(MLIRLinalgToLLVM DEPENDS MLIRConversionPassIncGen -) + intrinsics_gen -target_link_libraries(MLIRLinalgToLLVM - PUBLIC + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRAffineToStandard MLIREDSC MLIRIR @@ -18,7 +20,6 @@ target_link_libraries(MLIRLinalgToLLVM MLIRLoopToStandard MLIRStandardToLLVM MLIRVectorToLLVM + MLIRVectorToLoops MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp index 1ffe548ae9191..3b3bf0f083706 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp +++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp @@ -14,6 +14,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" +#include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" @@ -117,7 +118,7 @@ class BaseViewConversionHelper { operator Value() { return d; } private: - OpBuilder &rewriter() { return ScopedContext::getBuilder(); } + OpBuilder &rewriter() { return ScopedContext::getBuilderRef(); } Location loc() { return ScopedContext::getLocation(); } MemRefDescriptor d; @@ -575,6 +576,7 @@ void ConvertLinalgToLLVMPass::runOnOperation() { populateAffineToStdConversionPatterns(patterns, &getContext()); populateLoopToStdConversionPatterns(patterns, &getContext()); populateStdToLLVMConversionPatterns(converter, patterns); + populateVectorToLoopsConversionPatterns(patterns, &getContext()); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateVectorToLLVMConversionPatterns(converter, patterns); populateLinalgToStandardConversionPatterns(patterns, &getContext()); diff --git a/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt index f0cc532233665..98553ad967483 100644 --- a/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt @@ -8,10 +8,8 @@ add_mlir_conversion_library(MLIRLinalgToSPIRVTransforms DEPENDS MLIRConversionPassIncGen - ) -target_link_libraries(MLIRLinalgToSPIRVTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRIR MLIRLinalgOps MLIRLinalgUtils diff --git a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp index 14fdc9b207fbf..cf67b96fce1ee 100644 --- a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp +++ b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp @@ -164,7 +164,7 @@ LogicalResult SingleWorkgroupReduction::matchAndRewrite( // Get the output element accessed by this reduction. 
Value zero = spirv::ConstantOp::getZero( - typeConverter.getIndexType(rewriter.getContext()), loc, &rewriter); + typeConverter.getIndexType(rewriter.getContext()), loc, rewriter); SmallVector zeroIndices(originalOutputType.getRank(), zero); Value outputElementPtr = spirv::getElementPtr(typeConverter, originalOutputType, convertedOutput, @@ -181,18 +181,18 @@ LogicalResult SingleWorkgroupReduction::matchAndRewrite( Value condition = rewriter.create( loc, spirv::Scope::Subgroup); - auto createAtomicOp = [&](OpBuilder *builder) { + auto createAtomicOp = [&](OpBuilder &builder) { #define CREATE_ATOMIC_BIN_OP(opKind, spvOp) \ case linalg::RegionMatcher::BinaryOpKind::opKind: { \ - builder->create(loc, outputElementPtr, spirv::Scope::Device, \ - spirv::MemorySemantics::AcquireRelease, \ - groupOperation); \ + builder.create(loc, outputElementPtr, spirv::Scope::Device, \ + spirv::MemorySemantics::AcquireRelease, \ + groupOperation); \ } break switch (*binaryOpKind) { CREATE_ATOMIC_BIN_OP(IAdd, AtomicIAddOp); } #undef CREATE_ATOMIC_BIN_OP }; - spirv::SelectionOp::createIfThen(loc, condition, createAtomicOp, &rewriter); + spirv::SelectionOp::createIfThen(loc, condition, createAtomicOp, rewriter); rewriter.eraseOp(genericOp); return success(); diff --git a/mlir/lib/Conversion/LoopToStandard/CMakeLists.txt b/mlir/lib/Conversion/LoopToStandard/CMakeLists.txt index d4c7528475835..cf749b81b0fb1 100644 --- a/mlir/lib/Conversion/LoopToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/LoopToStandard/CMakeLists.txt @@ -6,12 +6,11 @@ add_mlir_conversion_library(MLIRLoopToStandard DEPENDS MLIRConversionPassIncGen -) -target_link_libraries( - MLIRLoopToStandard - PUBLIC + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRLoopOps MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt index 5b31429a0d0c5..9b4e184d84c35 100644 --- a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt @@ -7,9 +7,8 @@ add_mlir_conversion_library(MLIRLoopsToGPU DEPENDS MLIRConversionPassIncGen -) -target_link_libraries(MLIRLoopsToGPU - PUBLIC + + LINK_LIBS PUBLIC MLIRAffineOps MLIRAffineToStandard MLIRGPU @@ -19,5 +18,4 @@ target_link_libraries(MLIRLoopsToGPU MLIRStandardOps MLIRSupport MLIRTransforms - LLVMSupport ) diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index 04d5381009ccd..c789c90e4b9f7 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -136,7 +136,6 @@ static LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, return success(); } - OpBuilder builder(forOp.getOperation()); if (numBlockDims > 3) { return forOp.emitError("cannot map to more than 3 block dimensions"); } diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp index a597dd7bf078c..94c6a3b20f2ba 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp @@ -36,18 +36,17 @@ struct ForLoopMapper : public ConvertSimpleLoopsToGPUBase { } void runOnFunction() override { - for (Block &block : getFunction()) - for (Operation &op : llvm::make_early_inc_range(block)) { - if (auto forOp = dyn_cast(&op)) { - if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, - numThreadDims))) - signalPassFailure(); - } else if (auto forOp = dyn_cast(&op)) { - if 
(failed(convertLoopNestToGPULaunch(forOp, numBlockDims, - numThreadDims))) - signalPassFailure(); - } + for (Operation &op : llvm::make_early_inc_range(getFunction().getOps())) { + if (auto forOp = dyn_cast(&op)) { + if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, + numThreadDims))) + signalPassFailure(); + } else if (auto forOp = dyn_cast(&op)) { + if (failed( + convertLoopNestToGPULaunch(forOp, numBlockDims, numThreadDims))) + signalPassFailure(); } + } } }; @@ -81,14 +80,10 @@ struct ImperfectlyNestedForLoopMapper funcOp.getLoc(), builder.getIntegerAttr(builder.getIndexType(), val)); workGroupSizeVal.push_back(constOp); } - for (Block &block : getFunction()) { - for (Operation &op : llvm::make_early_inc_range(block)) { - if (auto forOp = dyn_cast(&op)) { - if (failed(convertLoopToGPULaunch(forOp, numWorkGroupsVal, - workGroupSizeVal))) { - return signalPassFailure(); - } - } + for (ForOp forOp : llvm::make_early_inc_range(funcOp.getOps())) { + if (failed(convertLoopToGPULaunch(forOp, numWorkGroupsVal, + workGroupSizeVal))) { + return signalPassFailure(); } } } diff --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt index ef7ad11d93ef4..83bda1048739b 100644 --- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt @@ -6,12 +6,12 @@ add_mlir_conversion_library(MLIRStandardToLLVM DEPENDS MLIRConversionPassIncGen -) -target_link_libraries( - MLIRStandardToLLVM - PUBLIC + intrinsics_gen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRLLVMIR MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index b60b2fafe9c9d..1a01daa1188ed 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -138,6 +138,7 @@ LLVMTypeConverter::LLVMTypeConverter( module->getDataLayout().getPointerSizeInBits(); // Register conversions for the standard types. + addConversion([&](ComplexType type) { return convertComplexType(type); }); addConversion([&](FloatType type) { return convertFloatType(type); }); addConversion([&](FunctionType type) { return convertFunctionType(type); }); addConversion([&](IndexType type) { return convertIndexType(type); }); @@ -191,6 +192,17 @@ Type LLVMTypeConverter::convertFloatType(FloatType type) { } } +// Convert a `ComplexType` to an LLVM type. The result is a complex number +// struct with entries for the +// 1. real part and for the +// 2. imaginary part. +static constexpr unsigned kRealPosInComplexNumberStruct = 0; +static constexpr unsigned kImaginaryPosInComplexNumberStruct = 1; +Type LLVMTypeConverter::convertComplexType(ComplexType type) { + auto elementType = convertType(type.getElementType()).cast(); + return LLVM::LLVMType::getStructTy(llvmDialect, {elementType, elementType}); +} + // Except for signatures, MLIR function types are converted into LLVM // pointer-to-function types. 
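// For example, the MLIR type (f32) -> f32 used as a value is converted to a
// pointer to the corresponding LLVM function type, roughly
// !llvm<"float (float)*">.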
Type LLVMTypeConverter::convertFunctionType(FunctionType type) {
@@ -392,6 +404,7 @@ ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName,
/*============================================================================*/
/* StructBuilder implementation */
/*============================================================================*/
+
StructBuilder::StructBuilder(Value v) : value(v) {
  assert(value != nullptr && "value cannot be null");
  structType = value.getType().dyn_cast<LLVM::LLVMType>();
@@ -410,6 +423,35 @@ void StructBuilder::setPtr(OpBuilder &builder, Location loc, unsigned pos,
  value = builder.create<LLVM::InsertValueOp>(loc, structType, value, ptr,
                                              builder.getI64ArrayAttr(pos));
}
+
+/*============================================================================*/
+/* ComplexStructBuilder implementation */
+/*============================================================================*/
+
+ComplexStructBuilder ComplexStructBuilder::undef(OpBuilder &builder,
+                                                 Location loc, Type type) {
+  Value val = builder.create<LLVM::UndefOp>(loc, type.cast<LLVM::LLVMType>());
+  return ComplexStructBuilder(val);
+}
+
+void ComplexStructBuilder::setReal(OpBuilder &builder, Location loc,
+                                   Value real) {
+  setPtr(builder, loc, kRealPosInComplexNumberStruct, real);
+}
+
+Value ComplexStructBuilder::real(OpBuilder &builder, Location loc) {
+  return extractPtr(builder, loc, kRealPosInComplexNumberStruct);
+}
+
+void ComplexStructBuilder::setImaginary(OpBuilder &builder, Location loc,
+                                        Value imaginary) {
+  setPtr(builder, loc, kImaginaryPosInComplexNumberStruct, imaginary);
+}
+
+Value ComplexStructBuilder::imaginary(OpBuilder &builder, Location loc) {
+  return extractPtr(builder, loc, kImaginaryPosInComplexNumberStruct);
+}
+
/*============================================================================*/
/* MemRefDescriptor implementation */
/*============================================================================*/
@@ -1284,6 +1326,65 @@ using UnsignedShiftRightOpLowering =
    OneToOneConvertToLLVMPattern<UnsignedShiftRightOp, LLVM::LShrOp>;
using XOrOpLowering = VectorConvertToLLVMPattern<XOrOp, LLVM::XOrOp>;

+// Lowerings for operations on complex numbers, `CreateComplexOp`, `ReOp`, and
+// `ImOp`.
+
+struct CreateComplexOpLowering
+    : public ConvertOpToLLVMPattern<CreateComplexOp> {
+  using ConvertOpToLLVMPattern<CreateComplexOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto complexOp = cast<CreateComplexOp>(op);
+    OperandAdaptor<CreateComplexOp> transformed(operands);
+
+    // Pack real and imaginary part in a complex number struct.
+    auto loc = op->getLoc();
+    auto structType = typeConverter.convertType(complexOp.getType());
+    auto complexStruct = ComplexStructBuilder::undef(rewriter, loc, structType);
+    complexStruct.setReal(rewriter, loc, transformed.real());
+    complexStruct.setImaginary(rewriter, loc, transformed.imaginary());
+
+    rewriter.replaceOp(op, {complexStruct});
+    return success();
+  }
+};
+
+struct ReOpLowering : public ConvertOpToLLVMPattern<ReOp> {
+  using ConvertOpToLLVMPattern<ReOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    OperandAdaptor<ReOp> transformed(operands);
+
+    // Extract real part from the complex number struct.
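+    // (This reads index kRealPosInComplexNumberStruct == 0 of the
+    // {real, imaginary} struct built by CreateComplexOpLowering.)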
+ ComplexStructBuilder complexStruct(transformed.complex()); + Value real = complexStruct.real(rewriter, op->getLoc()); + rewriter.replaceOp(op, real); + + return success(); + } +}; + +struct ImOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + OperandAdaptor transformed(operands); + + // Extract imaginary part from the complex number struct. + ComplexStructBuilder complexStruct(transformed.complex()); + Value imaginary = complexStruct.imaginary(rewriter, op->getLoc()); + rewriter.replaceOp(op, imaginary); + + return success(); + } +}; + // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. static bool isSupportedMemRefType(MemRefType type) { @@ -2373,12 +2474,14 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { // Copy the buffer pointer from the old descriptor to the new one. Value extracted = sourceMemRef.allocatedPtr(rewriter, loc); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(), extracted); + loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); extracted = sourceMemRef.alignedPtr(rewriter, loc); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(), extracted); + loc, targetElementTy.getPointerTo(viewMemRefType.getMemorySpace()), + extracted); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); // Extract strides needed to compute offset. @@ -2640,113 +2743,6 @@ struct AtomicRMWOpLowering : public LoadStoreOpLowering { } }; -/// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be -/// retried until it succeeds in atomically storing a new value into memory. -/// -/// +---------------------------------+ -/// | | -/// | | -/// | br loop(%loaded) | -/// +---------------------------------+ -/// | -/// -------| | -/// | v v -/// | +--------------------------------+ -/// | | loop(%loaded): | -/// | | | -/// | | %pair = cmpxchg | -/// | | %ok = %pair[0] | -/// | | %new = %pair[1] | -/// | | cond_br %ok, end, loop(%new) | -/// | +--------------------------------+ -/// | | | -/// |----------- | -/// v -/// +--------------------------------+ -/// | end: | -/// | | -/// +--------------------------------+ -/// -struct AtomicCmpXchgOpLowering : public LoadStoreOpLowering { - using Base::Base; - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto atomicOp = cast(op); - auto maybeKind = matchSimpleAtomicOp(atomicOp); - if (maybeKind) - return failure(); - - LLVM::FCmpPredicate predicate; - switch (atomicOp.kind()) { - case AtomicRMWKind::maxf: - predicate = LLVM::FCmpPredicate::ogt; - break; - case AtomicRMWKind::minf: - predicate = LLVM::FCmpPredicate::olt; - break; - default: - return failure(); - } - - OperandAdaptor adaptor(operands); - auto loc = op->getLoc(); - auto valueType = adaptor.value().getType().cast(); - - // Split the block into initial, loop, and ending parts. 
- auto *initBlock = rewriter.getInsertionBlock(); - auto initPosition = rewriter.getInsertionPoint(); - auto *loopBlock = rewriter.splitBlock(initBlock, initPosition); - auto loopArgument = loopBlock->addArgument(valueType); - auto loopPosition = rewriter.getInsertionPoint(); - auto *endBlock = rewriter.splitBlock(loopBlock, loopPosition); - - // Compute the loaded value and branch to the loop block. - rewriter.setInsertionPointToEnd(initBlock); - auto memRefType = atomicOp.getMemRefType(); - auto dataPtr = getDataPtr(loc, memRefType, adaptor.memref(), - adaptor.indices(), rewriter, getModule()); - Value init = rewriter.create(loc, dataPtr); - rewriter.create(loc, init, loopBlock); - - // Prepare the body of the loop block. - rewriter.setInsertionPointToStart(loopBlock); - auto predicateI64 = - rewriter.getI64IntegerAttr(static_cast(predicate)); - auto boolType = LLVM::LLVMType::getInt1Ty(&getDialect()); - auto lhs = loopArgument; - auto rhs = adaptor.value(); - auto cmp = - rewriter.create(loc, boolType, predicateI64, lhs, rhs); - auto select = rewriter.create(loc, cmp, lhs, rhs); - - // Prepare the epilog of the loop block. - rewriter.setInsertionPointToEnd(loopBlock); - // Append the cmpxchg op to the end of the loop block. - auto successOrdering = LLVM::AtomicOrdering::acq_rel; - auto failureOrdering = LLVM::AtomicOrdering::monotonic; - auto pairType = LLVM::LLVMType::getStructTy(valueType, boolType); - auto cmpxchg = rewriter.create( - loc, pairType, dataPtr, loopArgument, select, successOrdering, - failureOrdering); - // Extract the %new_loaded and %ok values from the pair. - Value newLoaded = rewriter.create( - loc, valueType, cmpxchg, rewriter.getI64ArrayAttr({0})); - Value ok = rewriter.create( - loc, boolType, cmpxchg, rewriter.getI64ArrayAttr({1})); - - // Conditionally branch to the end or back to the loop depending on %ok. - rewriter.create(loc, ok, endBlock, ArrayRef(), - loopBlock, newLoaded); - - // The 'result' of the atomic_rmw op is the newly loaded value. - rewriter.replaceOp(op, {newLoaded}); - - return success(); - } -}; - /// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be /// retried until it succeeds in atomically storing a new value into memory. 
///
@@ -2882,7 +2878,6 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns(
      AddIOpLowering,
      AllocaOpLowering,
      AndOpLowering,
-      AtomicCmpXchgOpLowering,
      AtomicRMWOpLowering,
      BranchOpLowering,
      CallIndirectOpLowering,
@@ -2894,6 +2889,7 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns(
      CopySignOpLowering,
      CosOpLowering,
      ConstLLVMOpLowering,
+      CreateComplexOpLowering,
      DialectCastOpLowering,
      DivFOpLowering,
      ExpOpLowering,
@@ -2904,12 +2900,14 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns(
      Log2OpLowering,
      FPExtLowering,
      FPTruncLowering,
+      ImOpLowering,
      IndexCastOpLowering,
      MulFOpLowering,
      MulIOpLowering,
      NegFOpLowering,
      OrOpLowering,
      PrefetchOpLowering,
+      ReOpLowering,
      RemFOpLowering,
      ReturnOpLowering,
      RsqrtOpLowering,
diff --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
index bb249078d62c0..e60985984da32 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
@@ -9,10 +9,8 @@ add_mlir_conversion_library(MLIRStandardToSPIRVTransforms

  DEPENDS
  MLIRConversionPassIncGen
-  )
-target_link_libraries(MLIRStandardToSPIRVTransforms
-  PUBLIC
+  LINK_LIBS PUBLIC
  MLIRIR
  MLIRPass
  MLIRSPIRV
diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp
index b53128a33018d..2f7868e89336d 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp
+++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp
@@ -97,6 +97,64 @@ static FloatAttr convertFloatAttr(FloatAttr srcAttr, FloatType dstType,
  return builder.getF32FloatAttr(dstVal.convertToFloat());
}

+/// Returns the offset of the value in `targetBits` representation. `srcIdx` is
+/// an index into a 1-D array with each element having `sourceBits`. When
+/// accessing an element in the array treated as having elements of
+/// `targetBits`, multiple values are loaded at the same time. The method
+/// returns the offset at which `srcIdx` is located in the wider value. For
+/// example, if `sourceBits` equals 8 and `targetBits` equals 32, the x-th
+/// element is located at (x % 4) * 8, because four 8-bit elements fit in one
+/// i32.
+static Value getOffsetForBitwidth(Location loc, Value srcIdx, int sourceBits,
+                                  int targetBits, OpBuilder &builder) {
+  assert(targetBits % sourceBits == 0);
+  IntegerType targetType = builder.getIntegerType(targetBits);
+  IntegerAttr idxAttr =
+      builder.getIntegerAttr(targetType, targetBits / sourceBits);
+  auto idx = builder.create<spirv::ConstantOp>(loc, targetType, idxAttr);
+  IntegerAttr srcBitsAttr = builder.getIntegerAttr(targetType, sourceBits);
+  auto srcBitsValue =
+      builder.create<spirv::ConstantOp>(loc, targetType, srcBitsAttr);
+  auto m = builder.create<spirv::UModOp>(loc, srcIdx, idx);
+  return builder.create<spirv::IMulOp>(loc, targetType, m, srcBitsValue);
+}
+
+/// Returns an adjusted spirv::AccessChainOp. Based on the
+/// extension/capabilities, certain integer bitwidths `sourceBits` might not be
+/// supported. During conversion, if a memref of an unsupported type is used,
+/// loads/stores to this memref need to be modified to use a supported higher
+/// bitwidth `targetBits` and to extract the required bits. For accessing a
+/// 1-D array (spv.array or spv.rt_array), the last index is modified to load
+/// the bits needed. The extraction of the actual bits needed is handled
+/// separately. Note that this only works for a 1-D tensor.
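+/// For example, with `sourceBits` = 8 and `targetBits` = 32, an access to
+/// element i becomes an access to word i / 4; the in-word offset
+/// (i % 4) * 8 is computed separately by getOffsetForBitwidth above.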
+static Value adjustAccessChainForBitwidth(SPIRVTypeConverter &typeConverter,
+                                          spirv::AccessChainOp op,
+                                          int sourceBits, int targetBits,
+                                          OpBuilder &builder) {
+  assert(targetBits % sourceBits == 0);
+  const auto loc = op.getLoc();
+  IntegerType targetType = builder.getIntegerType(targetBits);
+  IntegerAttr attr =
+      builder.getIntegerAttr(targetType, targetBits / sourceBits);
+  auto idx = builder.create<spirv::ConstantOp>(loc, targetType, attr);
+  auto lastDim = op.getOperation()->getOperand(op.getNumOperands() - 1);
+  auto indices = llvm::to_vector<4>(op.indices());
+  // There are two elements if this is a 1-D tensor.
+  assert(indices.size() == 2);
+  indices.back() = builder.create<spirv::SDivOp>(loc, lastDim, idx);
+  Type t = typeConverter.convertType(op.component_ptr().getType());
+  return builder.create<spirv::AccessChainOp>(loc, t, op.base_ptr(), indices);
+}
+
+/// Returns the shifted `targetBits`-bit value with the given offset.
+Value shiftValue(Location loc, Value value, Value offset, Value mask,
+                 int targetBits, OpBuilder &builder) {
+  Type targetType = builder.getIntegerType(targetBits);
+  Value result = builder.create<spirv::BitwiseAndOp>(loc, value, mask);
+  return builder.create<spirv::ShiftLeftLogicalOp>(loc, targetType, result,
+                                                   offset);
+}
+
//===----------------------------------------------------------------------===//
// Operation conversion
//===----------------------------------------------------------------------===//
@@ -184,6 +242,16 @@ class CmpFOpPattern final : public SPIRVOpLowering<CmpFOp> {
                  ConversionPatternRewriter &rewriter) const override;
};

+/// Converts integer compare operations on i1-type operands to SPIR-V ops.
+class BoolCmpIOpPattern final : public SPIRVOpLowering<CmpIOp> {
+public:
+  using SPIRVOpLowering<CmpIOp>::SPIRVOpLowering;
+
+  LogicalResult
+  matchAndRewrite(CmpIOp cmpIOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
/// Converts integer compare operation to SPIR-V ops.
class CmpIOpPattern final : public SPIRVOpLowering<CmpIOp> {
public:
@@ -194,6 +262,16 @@ class CmpIOpPattern final : public SPIRVOpLowering<CmpIOp> {
                  ConversionPatternRewriter &rewriter) const override;
};

+/// Converts std.load to spv.Load on integers.
+class IntLoadOpPattern final : public SPIRVOpLowering<LoadOp> {
+public:
+  using SPIRVOpLowering<LoadOp>::SPIRVOpLowering;
+
+  LogicalResult
+  matchAndRewrite(LoadOp loadOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
/// Converts std.load to spv.Load.
class LoadOpPattern final : public SPIRVOpLowering<LoadOp> {
public:
@@ -223,6 +301,16 @@ class SelectOpPattern final : public SPIRVOpLowering<SelectOp> {
                  ConversionPatternRewriter &rewriter) const override;
};

+/// Converts std.store to spv.Store on integers.
+class IntStoreOpPattern final : public SPIRVOpLowering<StoreOp> {
+public:
+  using SPIRVOpLowering<StoreOp>::SPIRVOpLowering;
+
+  LogicalResult
+  matchAndRewrite(StoreOp storeOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
/// Converts std.store to spv.Store.
class StoreOpPattern final : public SPIRVOpLowering { public: @@ -453,11 +541,43 @@ CmpFOpPattern::matchAndRewrite(CmpFOp cmpFOp, ArrayRef operands, // CmpIOp //===----------------------------------------------------------------------===// +LogicalResult +BoolCmpIOpPattern::matchAndRewrite(CmpIOp cmpIOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + CmpIOpOperandAdaptor cmpIOpOperands(operands); + + Type operandType = cmpIOp.lhs().getType(); + if (!operandType.isa() || + operandType.cast().getWidth() != 1) + return failure(); + + switch (cmpIOp.getPredicate()) { +#define DISPATCH(cmpPredicate, spirvOp) \ + case cmpPredicate: \ + rewriter.replaceOpWithNewOp(cmpIOp, cmpIOp.getResult().getType(), \ + cmpIOpOperands.lhs(), \ + cmpIOpOperands.rhs()); \ + return success(); + + DISPATCH(CmpIPredicate::eq, spirv::LogicalEqualOp); + DISPATCH(CmpIPredicate::ne, spirv::LogicalNotEqualOp); + +#undef DISPATCH + default:; + } + return failure(); +} + LogicalResult CmpIOpPattern::matchAndRewrite(CmpIOp cmpIOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { CmpIOpOperandAdaptor cmpIOpOperands(operands); + Type operandType = cmpIOp.lhs().getType(); + if (operandType.isa() && + operandType.cast().getWidth() == 1) + return failure(); + switch (cmpIOp.getPredicate()) { #define DISPATCH(cmpPredicate, spirvOp) \ case cmpPredicate: \ @@ -486,13 +606,79 @@ CmpIOpPattern::matchAndRewrite(CmpIOp cmpIOp, ArrayRef operands, // LoadOp //===----------------------------------------------------------------------===// +LogicalResult +IntLoadOpPattern::matchAndRewrite(LoadOp loadOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + LoadOpOperandAdaptor loadOperands(operands); + auto loc = loadOp.getLoc(); + auto memrefType = loadOp.memref().getType().cast(); + if (!memrefType.getElementType().isSignlessInteger()) + return failure(); + spirv::AccessChainOp accessChainOp = + spirv::getElementPtr(typeConverter, memrefType, loadOperands.memref(), + loadOperands.indices(), loc, rewriter); + + int srcBits = memrefType.getElementType().getIntOrFloatBitWidth(); + auto dstType = typeConverter.convertType(memrefType) + .cast() + .getPointeeType() + .cast() + .getElementType(0) + .cast() + .getElementType(); + int dstBits = dstType.getIntOrFloatBitWidth(); + assert(dstBits % srcBits == 0); + + // If the rewritten load op has the same bit width, use the loaded value + // directly. + if (srcBits == dstBits) { + rewriter.replaceOpWithNewOp(loadOp, + accessChainOp.getResult()); + return success(); + } + + // Assume that getElementPtr() linearizes the access. Even for a scalar, the + // method still returns a linearized access. If the access were not + // linearized, there would be offset issues. + assert(accessChainOp.indices().size() == 2); + Value adjustedPtr = adjustAccessChainForBitwidth(typeConverter, accessChainOp, + srcBits, dstBits, rewriter); + Value spvLoadOp = rewriter.create( + loc, dstType, adjustedPtr, + loadOp.getAttrOfType( + spirv::attributeName()), + loadOp.getAttrOfType("alignment")); + + // Shift the bits to the rightmost. + // ____XXXX________ -> ____________XXXX + Value lastDim = accessChainOp.getOperation()->getOperand( + accessChainOp.getNumOperands() - 1); + Value offset = getOffsetForBitwidth(loc, lastDim, srcBits, dstBits, rewriter); + Value result = rewriter.create( + loc, spvLoadOp.getType(), spvLoadOp, offset); + + // Apply the mask to extract corresponding bits.
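The shift performed above and the mask applied just below have a simple scalar equivalent. A standalone sketch in plain C++ (illustrative names, not MLIR code), assuming the packed layout described earlier:

```cpp
#include <cstdint>
#include <cstdio>

static uint32_t extractNarrow(uint32_t word, uint32_t bitOffset,
                              uint32_t srcBits) {
  uint32_t mask = (1u << srcBits) - 1; // e.g. 0xFF when srcBits == 8
  // ____XXXX________ -> ____________XXXX, then drop the unrelated high bits.
  return (word >> bitOffset) & mask;
}

int main() {
  uint32_t word = 0xAABBCCDDu; // four i8 elements packed into one i32
  // Element 1 sits at bit offset 8, so this prints 0xCC.
  std::printf("0x%X\n", extractNarrow(word, /*bitOffset=*/8, /*srcBits=*/8));
}
```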
+ Value mask = rewriter.create( + loc, dstType, rewriter.getIntegerAttr(dstType, (1 << srcBits) - 1)); + result = rewriter.create(loc, dstType, result, mask); + rewriter.replaceOp(loadOp, result); + + assert(accessChainOp.use_empty()); + rewriter.eraseOp(accessChainOp); + + return success(); +} + LogicalResult LoadOpPattern::matchAndRewrite(LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { LoadOpOperandAdaptor loadOperands(operands); - auto loadPtr = spirv::getElementPtr( - typeConverter, loadOp.memref().getType().cast(), - loadOperands.memref(), loadOperands.indices(), loadOp.getLoc(), rewriter); + auto memrefType = loadOp.memref().getType().cast(); + if (memrefType.getElementType().isSignlessInteger()) + return failure(); + auto loadPtr = + spirv::getElementPtr(typeConverter, memrefType, loadOperands.memref(), + loadOperands.indices(), loadOp.getLoc(), rewriter); rewriter.replaceOpWithNewOp(loadOp, loadPtr); return success(); } @@ -529,14 +715,92 @@ SelectOpPattern::matchAndRewrite(SelectOp op, ArrayRef operands, // StoreOp //===----------------------------------------------------------------------===// +LogicalResult +IntStoreOpPattern::matchAndRewrite(StoreOp storeOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + StoreOpOperandAdaptor storeOperands(operands); + auto memrefType = storeOp.memref().getType().cast(); + if (!memrefType.getElementType().isSignlessInteger()) + return failure(); + + auto loc = storeOp.getLoc(); + spirv::AccessChainOp accessChainOp = + spirv::getElementPtr(typeConverter, memrefType, storeOperands.memref(), + storeOperands.indices(), loc, rewriter); + int srcBits = memrefType.getElementType().getIntOrFloatBitWidth(); + auto dstType = typeConverter.convertType(memrefType) + .cast() + .getPointeeType() + .cast() + .getElementType(0) + .cast() + .getElementType(); + int dstBits = dstType.getIntOrFloatBitWidth(); + assert(dstBits % srcBits == 0); + + if (srcBits == dstBits) { + rewriter.replaceOpWithNewOp( + storeOp, accessChainOp.getResult(), storeOperands.value()); + return success(); + } + + // Since multiple threads may be storing concurrently, the emulation is done + // with atomic operations. E.g., if the value being stored is i8, rewrite the + // StoreOp to + // 1) load a 32-bit integer + // 2) clear 8 bits in the loaded value + // 3) store the 32-bit value back + // 4) load a 32-bit integer + // 5) modify 8 bits in the loaded value + // 6) store the 32-bit value back + // Steps 1 to 3 are done by AtomicAnd as one atomic step, and steps 4 to 6 + // are done by AtomicOr as another atomic step. + assert(accessChainOp.indices().size() == 2); + Value lastDim = accessChainOp.getOperation()->getOperand( + accessChainOp.getNumOperands() - 1); + Value offset = getOffsetForBitwidth(loc, lastDim, srcBits, dstBits, rewriter); + + // Create a mask to clear the destination. E.g., if it is the second i8 in + // an i32, 0xFFFF00FF is created.
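The clear-then-merge sequence that follows can be sketched in scalar form, with std::atomic standing in for spv.AtomicAnd / spv.AtomicOr (illustrative names only; the real lowering emits SPIR-V ops, not C++):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

static void storeNarrow(std::atomic<uint32_t> &word, uint32_t bitOffset,
                        uint32_t srcBits, uint32_t value) {
  uint32_t mask = (1u << srcBits) - 1;
  // Atomic step 1: clear the destination bits, e.g. AND with 0xFFFF00FF for
  // the second i8 in an i32.
  word.fetch_and(~(mask << bitOffset));
  // Atomic step 2: merge in the shifted value.
  word.fetch_or((value & mask) << bitOffset);
  // The two steps are separately atomic, not one transaction; that suffices
  // for concurrent stores to *different* narrow elements of the same word.
}

int main() {
  std::atomic<uint32_t> word{0xAABBCCDDu};
  storeNarrow(word, /*bitOffset=*/8, /*srcBits=*/8, /*value=*/0x11);
  std::printf("0x%X\n", word.load()); // prints 0xAABB11DD
}
```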
+ Value mask = rewriter.create( + loc, dstType, rewriter.getIntegerAttr(dstType, (1 << srcBits) - 1)); + Value clearBitsMask = + rewriter.create(loc, dstType, mask, offset); + clearBitsMask = rewriter.create(loc, dstType, clearBitsMask); + + Value storeVal = + shiftValue(loc, storeOperands.value(), offset, mask, dstBits, rewriter); + Value adjustedPtr = adjustAccessChainForBitwidth(typeConverter, accessChainOp, + srcBits, dstBits, rewriter); + Value result = rewriter.create( + loc, dstType, adjustedPtr, spirv::Scope::Device, + spirv::MemorySemantics::AcquireRelease, clearBitsMask); + result = rewriter.create( + loc, dstType, adjustedPtr, spirv::Scope::Device, + spirv::MemorySemantics::AcquireRelease, storeVal); + + // The result of the AtomicOrOp is not used. Since the op is already + // inserted, we can just remove the original StoreOp. Note that + // rewriter.replaceOp() doesn't work here because it requires the number of + // results to be the same. + rewriter.eraseOp(storeOp); + + assert(accessChainOp.use_empty()); + rewriter.eraseOp(accessChainOp); + + return success(); +} + LogicalResult StoreOpPattern::matchAndRewrite(StoreOp storeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { StoreOpOperandAdaptor storeOperands(operands); - auto storePtr = spirv::getElementPtr( - typeConverter, storeOp.memref().getType().cast(), - storeOperands.memref(), storeOperands.indices(), storeOp.getLoc(), - rewriter); + auto memrefType = storeOp.memref().getType().cast(); + if (memrefType.getElementType().isSignlessInteger()) + return failure(); + auto storePtr = + spirv::getElementPtr(typeConverter, memrefType, storeOperands.memref(), + storeOperands.indices(), storeOp.getLoc(), rewriter); rewriter.replaceOpWithNewOp(storeOp, storePtr, storeOperands.value()); return success(); @@ -590,6 +854,7 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, + UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, @@ -599,9 +864,12 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, BitwiseOpPattern, BitwiseOpPattern, - ConstantCompositeOpPattern, ConstantScalarOpPattern, CmpFOpPattern, - CmpIOpPattern, LoadOpPattern, ReturnOpPattern, SelectOpPattern, - StoreOpPattern, TypeCastingOpPattern, + BoolCmpIOpPattern, ConstantCompositeOpPattern, ConstantScalarOpPattern, + CmpFOpPattern, CmpIOpPattern, IntLoadOpPattern, LoadOpPattern, + ReturnOpPattern, SelectOpPattern, IntStoreOpPattern, StoreOpPattern, + TypeCastingOpPattern, + TypeCastingOpPattern, + TypeCastingOpPattern, TypeCastingOpPattern, TypeCastingOpPattern, XOrOpPattern>( context, typeConverter); diff --git a/mlir/lib/Conversion/StandardToStandard/CMakeLists.txt b/mlir/lib/Conversion/StandardToStandard/CMakeLists.txt index e1bc42a746ee9..2d1a0be0ab7ac 100644 --- a/mlir/lib/Conversion/StandardToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/StandardToStandard/CMakeLists.txt @@ -3,9 +3,8 @@ add_mlir_conversion_library(MLIRStandardToStandard ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/StandardToStandard - ) -target_link_libraries(MLIRStandardToStandard - PUBLIC + + LINK_LIBS PUBLIC MLIRIR MLIRPass MLIRStandardOps diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index 6d6b3b87c0730..569619f2bcef2 100644 --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -6,14 +6,14 @@
add_mlir_conversion_library(MLIRVectorToLLVM DEPENDS MLIRConversionPassIncGen -) + intrinsics_gen -target_link_libraries(MLIRVectorToLLVM - PUBLIC + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRLLVMIR MLIRStandardToLLVM MLIRVector MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 003e06a87299c..dec932173c45d 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -791,9 +791,19 @@ getTransferOpAdapter(TransferWriteOp xferOp, ArrayRef operands) { return TransferWriteOpOperandAdaptor(operands); } +bool isMinorIdentity(AffineMap map, unsigned rank) { + if (map.getNumResults() < rank) + return false; + unsigned startDim = map.getNumDims() - rank; + for (unsigned i = 0; i < rank; ++i) + if (map.getResult(i) != getAffineDimExpr(startDim + i, map.getContext())) + return false; + return true; +} + /// Conversion pattern that converts a 1-D vector transfer read/write op in a /// sequence of: -/// 1. Bitcast to vector form. +/// 1. Bitcast or addrspacecast to vector form. /// 2. Create an offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. /// 3. Create a mask where offsetVector is compared against memref upper bound. /// 4. Rewrite op as a masked read or write. @@ -810,9 +820,12 @@ class VectorTransferConversion : public ConvertToLLVMPattern { ConversionPatternRewriter &rewriter) const override { auto xferOp = cast(op); auto adaptor = getTransferOpAdapter(xferOp, operands); - if (xferOp.getMemRefType().getRank() != 1) + + if (xferOp.getVectorType().getRank() > 1 || + llvm::size(xferOp.indices()) == 0) return failure(); - if (!xferOp.permutation_map().isIdentity()) + if (!isMinorIdentity(xferOp.permutation_map(), + xferOp.getVectorType().getRank())) return failure(); auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; @@ -822,13 +835,21 @@ class VectorTransferConversion : public ConvertToLLVMPattern { MemRefType memRefType = xferOp.getMemRefType(); // 1. Get the source/dst address as an LLVM vector pointer. + // The vector pointer is always in address space 0, therefore an + // addrspacecast is needed when the source/dst memrefs are not in + // address space 0. // TODO: support alignment when possible. Value dataPtr = getDataPtr(loc, memRefType, adaptor.memref(), adaptor.indices(), rewriter, getModule()); auto vecTy = toLLVMTy(xferOp.getVectorType()).template cast(); - auto vectorDataPtr = - rewriter.create(loc, vecTy.getPointerTo(), dataPtr); + Value vectorDataPtr; + if (memRefType.getMemorySpace() == 0) + vectorDataPtr = + rewriter.create(loc, vecTy.getPointerTo(), dataPtr); + else + vectorDataPtr = rewriter.create( + loc, vecTy.getPointerTo(), dataPtr); // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. unsigned vecWidth = vecTy.getVectorNumElements(); @@ -844,17 +865,18 @@ class VectorTransferConversion : public ConvertToLLVMPattern { loc, toLLVMTy(vectorCmpType), linearIndices); // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. - Value offsetIndex = *(xferOp.indices().begin()); - offsetIndex = rewriter.create( - loc, vectorCmpType.getElementType(), offsetIndex); + // TODO(ntv, ajcbik): when the leaf transfer rank is k > 1 we need the last + // `k` dimensions here.
+ unsigned lastIndex = llvm::size(xferOp.indices()) - 1; + Value offsetIndex = *(xferOp.indices().begin() + lastIndex); + offsetIndex = rewriter.create(loc, i64Type, offsetIndex); Value base = rewriter.create(loc, vectorCmpType, offsetIndex); Value offsetVector = rewriter.create(loc, base, linearIndices); // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] - Value dim = rewriter.create(loc, xferOp.memref(), 0); - dim = - rewriter.create(loc, vectorCmpType.getElementType(), dim); + Value dim = rewriter.create(loc, xferOp.memref(), lastIndex); + dim = rewriter.create(loc, i64Type, dim); dim = rewriter.create(loc, vectorCmpType, dim); Value mask = rewriter.create(loc, CmpIPredicate::slt, offsetVector, dim); @@ -903,7 +925,9 @@ class VectorPrintOpConversion : public ConvertToLLVMPattern { Type eltType = vectorType ? vectorType.getElementType() : printType; int64_t rank = vectorType ? vectorType.getRank() : 0; Operation *printer; - if (eltType.isSignlessInteger(32)) + if (eltType.isSignlessInteger(1)) + printer = getPrintI1(op); + else if (eltType.isSignlessInteger(32)) printer = getPrintI32(op); else if (eltType.isSignlessInteger(64)) printer = getPrintI64(op); @@ -970,6 +994,11 @@ class VectorPrintOpConversion : public ConvertToLLVMPattern { } // Helpers for method names. + Operation *getPrintI1(Operation *op) const { + LLVM::LLVMDialect *dialect = typeConverter.getDialect(); + return getPrint(op, dialect, "print_i1", + LLVM::LLVMType::getInt1Ty(dialect)); + } Operation *getPrintI32(Operation *op) const { LLVM::LLVMDialect *dialect = typeConverter.getDialect(); return getPrint(op, dialect, "print_i32", diff --git a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt index 515c0e2d0344d..08cbc392d7f6f 100644 --- a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt @@ -3,14 +3,13 @@ add_mlir_conversion_library(MLIRVectorToLoops ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToLoops -) -target_link_libraries(MLIRVectorToLoops - PUBLIC + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIREDSC MLIRAffineEDSC MLIRLLVMIR MLIRTransforms - LLVMCore - LLVMSupport ) diff --git a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp index 99676f1335f6f..5cb781b02b2a1 100644 --- a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp +++ b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp @@ -15,6 +15,7 @@ #include "mlir/Conversion/VectorToLoops/ConvertVectorToLoops.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/LoopOps/EDSC/Builders.h" +#include "mlir/Dialect/LoopOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/VectorOps.h" @@ -34,6 +35,223 @@ using namespace mlir::edsc::intrinsics; using vector::TransferReadOp; using vector::TransferWriteOp; +/// Helper class captures the common information needed to lower N>1-D vector +/// transfer operations (read and write). +/// On construction, this class opens an edsc::ScopedContext for simpler IR +/// manipulation. 
+/// In pseudo-IR, for an n-D vector_transfer_read such as: +/// +/// ``` +/// vector_transfer_read(%m, %offsets, identity_map, %fill) : +/// memref<(leading_dims) x (major_dims) x (minor_dims) x type>, +/// vector<(major_dims) x (minor_dims) x type> +/// ``` +/// +/// where rank(minor_dims) is the lower-level vector rank (e.g. 1 for LLVM or +/// higher). +/// +/// This is the entry point to emitting pseudo-IR resembling: +/// +/// ``` +/// %tmp = alloc(): memref<(major_dims) x vector> +/// for (%ivs_major, {0}, {vector_shape}, {1}) { // (N-1)-D loop nest +/// if (any_of(%ivs_major + %offsets, <, major_dims)) { +/// %v = vector_transfer_read( +/// {%offsets_leading, %ivs_major + %offsets_major, %offsets_minor}, +/// %ivs_minor): +/// memref<(leading_dims) x (major_dims) x (minor_dims) x type>, +/// vector<(minor_dims) x type>; +/// store(%v, %tmp); +/// } else { +/// %v = splat(vector<(minor_dims) x type>, %fill) +/// store(%v, %tmp, %ivs_major); +/// } +/// } +/// %res = load(%tmp, %0): memref<(major_dims) x vector>: +/// vector<(major_dims) x (minor_dims) x type> +/// ``` +/// +template +class NDTransferOpHelper { +public: + NDTransferOpHelper(PatternRewriter &rewriter, ConcreteOp xferOp) + : rewriter(rewriter), loc(xferOp.getLoc()), + scope(std::make_unique(rewriter, loc)), xferOp(xferOp), + op(xferOp.getOperation()) { + vectorType = xferOp.getVectorType(); + // TODO(ntv, ajcbik): when we go to k > 1-D vectors adapt minorRank. + minorRank = 1; + majorRank = vectorType.getRank() - minorRank; + leadingRank = xferOp.getMemRefType().getRank() - (majorRank + minorRank); + majorVectorType = + VectorType::get(vectorType.getShape().take_front(majorRank), + vectorType.getElementType()); + minorVectorType = + VectorType::get(vectorType.getShape().take_back(minorRank), + vectorType.getElementType()); + /// Memref of minor vector type is used for individual transfers. + memRefMinorVectorType = + MemRefType::get(majorVectorType.getShape(), minorVectorType, {}, + xferOp.getMemRefType().getMemorySpace()); + } + + LogicalResult doReplace(); + +private: + /// Creates the loop nest on the "major" dimensions and calls the + /// `loopBodyBuilder` lambda in the context of the loop nest. + template + void emitLoops(Lambda loopBodyBuilder); + + /// Operate within the body of `emitLoops` to: + /// 1. Compute the indexings `majorIvs + majorOffsets`. + /// 2. Compute a boolean that determines whether the first `majorIvs.rank()` + /// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. + /// 3. Create an IfOp conditioned on the boolean in step 2. + /// 4. Call a `thenBlockBuilder` and an `elseBlockBuilder` to append + /// operations to the IfOp blocks as appropriate. + template + void emitInBounds(ValueRange majorIvs, ValueRange majorOffsets, + MemRefBoundsCapture &memrefBounds, + LambdaThen thenBlockBuilder, LambdaElse elseBlockBuilder); + + /// Common state to lower vector transfer ops.
+ PatternRewriter &rewriter; + Location loc; + std::unique_ptr scope; + ConcreteOp xferOp; + Operation *op; + // A vector transfer copies data between: + // - memref<(leading_dims) x (major_dims) x (minor_dims) x type> + // - vector<(major_dims) x (minor_dims) x type> + unsigned minorRank; // for now always 1 + unsigned majorRank; // vector rank - minorRank + unsigned leadingRank; // memref rank - vector rank + VectorType vectorType; // vector<(major_dims) x (minor_dims) x type> + VectorType majorVectorType; // vector<(major_dims) x type> + VectorType minorVectorType; // vector<(minor_dims) x type> + MemRefType memRefMinorVectorType; // memref> +}; + +template +template +void NDTransferOpHelper::emitLoops(Lambda loopBodyBuilder) { + /// Loop nest operates on the major dimensions + MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); + VectorBoundsCapture vectorBoundsCapture(majorVectorType); + auto majorLbs = vectorBoundsCapture.getLbs(); + auto majorUbs = vectorBoundsCapture.getUbs(); + auto majorSteps = vectorBoundsCapture.getSteps(); + SmallVector majorIvs(vectorBoundsCapture.rank()); + AffineLoopNestBuilder(majorIvs, majorLbs, majorUbs, majorSteps)([&] { + ValueRange indices(xferOp.indices()); + loopBodyBuilder(majorIvs, indices.take_front(leadingRank), + indices.drop_front(leadingRank).take_front(majorRank), + indices.take_back(minorRank), memrefBoundsCapture); + }); +} + +template +template +void NDTransferOpHelper::emitInBounds( + ValueRange majorIvs, ValueRange majorOffsets, + MemRefBoundsCapture &memrefBounds, LambdaThen thenBlockBuilder, + LambdaElse elseBlockBuilder) { + Value inBounds = std_constant_int(/*value=*/1, /*width=*/1); + SmallVector majorIvsPlusOffsets; + majorIvsPlusOffsets.reserve(majorIvs.size()); + for (auto it : llvm::zip(majorIvs, majorOffsets, memrefBounds.getUbs())) { + Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it); + using namespace mlir::edsc::op; + majorIvsPlusOffsets.push_back(iv + off); + Value inBounds2 = majorIvsPlusOffsets.back() < ub; + inBounds = inBounds && inBounds2; + } + + auto ifOp = ScopedContext::getBuilderRef().create( + ScopedContext::getLocation(), TypeRange{}, inBounds, + /*withElseRegion=*/std::is_same()); + BlockBuilder(&ifOp.thenRegion().front(), + Append())([&] { thenBlockBuilder(majorIvsPlusOffsets); }); + if (std::is_same()) + BlockBuilder(&ifOp.elseRegion().front(), + Append())([&] { elseBlockBuilder(majorIvsPlusOffsets); }); +} + +template <> +LogicalResult NDTransferOpHelper::doReplace() { + Value alloc = std_alloc(memRefMinorVectorType); + + emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, + ValueRange majorOffsets, ValueRange minorOffsets, + MemRefBoundsCapture &memrefBounds) { + // If in-bounds, index into memref and lower to 1-D transfer read. + auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { + auto map = AffineMap::getMinorIdentityMap( + xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); + // Lower to 1-D vector_transfer_read and let recursion handle it. + Value memref = xferOp.memref(); + SmallVector indexing; + indexing.reserve(leadingRank + majorRank + minorRank); + indexing.append(leadingOffsets.begin(), leadingOffsets.end()); + indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); + indexing.append(minorOffsets.begin(), minorOffsets.end()); + auto loaded1D = + vector_transfer_read(minorVectorType, memref, indexing, + AffineMapAttr::get(map), xferOp.padding()); + // Store the 1-D vector. 
+ std_store(loaded1D, alloc, majorIvs); + }; + // If out-of-bounds, just store a splatted vector. + auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { + auto vector = std_splat(minorVectorType, xferOp.padding()); + std_store(vector, alloc, majorIvs); + }; + emitInBounds(majorIvs, majorOffsets, memrefBounds, thenBlockBuilder, + elseBlockBuilder); + }); + + Value loaded = + std_load(vector_type_cast(MemRefType::get({}, vectorType), alloc)); + rewriter.replaceOp(op, loaded); + + return success(); +} + +template <> +LogicalResult NDTransferOpHelper::doReplace() { + Value alloc = std_alloc(memRefMinorVectorType); + + std_store(xferOp.vector(), + vector_type_cast(MemRefType::get({}, vectorType), alloc)); + + emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, + ValueRange majorOffsets, ValueRange minorOffsets, + MemRefBoundsCapture &memrefBounds) { + auto thenBlockBuilder = [&](ValueRange majorIvsPlusOffsets) { + // Lower to 1-D vector_transfer_write and let recursion handle it. + Value loaded1D = std_load(alloc, majorIvs); + auto map = AffineMap::getMinorIdentityMap( + xferOp.getMemRefType().getRank(), minorRank, xferOp.getContext()); + SmallVector indexing; + indexing.reserve(leadingRank + majorRank + minorRank); + indexing.append(leadingOffsets.begin(), leadingOffsets.end()); + indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end()); + indexing.append(minorOffsets.begin(), minorOffsets.end()); + vector_transfer_write(loaded1D, xferOp.memref(), indexing, + AffineMapAttr::get(map)); + }; + // Don't write anything when out of bounds. + auto elseBlockBuilder = [&](ValueRange majorIvsPlusOffsets) {}; + emitInBounds(majorIvs, majorOffsets, memrefBounds, thenBlockBuilder, + elseBlockBuilder); + }); + + rewriter.eraseOp(op); + + return success(); +} + /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of @@ -243,7 +461,16 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace mlir::edsc::op; TransferReadOp transfer = cast(op); + if (AffineMap::isMinorIdentity(transfer.permutation_map())) { + // If > 1D, emit a bunch of loops around 1-D vector transfers. + if (transfer.getVectorType().getRank() > 1) + return NDTransferOpHelper(rewriter, transfer).doReplace(); + // If 1-D this is now handled by the target-specific lowering. + if (transfer.getVectorType().getRank() == 1) + return failure(); + } + // Conservative lowering to scalar load / stores. // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); StdIndexedValue remote(transfer.memref()); @@ -306,6 +533,15 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( using namespace edsc::op; TransferWriteOp transfer = cast(op); + if (AffineMap::isMinorIdentity(transfer.permutation_map())) { + // If > 1D, emit a bunch of loops around 1-D vector transfers. + if (transfer.getVectorType().getRank() > 1) + return NDTransferOpHelper(rewriter, transfer) + .doReplace(); + // If 1-D this is now handled by the target-specific lowering. + if (transfer.getVectorType().getRank() == 1) + return failure(); + } // 1. Setup all the captures. 
ScopedContext scope(rewriter, transfer.getLoc()); @@ -347,8 +583,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( } // namespace -void mlir::populateVectorToAffineLoopsConversionPatterns( - MLIRContext *context, OwningRewritePatternList &patterns) { +void mlir::populateVectorToLoopsConversionPatterns( + OwningRewritePatternList &patterns, MLIRContext *context) { patterns.insert, VectorTransferRewriter>(context); } diff --git a/mlir/lib/Dialect/AVX512/CMakeLists.txt b/mlir/lib/Dialect/AVX512/CMakeLists.txt index eb1e7dc5c4b51..6b15bf21c2d97 100644 --- a/mlir/lib/Dialect/AVX512/CMakeLists.txt +++ b/mlir/lib/Dialect/AVX512/CMakeLists.txt @@ -6,11 +6,9 @@ add_mlir_dialect_library(MLIRAVX512 DEPENDS MLIRAVX512IncGen - ) -target_link_libraries(MLIRAVX512 - PUBLIC + + LINK_LIBS PUBLIC MLIRIR MLIRSideEffects MLIRVectorToLLVM - LLVMSupport ) diff --git a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp index 0e1b2a5bf0af1..50e26574b7d5e 100644 --- a/mlir/lib/Dialect/Affine/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Affine/EDSC/Builders.cpp @@ -28,7 +28,7 @@ static Optional emitStaticFor(ArrayRef lbs, ArrayRef ubs, auto ubConst = dyn_cast(ubDef); if (!lbConst || !ubConst) return Optional(); - return ScopedContext::getBuilder() + return ScopedContext::getBuilderRef() .create(ScopedContext::getLocation(), lbConst.getValue(), ubConst.getValue(), step) .getInductionVar(); @@ -38,19 +38,22 @@ LoopBuilder mlir::edsc::makeAffineLoopBuilder(Value *iv, ArrayRef lbs, ArrayRef ubs, int64_t step) { mlir::edsc::LoopBuilder result; - if (auto staticForIv = emitStaticFor(lbs, ubs, step)) { + if (auto staticForIv = emitStaticFor(lbs, ubs, step)) *iv = staticForIv.getValue(); - } else { - auto b = ScopedContext::getBuilder(); - *iv = - Value(b.create(ScopedContext::getLocation(), lbs, - b.getMultiDimIdentityMap(lbs.size()), ubs, - b.getMultiDimIdentityMap(ubs.size()), step) - .getInductionVar()); - } + else + *iv = ScopedContext::getBuilderRef() + .create( + ScopedContext::getLocation(), lbs, + ScopedContext::getBuilderRef().getMultiDimIdentityMap( + lbs.size()), + ubs, + ScopedContext::getBuilderRef().getMultiDimIdentityMap( + ubs.size()), + step) + .getInductionVar(); auto *body = getForInductionVarOwner(*iv).getBody(); - result.enter(body, /*prev=*/1); + result.enter(body); return result; } @@ -122,7 +125,7 @@ static Value createBinaryIndexHandle( // TODO: createOrFold when available. 
Operation *op = - makeComposedAffineApply(ScopedContext::getBuilder(), + makeComposedAffineApply(ScopedContext::getBuilderRef(), ScopedContext::getLocation(), map, operands) .getOperation(); assert(op->getNumResults() == 1 && "Expected single result AffineApply"); @@ -218,7 +221,7 @@ static Value createIComparisonExpr(CmpIPredicate predicate, Value lhs, assert((lhsType.isa() || lhsType.isSignlessInteger()) && "only integer comparisons are supported"); - return ScopedContext::getBuilder().create( + return ScopedContext::getBuilderRef().create( ScopedContext::getLocation(), predicate, lhs, rhs); } @@ -231,7 +234,7 @@ static Value createFComparisonExpr(CmpFPredicate predicate, Value lhs, assert(lhsType == rhsType && "cannot mix types in operators"); assert(lhsType.isa() && "only float comparisons are supported"); - return ScopedContext::getBuilder().create( + return ScopedContext::getBuilderRef().create( ScopedContext::getLocation(), predicate, lhs, rhs); } diff --git a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt index a07905b71fbc2..e00986584676f 100644 --- a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt @@ -6,9 +6,8 @@ add_mlir_dialect_library(MLIRAffineEDSC DEPENDS MLIRAffineOpsIncGen - ) -target_link_libraries(MLIRAffineEDSC - PUBLIC + + LINK_LIBS PUBLIC MLIRAffineOps MLIREDSC MLIRIR diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 5f490b0d3721b..8f533e9b21779 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -69,7 +69,7 @@ struct AffineInlinerInterface : public DialectInlinerInterface { AffineDialect::AffineDialect(MLIRContext *context) : Dialect(getDialectNamespace(), context) { - addOperations(); @@ -84,65 +84,110 @@ Operation *AffineDialect::materializeConstant(OpBuilder &builder, return builder.create(loc, type, value); } -/// A utility function to check if a given region is attached to a function. -static bool isFunctionRegion(Region *region) { - return llvm::isa(region->getParentOp()); +/// A utility function to check if a value is defined at the top level of an +/// op with trait `PolyhedralScope`. A value of index type defined at the top +/// level is always a valid symbol. +bool mlir::isTopLevelValue(Value value) { + if (auto arg = value.dyn_cast()) + return arg.getOwner()->getParentOp()->hasTrait(); + return value.getDefiningOp() + ->getParentOp() + ->hasTrait(); } -/// A utility function to check if a value is defined at the top level of a -/// function. A value of index type defined at the top level is always a valid -/// symbol. -bool mlir::isTopLevelValue(Value value) { +/// A utility function to check if a value is defined at the top level of +/// `region` or is an argument of `region`. A value of index type defined at the +/// top level of a `PolyhedralScope` region is always a valid symbol for all +/// uses in that region. +static bool isTopLevelValue(Value value, Region *region) { if (auto arg = value.dyn_cast()) - return isFunctionRegion(arg.getOwner()->getParent()); - return isFunctionRegion(value.getDefiningOp()->getParentRegion()); + return arg.getParentRegion() == region; + return value.getDefiningOp()->getParentOp() == region->getParentOp(); +} + +/// Returns the closest region enclosing `op` that is held by an operation with +/// trait `PolyhedralScope`. +// TODO: getAffineScope should be publicly exposed for affine passes/utilities. 
+static Region *getAffineScope(Operation *op) { + auto *curOp = op; + while (auto *parentOp = curOp->getParentOp()) { + if (parentOp->hasTrait()) + return curOp->getParentRegion(); + curOp = parentOp; + } + llvm_unreachable("op doesn't have an enclosing polyhedral scope"); } -// Value can be used as a dimension id if it is valid as a symbol, or -// it is an induction variable, or it is a result of affine apply operation -// with dimension id arguments. +// A Value can be used as a dimension id iff it meets one of the following +// conditions: +// *) It is valid as a symbol. +// *) It is an induction variable. +// *) It is the result of an affine apply operation with dimension id arguments. bool mlir::isValidDim(Value value) { // The value must be an index type. if (!value.getType().isIndex()) return false; - if (auto *op = value.getDefiningOp()) { - // Top level operation or constant operation is ok. - if (isFunctionRegion(op->getParentRegion()) || isa(op)) - return true; - // Affine apply operation is ok if all of its operands are ok. - if (auto applyOp = dyn_cast(op)) - return applyOp.isValidDim(); - // The dim op is okay if its operand memref/tensor is defined at the top - // level. - if (auto dimOp = dyn_cast(op)) - return isTopLevelValue(dimOp.getOperand()); + if (auto *defOp = value.getDefiningOp()) + return isValidDim(value, getAffineScope(defOp)); + + // This value has to be a block argument for an op that has the + // `PolyhedralScope` trait or for an affine.for or affine.parallel. + auto *parentOp = value.cast().getOwner()->getParentOp(); + return parentOp->hasTrait() || + isa(parentOp) || isa(parentOp); +} + +// Value can be used as a dimension id iff it meets one of the following +// conditions: +// *) It is valid as a symbol. +// *) It is an induction variable. +// *) It is the result of an affine apply operation with dimension id operands. +bool mlir::isValidDim(Value value, Region *region) { + // The value must be an index type. + if (!value.getType().isIndex()) return false; + + // All valid symbols are okay. + if (isValidSymbol(value, region)) + return true; + + auto *op = value.getDefiningOp(); + if (!op) { + // This value has to be a block argument for an affine.for or an + // affine.parallel. + auto *parentOp = value.cast().getOwner()->getParentOp(); + return isa(parentOp) || isa(parentOp); } - // This value has to be a block argument of a FuncOp, an 'affine.for', or an - // 'affine.parallel'. - auto *parentOp = value.cast().getOwner()->getParentOp(); - return isa(parentOp) || isa(parentOp) || - isa(parentOp); + + // Affine apply operation is ok if all of its operands are ok. + if (auto applyOp = dyn_cast(op)) + return applyOp.isValidDim(region); + // The dim op is okay if its operand memref/tensor is defined at the top + // level. + if (auto dimOp = dyn_cast(op)) + return isTopLevelValue(dimOp.getOperand()); + return false; } /// Returns true if the 'index' dimension of the `memref` defined by -/// `memrefDefOp` is a statically shaped one or defined using a valid symbol. +/// `memrefDefOp` is a statically shaped one or defined using a valid symbol +/// for `region`. template -static bool isMemRefSizeValidSymbol(AnyMemRefDefOp memrefDefOp, - unsigned index) { +bool isMemRefSizeValidSymbol(AnyMemRefDefOp memrefDefOp, unsigned index, + Region *region) { auto memRefType = memrefDefOp.getType(); // Statically shaped.
if (!memRefType.isDynamicDim(index)) return true; // Get the position of the dimension among dynamic dimensions. unsigned dynamicDimPos = memRefType.getDynamicDimIndex(index); - return isValidSymbol( - *(memrefDefOp.getDynamicSizes().begin() + dynamicDimPos)); + return isValidSymbol(*(memrefDefOp.getDynamicSizes().begin() + dynamicDimPos), + region); } -/// Returns true if the result of the dim op is a valid symbol. -static bool isDimOpValidSymbol(DimOp dimOp) { +/// Returns true if the result of the dim op is a valid symbol for `region`. +static bool isDimOpValidSymbol(DimOp dimOp, Region *region) { // The dim op is okay if its operand memref/tensor is defined at the top // level. if (isTopLevelValue(dimOp.getOperand())) @@ -152,43 +197,90 @@ static bool isDimOpValidSymbol(DimOp dimOp) { // whose corresponding size is a valid symbol. unsigned index = dimOp.getIndex(); if (auto viewOp = dyn_cast(dimOp.getOperand().getDefiningOp())) - return isMemRefSizeValidSymbol(viewOp, index); + return isMemRefSizeValidSymbol(viewOp, index, region); if (auto subViewOp = dyn_cast(dimOp.getOperand().getDefiningOp())) - return isMemRefSizeValidSymbol(subViewOp, index); + return isMemRefSizeValidSymbol(subViewOp, index, region); if (auto allocOp = dyn_cast(dimOp.getOperand().getDefiningOp())) - return isMemRefSizeValidSymbol(allocOp, index); + return isMemRefSizeValidSymbol(allocOp, index, region); return false; } -// Value can be used as a symbol if it is a constant, or it is defined at -// the top level, or it is a result of affine apply operation with symbol -// arguments, or a result of the dim op on a memref satisfying certain -// constraints. +// A value can be used as a symbol (at all its use sites) iff it meets one of +// the following conditions: +// *) It is a constant. +// *) Its defining op or block arg appearance is immediately enclosed by an op +// with `PolyhedralScope` trait. +// *) It is the result of an affine.apply operation with symbol operands. +// *) It is a result of the dim op on a memref whose corresponding size is a +// valid symbol. bool mlir::isValidSymbol(Value value) { // The value must be an index type. if (!value.getType().isIndex()) return false; - if (auto *op = value.getDefiningOp()) { - // Top level operation or constant operation is ok. - if (isFunctionRegion(op->getParentRegion()) || isa(op)) - return true; - // Affine apply operation is ok if all of its operands are ok. - if (auto applyOp = dyn_cast(op)) - return applyOp.isValidSymbol(); - if (auto dimOp = dyn_cast(op)) { - return isDimOpValidSymbol(dimOp); - } + // Check that the value is a top level value. + if (isTopLevelValue(value)) + return true; + + if (auto *defOp = value.getDefiningOp()) + return isValidSymbol(value, getAffineScope(defOp)); + + return false; +} + +// A value can be used as a symbol for `region` iff it meets one of the +// following conditions: +// *) It is a constant. +// *) It is defined at the top level of 'region' or is its argument. +// *) It dominates `region`'s parent op. +// *) It is the result of an affine apply operation with symbol arguments. +// *) It is a result of the dim op on a memref whose corresponding size is +// a valid symbol. +bool mlir::isValidSymbol(Value value, Region *region) { + // The value must be an index type. + if (!value.getType().isIndex()) + return false; + + // A top-level value is a valid symbol.
+ if (::isTopLevelValue(value, region)) + return true; + + auto *defOp = value.getDefiningOp(); + if (!defOp) { + // A block argument that is not a top-level value is a valid symbol if it + // dominates region's parent op. + if (!region->getParentOp()->isKnownIsolatedFromAbove()) + if (auto *parentOpRegion = region->getParentOp()->getParentRegion()) + return isValidSymbol(value, parentOpRegion); + return false; } - // Otherwise, check that the value is a top level value. - return isTopLevelValue(value); + + // Constant operation is ok. + Attribute operandCst; + if (matchPattern(defOp, m_Constant(&operandCst))) + return true; + + // Affine apply operation is ok if all of its operands are ok. + if (auto applyOp = dyn_cast(defOp)) + return applyOp.isValidSymbol(region); + + // Dim op results could be valid symbols at any level. + if (auto dimOp = dyn_cast(defOp)) + return isDimOpValidSymbol(dimOp, region); + + // Check for values dominating `region`'s parent op. + if (!region->getParentOp()->isKnownIsolatedFromAbove()) + if (auto *parentRegion = region->getParentOp()->getParentRegion()) + return isValidSymbol(value, parentRegion); + + return false; } // Returns true if 'value' is a valid index to an affine operation (e.g. -// affine.load, affine.store, affine.dma_start, affine.dma_wait). -// Returns false otherwise. -static bool isValidAffineIndexOperand(Value value) { - return isValidDim(value) || isValidSymbol(value); +// affine.load, affine.store, affine.dma_start, affine.dma_wait) where +// `region` provides the polyhedral symbol scope. Returns false otherwise. +static bool isValidAffineIndexOperand(Value value, Region *region) { + return isValidDim(value, region) || isValidSymbol(value, region); } /// Utility function to verify that a set of operands are valid dimension and @@ -203,9 +295,9 @@ verifyDimAndSymbolIdentifiers(OpTy &op, Operation::operand_range operands, unsigned opIt = 0; for (auto operand : operands) { if (opIt++ < numDims) { - if (!isValidDim(operand)) + if (!isValidDim(operand, getAffineScope(op))) return op.emitOpError("operand cannot be used as a dimension id"); - } else if (!isValidSymbol(operand)) { + } else if (!isValidSymbol(operand, getAffineScope(op))) { return op.emitOpError("operand cannot be used as a symbol"); } } @@ -273,6 +365,14 @@ bool AffineApplyOp::isValidDim() { [](Value op) { return mlir::isValidDim(op); }); } +// The result of the affine apply operation can be used as a dimension id if all +// its operands are valid dimension ids with the parent operation of `region` +// defining the polyhedral scope for symbols. +bool AffineApplyOp::isValidDim(Region *region) { + return llvm::all_of(getOperands(), + [&](Value op) { return ::isValidDim(op, region); }); +} + // The result of the affine apply operation can be used as a symbol if all its // operands are symbols. bool AffineApplyOp::isValidSymbol() { @@ -280,6 +380,14 @@ bool AffineApplyOp::isValidSymbol() { [](Value op) { return mlir::isValidSymbol(op); }); } +// The result of the affine apply operation can be used as a symbol in `region` +// if all its operands are symbols in `region`. 
+bool AffineApplyOp::isValidSymbol(Region *region) { + return llvm::all_of(getOperands(), [&](Value operand) { + return mlir::isValidSymbol(operand, region); + }); +} + OpFoldResult AffineApplyOp::fold(ArrayRef operands) { auto map = getAffineMap(); @@ -811,7 +919,7 @@ static LogicalResult foldMemRefCast(Operation *op) { //===----------------------------------------------------------------------===// // TODO(b/133776335) Check that map operands are loop IVs or symbols. -void AffineDmaStartOp::build(Builder *builder, OperationState &result, +void AffineDmaStartOp::build(OpBuilder &builder, OperationState &result, Value srcMemRef, AffineMap srcMap, ValueRange srcIndices, Value destMemRef, AffineMap dstMap, ValueRange destIndices, @@ -948,22 +1056,23 @@ LogicalResult AffineDmaStartOp::verify() { return emitOpError("incorrect number of operands"); } + Region *scope = getAffineScope(*this); for (auto idx : getSrcIndices()) { if (!idx.getType().isIndex()) return emitOpError("src index to dma_start must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) + if (!isValidAffineIndexOperand(idx, scope)) return emitOpError("src index must be a dimension or symbol identifier"); } for (auto idx : getDstIndices()) { if (!idx.getType().isIndex()) return emitOpError("dst index to dma_start must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) + if (!isValidAffineIndexOperand(idx, scope)) return emitOpError("dst index must be a dimension or symbol identifier"); } for (auto idx : getTagIndices()) { if (!idx.getType().isIndex()) return emitOpError("tag index to dma_start must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) + if (!isValidAffineIndexOperand(idx, scope)) return emitOpError("tag index must be a dimension or symbol identifier"); } return success(); @@ -980,7 +1089,7 @@ LogicalResult AffineDmaStartOp::fold(ArrayRef cstOperands, //===----------------------------------------------------------------------===// // TODO(b/133776335) Check that map operands are loop IVs or symbols. -void AffineDmaWaitOp::build(Builder *builder, OperationState &result, +void AffineDmaWaitOp::build(OpBuilder &builder, OperationState &result, Value tagMemRef, AffineMap tagMap, ValueRange tagIndices, Value numElements) { result.addOperands(tagMemRef); @@ -1036,10 +1145,11 @@ ParseResult AffineDmaWaitOp::parse(OpAsmParser &parser, LogicalResult AffineDmaWaitOp::verify() { if (!getOperand(0).getType().isa()) return emitOpError("expected DMA tag to be of memref type"); + Region *scope = getAffineScope(*this); for (auto idx : getTagIndices()) { if (!idx.getType().isIndex()) return emitOpError("index to dma_wait must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) + if (!isValidAffineIndexOperand(idx, scope)) return emitOpError("index must be a dimension or symbol identifier"); } return success(); @@ -1055,7 +1165,7 @@ LogicalResult AffineDmaWaitOp::fold(ArrayRef cstOperands, // AffineForOp //===----------------------------------------------------------------------===// -void AffineForOp::build(Builder *builder, OperationState &result, +void AffineForOp::build(OpBuilder &builder, OperationState &result, ValueRange lbOperands, AffineMap lbMap, ValueRange ubOperands, AffineMap ubMap, int64_t step) { assert(((!lbMap && lbOperands.empty()) || @@ -1068,7 +1178,7 @@ void AffineForOp::build(Builder *builder, OperationState &result, // Add an attribute for the step. 
result.addAttribute(getStepAttrName(), - builder->getIntegerAttr(builder->getIndexType(), step)); + builder.getIntegerAttr(builder.getIndexType(), step)); // Add the lower bound. result.addAttribute(getLowerBoundAttrName(), AffineMapAttr::get(lbMap)); @@ -1082,15 +1192,15 @@ void AffineForOp::build(Builder *builder, OperationState &result, // the loop induction variable. Region *bodyRegion = result.addRegion(); Block *body = new Block(); - body->addArgument(IndexType::get(builder->getContext())); + body->addArgument(IndexType::get(builder.getContext())); bodyRegion->push_back(body); - ensureTerminator(*bodyRegion, *builder, result.location); + ensureTerminator(*bodyRegion, builder, result.location); } -void AffineForOp::build(Builder *builder, OperationState &result, int64_t lb, +void AffineForOp::build(OpBuilder &builder, OperationState &result, int64_t lb, int64_t ub, int64_t step) { - auto lbMap = AffineMap::getConstantMap(lb, builder->getContext()); - auto ubMap = AffineMap::getConstantMap(ub, builder->getContext()); + auto lbMap = AffineMap::getConstantMap(lb, builder.getContext()); + auto ubMap = AffineMap::getConstantMap(ub, builder.getContext()); return build(builder, result, {}, lbMap, {}, ubMap, step); } @@ -1695,15 +1805,15 @@ void AffineIfOp::setConditional(IntegerSet set, ValueRange operands) { getOperation()->setOperands(operands); } -void AffineIfOp::build(Builder *builder, OperationState &result, IntegerSet set, - ValueRange args, bool withElseRegion) { +void AffineIfOp::build(OpBuilder &builder, OperationState &result, + IntegerSet set, ValueRange args, bool withElseRegion) { result.addOperands(args); result.addAttribute(getConditionAttrName(), IntegerSetAttr::get(set)); Region *thenRegion = result.addRegion(); Region *elseRegion = result.addRegion(); - AffineIfOp::ensureTerminator(*thenRegion, *builder, result.location); + AffineIfOp::ensureTerminator(*thenRegion, builder, result.location); if (withElseRegion) - AffineIfOp::ensureTerminator(*elseRegion, *builder, result.location); + AffineIfOp::ensureTerminator(*elseRegion, builder, result.location); } /// Canonicalize an affine if op's conditional (integer set + operands). 
@@ -1734,7 +1844,7 @@ void AffineIfOp::getCanonicalizationPatterns(OwningRewritePatternList &results, // AffineLoadOp //===----------------------------------------------------------------------===// -void AffineLoadOp::build(Builder *builder, OperationState &result, +void AffineLoadOp::build(OpBuilder &builder, OperationState &result, AffineMap map, ValueRange operands) { assert(operands.size() == 1 + map.getNumInputs() && "inconsistent operands"); result.addOperands(operands); @@ -1744,8 +1854,8 @@ void AffineLoadOp::build(Builder *builder, OperationState &result, result.types.push_back(memrefType.getElementType()); } -void AffineLoadOp::build(Builder *builder, OperationState &result, Value memref, - AffineMap map, ValueRange mapOperands) { +void AffineLoadOp::build(OpBuilder &builder, OperationState &result, + Value memref, AffineMap map, ValueRange mapOperands) { assert(map.getNumInputs() == mapOperands.size() && "inconsistent index info"); result.addOperands(memref); result.addOperands(mapOperands); @@ -1754,18 +1864,18 @@ void AffineLoadOp::build(Builder *builder, OperationState &result, Value memref, result.types.push_back(memrefType.getElementType()); } -void AffineLoadOp::build(Builder *builder, OperationState &result, Value memref, - ValueRange indices) { +void AffineLoadOp::build(OpBuilder &builder, OperationState &result, + Value memref, ValueRange indices) { auto memrefType = memref.getType().cast(); auto rank = memrefType.getRank(); // Create identity map for memrefs with at least one dimension or () -> () // for zero-dimensional memrefs. - auto map = rank ? builder->getMultiDimIdentityMap(rank) - : builder->getEmptyAffineMap(); + auto map = + rank ? builder.getMultiDimIdentityMap(rank) : builder.getEmptyAffineMap(); build(builder, result, memref, map, indices); } -ParseResult AffineLoadOp::parse(OpAsmParser &parser, OperationState &result) { +ParseResult parseAffineLoadOp(OpAsmParser &parser, OperationState &result) { auto &builder = parser.getBuilder(); auto indexTy = builder.getIndexType(); @@ -1775,7 +1885,8 @@ ParseResult AffineLoadOp::parse(OpAsmParser &parser, OperationState &result) { SmallVector mapOperands; return failure( parser.parseOperand(memrefInfo) || - parser.parseAffineMapOfSSAIds(mapOperands, mapAttr, getMapAttrName(), + parser.parseAffineMapOfSSAIds(mapOperands, mapAttr, + AffineLoadOp::getMapAttrName(), result.attributes) || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || @@ -1784,38 +1895,41 @@ ParseResult AffineLoadOp::parse(OpAsmParser &parser, OperationState &result) { parser.addTypeToList(type.getElementType(), result.types)); } -void AffineLoadOp::print(OpAsmPrinter &p) { - p << "affine.load " << getMemRef() << '['; - if (AffineMapAttr mapAttr = getAttrOfType(getMapAttrName())) - p.printAffineMapOfSSAIds(mapAttr, getMapOperands()); +void print(OpAsmPrinter &p, AffineLoadOp op) { + p << "affine.load " << op.getMemRef() << '['; + if (AffineMapAttr mapAttr = + op.getAttrOfType(op.getMapAttrName())) + p.printAffineMapOfSSAIds(mapAttr, op.getMapOperands()); p << ']'; - p.printOptionalAttrDict(getAttrs(), /*elidedAttrs=*/{getMapAttrName()}); - p << " : " << getMemRefType(); + p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); + p << " : " << op.getMemRefType(); } -LogicalResult AffineLoadOp::verify() { - if (getType() != getMemRefType().getElementType()) - return emitOpError("result type must match element type of memref"); +LogicalResult verify(AffineLoadOp op) { + if (op.getType() != 
op.getMemRefType().getElementType()) + return op.emitOpError("result type must match element type of memref"); - auto mapAttr = getAttrOfType(getMapAttrName()); + auto mapAttr = op.getAttrOfType(op.getMapAttrName()); if (mapAttr) { - AffineMap map = getAttrOfType(getMapAttrName()).getValue(); - if (map.getNumResults() != getMemRefType().getRank()) - return emitOpError("affine.load affine map num results must equal" - " memref rank"); - if (map.getNumInputs() != getNumOperands() - 1) - return emitOpError("expects as many subscripts as affine map inputs"); + AffineMap map = + op.getAttrOfType(op.getMapAttrName()).getValue(); + if (map.getNumResults() != op.getMemRefType().getRank()) + return op.emitOpError("affine.load affine map num results must equal" + " memref rank"); + if (map.getNumInputs() != op.getNumOperands() - 1) + return op.emitOpError("expects as many subscripts as affine map inputs"); } else { - if (getMemRefType().getRank() != getNumOperands() - 1) - return emitOpError( + if (op.getMemRefType().getRank() != op.getNumOperands() - 1) + return op.emitOpError( "expects the number of subscripts to be equal to memref rank"); } - for (auto idx : getMapOperands()) { + Region *scope = getAffineScope(op); + for (auto idx : op.getMapOperands()) { if (!idx.getType().isIndex()) - return emitOpError("index to load must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) - return emitOpError("index must be a dimension or symbol identifier"); + return op.emitOpError("index to load must have 'index' type"); + if (!isValidAffineIndexOperand(idx, scope)) + return op.emitOpError("index must be a dimension or symbol identifier"); } return success(); } @@ -1836,7 +1950,7 @@ OpFoldResult AffineLoadOp::fold(ArrayRef cstOperands) { // AffineStoreOp //===----------------------------------------------------------------------===// -void AffineStoreOp::build(Builder *builder, OperationState &result, +void AffineStoreOp::build(OpBuilder &builder, OperationState &result, Value valueToStore, Value memref, AffineMap map, ValueRange mapOperands) { assert(map.getNumInputs() == mapOperands.size() && "inconsistent index info"); @@ -1847,19 +1961,19 @@ void AffineStoreOp::build(Builder *builder, OperationState &result, } // Use identity map. -void AffineStoreOp::build(Builder *builder, OperationState &result, +void AffineStoreOp::build(OpBuilder &builder, OperationState &result, Value valueToStore, Value memref, ValueRange indices) { auto memrefType = memref.getType().cast(); auto rank = memrefType.getRank(); // Create identity map for memrefs with at least one dimension or () -> () // for zero-dimensional memrefs. - auto map = rank ? builder->getMultiDimIdentityMap(rank) - : builder->getEmptyAffineMap(); + auto map = + rank ? 
builder.getMultiDimIdentityMap(rank) : builder.getEmptyAffineMap(); build(builder, result, valueToStore, memref, map, indices); } -ParseResult AffineStoreOp::parse(OpAsmParser &parser, OperationState &result) { +ParseResult parseAffineStoreOp(OpAsmParser &parser, OperationState &result) { auto indexTy = parser.getBuilder().getIndexType(); MemRefType type; @@ -1870,7 +1984,7 @@ ParseResult AffineStoreOp::parse(OpAsmParser &parser, OperationState &result) { return failure(parser.parseOperand(storeValueInfo) || parser.parseComma() || parser.parseOperand(memrefInfo) || parser.parseAffineMapOfSSAIds(mapOperands, mapAttr, - getMapAttrName(), + AffineStoreOp::getMapAttrName(), result.attributes) || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || @@ -1880,40 +1994,43 @@ ParseResult AffineStoreOp::parse(OpAsmParser &parser, OperationState &result) { parser.resolveOperands(mapOperands, indexTy, result.operands)); } -void AffineStoreOp::print(OpAsmPrinter &p) { - p << "affine.store " << getValueToStore(); - p << ", " << getMemRef() << '['; - if (AffineMapAttr mapAttr = getAttrOfType(getMapAttrName())) - p.printAffineMapOfSSAIds(mapAttr, getMapOperands()); +void print(OpAsmPrinter &p, AffineStoreOp op) { + p << "affine.store " << op.getValueToStore(); + p << ", " << op.getMemRef() << '['; + if (AffineMapAttr mapAttr = + op.getAttrOfType(op.getMapAttrName())) + p.printAffineMapOfSSAIds(mapAttr, op.getMapOperands()); p << ']'; - p.printOptionalAttrDict(getAttrs(), /*elidedAttrs=*/{getMapAttrName()}); - p << " : " << getMemRefType(); + p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{op.getMapAttrName()}); + p << " : " << op.getMemRefType(); } -LogicalResult AffineStoreOp::verify() { +LogicalResult verify(AffineStoreOp op) { // First operand must have same type as memref element type. 
- if (getValueToStore().getType() != getMemRefType().getElementType()) - return emitOpError("first operand must have same type memref element type"); + if (op.getValueToStore().getType() != op.getMemRefType().getElementType()) + return op.emitOpError( + "first operand must have same type memref element type"); - auto mapAttr = getAttrOfType(getMapAttrName()); + auto mapAttr = op.getAttrOfType(op.getMapAttrName()); if (mapAttr) { AffineMap map = mapAttr.getValue(); - if (map.getNumResults() != getMemRefType().getRank()) - return emitOpError("affine.store affine map num results must equal" - " memref rank"); - if (map.getNumInputs() != getNumOperands() - 2) - return emitOpError("expects as many subscripts as affine map inputs"); + if (map.getNumResults() != op.getMemRefType().getRank()) + return op.emitOpError("affine.store affine map num results must equal" + " memref rank"); + if (map.getNumInputs() != op.getNumOperands() - 2) + return op.emitOpError("expects as many subscripts as affine map inputs"); } else { - if (getMemRefType().getRank() != getNumOperands() - 2) - return emitOpError( + if (op.getMemRefType().getRank() != op.getNumOperands() - 2) + return op.emitOpError( "expects the number of subscripts to be equal to memref rank"); } - for (auto idx : getMapOperands()) { + Region *scope = getAffineScope(op); + for (auto idx : op.getMapOperands()) { if (!idx.getType().isIndex()) - return emitOpError("index to store must have 'index' type"); - if (!isValidAffineIndexOperand(idx)) - return emitOpError("index must be a dimension or symbol identifier"); + return op.emitOpError("index to store must have 'index' type"); + if (!isValidAffineIndexOperand(idx, scope)) + return op.emitOpError("index must be a dimension or symbol identifier"); } return success(); } @@ -2131,8 +2248,9 @@ static LogicalResult verify(AffinePrefetchOp op) { return op.emitOpError("too few operands"); } + Region *scope = getAffineScope(op); for (auto idx : op.getMapOperands()) { - if (!isValidAffineIndexOperand(idx)) + if (!isValidAffineIndexOperand(idx, scope)) return op.emitOpError("index must be a dimension or symbol identifier"); } return success(); @@ -2154,19 +2272,19 @@ LogicalResult AffinePrefetchOp::fold(ArrayRef cstOperands, // AffineParallelOp //===----------------------------------------------------------------------===// -void AffineParallelOp::build(Builder *builder, OperationState &result, +void AffineParallelOp::build(OpBuilder &builder, OperationState &result, ArrayRef ranges) { SmallVector lbExprs(ranges.size(), - builder->getAffineConstantExpr(0)); - auto lbMap = AffineMap::get(0, 0, lbExprs, builder->getContext()); + builder.getAffineConstantExpr(0)); + auto lbMap = AffineMap::get(0, 0, lbExprs, builder.getContext()); SmallVector ubExprs; for (int64_t range : ranges) - ubExprs.push_back(builder->getAffineConstantExpr(range)); - auto ubMap = AffineMap::get(0, 0, ubExprs, builder->getContext()); + ubExprs.push_back(builder.getAffineConstantExpr(range)); + auto ubMap = AffineMap::get(0, 0, ubExprs, builder.getContext()); build(builder, result, lbMap, {}, ubMap, {}); } -void AffineParallelOp::build(Builder *builder, OperationState &result, +void AffineParallelOp::build(OpBuilder &builder, OperationState &result, AffineMap lbMap, ValueRange lbArgs, AffineMap ubMap, ValueRange ubArgs) { auto numDims = lbMap.getNumResults(); @@ -2178,7 +2296,7 @@ void AffineParallelOp::build(Builder *builder, OperationState &result, build(builder, result, lbMap, lbArgs, ubMap, ubArgs, steps); } -void 
AffineParallelOp::build(Builder *builder, OperationState &result, +void AffineParallelOp::build(OpBuilder &builder, OperationState &result, AffineMap lbMap, ValueRange lbArgs, AffineMap ubMap, ValueRange ubArgs, ArrayRef steps) { @@ -2189,7 +2307,7 @@ void AffineParallelOp::build(Builder *builder, OperationState &result, assert(numDims == steps.size() && "num dims and num steps mismatch"); result.addAttribute(getLowerBoundsMapAttrName(), AffineMapAttr::get(lbMap)); result.addAttribute(getUpperBoundsMapAttrName(), AffineMapAttr::get(ubMap)); - result.addAttribute(getStepsAttrName(), builder->getI64ArrayAttr(steps)); + result.addAttribute(getStepsAttrName(), builder.getI64ArrayAttr(steps)); result.addOperands(lbArgs); result.addOperands(ubArgs); // Create a region and a block for the body. @@ -2197,9 +2315,9 @@ void AffineParallelOp::build(Builder *builder, OperationState &result, auto body = new Block(); // Add all the block arguments. for (unsigned i = 0; i < numDims; ++i) - body->addArgument(IndexType::get(builder->getContext())); + body->addArgument(IndexType::get(builder.getContext())); bodyRegion->push_back(body); - ensureTerminator(*bodyRegion, *builder, result.location); + ensureTerminator(*bodyRegion, builder, result.location); } unsigned AffineParallelOp::getNumDims() { return steps().size(); } diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt index 8d186ae03b544..d34065e3b9526 100644 --- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt @@ -7,9 +7,8 @@ add_mlir_dialect_library(MLIRAffineOps DEPENDS MLIRAffineOpsIncGen - ) -target_link_libraries(MLIRAffineOps - PUBLIC + + LINK_LIBS PUBLIC MLIREDSC MLIRIR MLIRLoopLikeInterface diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt index bcad44d724975..89c72ae1ddab8 100644 --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -14,9 +14,8 @@ add_mlir_dialect_library(MLIRAffineTransforms MLIRAffineOpsIncGen MLIRAffinePassIncGen MLIRLoopLikeInterfaceIncGen - ) -target_link_libraries(MLIRAffineTransforms - PUBLIC + + LINK_LIBS PUBLIC MLIRAffineOps MLIREDSC MLIRIR diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt index ed3b5b8b17235..59ae13dcabcfd 100644 --- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt @@ -4,9 +4,7 @@ add_mlir_dialect_library(MLIRAffineUtils ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine - ) -target_link_libraries(MLIRAffineUtils - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt index b18a2a671b149..e9196f224ae0a 100644 --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -21,8 +21,7 @@ add_mlir_library(MLIRDialect ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect - ) -target_link_libraries(MLIRDialect - PUBLIC + + LINK_LIBS PUBLIC MLIRIR ) diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index ad63b3669409d..01863d4a3d565 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -13,9 +13,8 @@ add_mlir_dialect_library(MLIRGPU MLIRGPUPassIncGen MLIRParallelLoopMapperAttrGen MLIRParallelLoopMapperEnumsGen - ) -target_link_libraries(MLIRGPU - PUBLIC + + LINK_LIBS PUBLIC MLIREDSC MLIRIR 
MLIRLLVMIR @@ -25,5 +24,4 @@ target_link_libraries(MLIRGPU MLIRStandardOps MLIRSupport MLIRTransformUtils - LLVMSupport ) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index e751107820bc7..92ececb142d96 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -200,9 +200,9 @@ static ParseResult parseShuffleOp(OpAsmParser &parser, OperationState &state) { // LaunchOp //===----------------------------------------------------------------------===// -void LaunchOp::build(Builder *builder, OperationState &result, Value gridSizeX, - Value gridSizeY, Value gridSizeZ, Value blockSizeX, - Value blockSizeY, Value blockSizeZ) { +void LaunchOp::build(OpBuilder &builder, OperationState &result, + Value gridSizeX, Value gridSizeY, Value gridSizeZ, + Value blockSizeX, Value blockSizeY, Value blockSizeZ) { // Add grid and block sizes as op operands, followed by the data operands. result.addOperands( {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ}); @@ -213,7 +213,7 @@ void LaunchOp::build(Builder *builder, OperationState &result, Value gridSizeX, Region *kernelRegion = result.addRegion(); Block *body = new Block(); body->addArguments( - std::vector(kNumConfigRegionAttributes, builder->getIndexType())); + std::vector(kNumConfigRegionAttributes, builder.getIndexType())); kernelRegion->push_back(body); } @@ -388,7 +388,7 @@ static ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) { // LaunchFuncOp //===----------------------------------------------------------------------===// -void LaunchFuncOp::build(Builder *builder, OperationState &result, +void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, Value gridSizeX, Value gridSizeY, Value gridSizeZ, Value blockSizeX, Value blockSizeY, Value blockSizeZ, ValueRange kernelOperands) { @@ -397,13 +397,12 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result, {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ}); result.addOperands(kernelOperands); auto kernelModule = kernelFunc.getParentOfType(); - auto kernelSymbol = builder->getSymbolRefAttr( - kernelModule.getName(), - {builder->getSymbolRefAttr(kernelFunc.getName())}); + auto kernelSymbol = builder.getSymbolRefAttr( + kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())}); result.addAttribute(getKernelAttrName(), kernelSymbol); } -void LaunchFuncOp::build(Builder *builder, OperationState &result, +void LaunchFuncOp::build(OpBuilder &builder, OperationState &result, GPUFuncOp kernelFunc, KernelDim3 gridSize, KernelDim3 blockSize, ValueRange kernelOperands) { build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z, @@ -476,15 +475,16 @@ Value GPUFuncOp::addWorkgroupAttribution(ArrayRef shape, return attribution; } -void GPUFuncOp::build(Builder *builder, OperationState &result, StringRef name, - FunctionType type, ArrayRef workgroupAttributions, +void GPUFuncOp::build(OpBuilder &builder, OperationState &result, + StringRef name, FunctionType type, + ArrayRef workgroupAttributions, ArrayRef privateAttributions, ArrayRef attrs) { result.addAttribute(SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); + builder.getStringAttr(name)); result.addAttribute(getTypeAttrName(), TypeAttr::get(type)); result.addAttribute(getNumWorkgroupAttributionsAttrName(), - builder->getI64IntegerAttr(workgroupAttributions.size())); + builder.getI64IntegerAttr(workgroupAttributions.size())); 
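A pattern repeated throughout this patch is the migration of `build()` methods from `Builder *` to `OpBuilder &`, visible in the GPU dialect hunks above. A minimal sketch of the new convention (MyOp and its attribute are illustrative, not part of this patch):

```c++
// With OpBuilder passed by reference there is no pointer to dereference,
// and helpers such as ensureTerminator can reuse the same builder object.
void MyOp::build(OpBuilder &builder, OperationState &result, Value input) {
  result.addOperands(input);
  result.addAttribute("label", builder.getStringAttr("example"));
  result.addTypes(builder.getIndexType());
}
```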
result.addAttributes(attrs); Region *body = result.addRegion(); Block *entryBlock = new Block; @@ -742,11 +742,11 @@ static LogicalResult verify(gpu::ReturnOp returnOp) { // GPUModuleOp //===----------------------------------------------------------------------===// -void GPUModuleOp::build(Builder *builder, OperationState &result, +void GPUModuleOp::build(OpBuilder &builder, OperationState &result, StringRef name) { - ensureTerminator(*result.addRegion(), *builder, result.location); - result.attributes.push_back(builder->getNamedAttr( - ::mlir::SymbolTable::getSymbolAttrName(), builder->getStringAttr(name))); + ensureTerminator(*result.addRegion(), builder, result.location); + result.attributes.push_back(builder.getNamedAttr( + ::mlir::SymbolTable::getSymbolAttrName(), builder.getStringAttr(name))); } static ParseResult parseGPUModuleOp(OpAsmParser &parser, diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index 0f4c4103b2a7e..1d72f9ec2690e 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -265,10 +265,10 @@ class GpuKernelOutliningPass // prevent manual building of Ops with symbols in code using SymbolTables // and then this needs to use the OpBuilder. auto context = getOperation().getContext(); - Builder builder(context); + OpBuilder builder(context); OperationState state(kernelFunc.getLoc(), gpu::GPUModuleOp::getOperationName()); - gpu::GPUModuleOp::build(&builder, state, kernelFunc.getName()); + gpu::GPUModuleOp::build(builder, state, kernelFunc.getName()); auto kernelModule = cast(Operation::create(state)); SymbolTable symbolTable(kernelModule); symbolTable.insert(kernelFunc); diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index 833438a70cb97..7ff3584508c33 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -9,15 +9,16 @@ add_mlir_dialect_library(MLIRLLVMIR DEPENDS MLIRLLVMOpsIncGen MLIRLLVMConversionsIncGen - ) -target_link_libraries(MLIRLLVMIR - PUBLIC - LLVMAsmParser - LLVMBitReader - LLVMBitWriter - LLVMCore - LLVMSupport - LLVMFrontendOpenMP + intrinsics_gen + + LINK_COMPONENTS + AsmParser + BitReader + BitWriter + Core + FrontendOpenMP + + LINK_LIBS PUBLIC MLIRCallInterfaces MLIRControlFlowInterfaces MLIROpenMP @@ -35,15 +36,16 @@ add_mlir_dialect_library(MLIRLLVMAVX512 DEPENDS MLIRLLVMAVX512IncGen MLIRLLVMAVX512ConversionsIncGen - ) -target_link_libraries(MLIRLLVMAVX512 - PUBLIC - LLVMAsmParser + intrinsics_gen + + LINK_COMPONENTS + AsmParser + Core + + LINK_LIBS PUBLIC MLIRIR MLIRLLVMIR MLIRSideEffects - LLVMSupport - LLVMCore ) add_mlir_dialect_library(MLIRNVVMIR @@ -55,15 +57,16 @@ add_mlir_dialect_library(MLIRNVVMIR DEPENDS MLIRNVVMOpsIncGen MLIRNVVMConversionsIncGen - ) -target_link_libraries(MLIRNVVMIR - PUBLIC - LLVMAsmParser + intrinsics_gen + + LINK_COMPONENTS + AsmParser + Core + + LINK_LIBS PUBLIC MLIRIR MLIRLLVMIR MLIRSideEffects - LLVMSupport - LLVMCore ) add_mlir_dialect_library(MLIRROCDLIR @@ -75,12 +78,14 @@ add_mlir_dialect_library(MLIRROCDLIR DEPENDS MLIRROCDLOpsIncGen MLIRROCDLConversionsIncGen - ) -target_link_libraries(MLIRROCDLIR - PUBLIC - LLVMAsmParser - LLVMCore - LLVMSupport + intrinsics_gen + + LINK_COMPONENTS + AsmParser + Core + + LINK_LIBS PUBLIC MLIRIR MLIRSideEffects + MLIRVectorToLLVM ) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 
7ce591de3802f..5c112710ec55f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -160,24 +160,22 @@ static ParseResult parseAllocaOp(OpAsmParser &parser, OperationState &result) { // LLVM::BrOp //===----------------------------------------------------------------------===// -Optional<OperandRange> BrOp::getSuccessorOperands(unsigned index) { +Optional<MutableOperandRange> +BrOp::getMutableSuccessorOperands(unsigned index) { assert(index == 0 && "invalid successor index"); - return getOperands(); + return destOperandsMutable(); } -bool BrOp::canEraseSuccessorOperand() { return true; } - //===----------------------------------------------------------------------===// // LLVM::CondBrOp //===----------------------------------------------------------------------===// -Optional<OperandRange> CondBrOp::getSuccessorOperands(unsigned index) { +Optional<MutableOperandRange> +CondBrOp::getMutableSuccessorOperands(unsigned index) { assert(index < getNumSuccessors() && "invalid successor index"); - return index == 0 ? trueDestOperands() : falseDestOperands(); + return index == 0 ? trueDestOperandsMutable() : falseDestOperandsMutable(); } -bool CondBrOp::canEraseSuccessorOperand() { return true; } - //===----------------------------------------------------------------------===// // Printing/parsing for LLVM::LoadOp. //===----------------------------------------------------------------------===// @@ -257,13 +255,12 @@ static ParseResult parseStoreOp(OpAsmParser &parser, OperationState &result) { /// LLVM::InvokeOp ///===---------------------------------------------------------------------===// -Optional<OperandRange> InvokeOp::getSuccessorOperands(unsigned index) { +Optional<MutableOperandRange> +InvokeOp::getMutableSuccessorOperands(unsigned index) { assert(index < getNumSuccessors() && "invalid successor index"); - return index == 0 ? normalDestOperands() : unwindDestOperands(); + return index == 0 ? normalDestOperandsMutable() : unwindDestOperandsMutable(); } -bool InvokeOp::canEraseSuccessorOperand() { return true; } - static LogicalResult verify(InvokeOp op) { if (op.getNumResults() > 1) return op.emitOpError("must have 0 or 1 result"); @@ -612,7 +609,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) { //===----------------------------------------------------------------------===// // Expects vector to be of wrapped LLVM vector type and position to be of // wrapped LLVM i32 type. -void LLVM::ExtractElementOp::build(Builder *b, OperationState &result, +void LLVM::ExtractElementOp::build(OpBuilder &b, OperationState &result, Value vector, Value position, ArrayRef<NamedAttribute> attrs) { auto wrappedVectorType = vector.getType().cast<LLVM::LLVMType>(); @@ -892,21 +889,21 @@ static LogicalResult verify(AddressOfOp op) { /// the name of the attribute in ODS.
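The branch-op hunks above replace `getSuccessorOperands()` plus the boolean `canEraseSuccessorOperand()` with a single `getMutableSuccessorOperands()`. A hedged sketch of the new interface for a hypothetical single-successor branch op, where `destOperandsMutable()` stands in for the ODS-generated accessor:

```c++
// Returning a MutableOperandRange lets generic passes update or erase
// successor operands in place, which is why the separate
// canEraseSuccessorOperand() predicate becomes redundant.
Optional<MutableOperandRange>
MyBranchOp::getMutableSuccessorOperands(unsigned index) {
  assert(index == 0 && "invalid successor index");
  return destOperandsMutable();
}
```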
static StringRef getLinkageAttrName() { return "linkage"; } -void GlobalOp::build(Builder *builder, OperationState &result, LLVMType type, +void GlobalOp::build(OpBuilder &builder, OperationState &result, LLVMType type, bool isConstant, Linkage linkage, StringRef name, Attribute value, unsigned addrSpace, ArrayRef attrs) { result.addAttribute(SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); + builder.getStringAttr(name)); result.addAttribute("type", TypeAttr::get(type)); if (isConstant) - result.addAttribute("constant", builder->getUnitAttr()); + result.addAttribute("constant", builder.getUnitAttr()); if (value) result.addAttribute("value", value); - result.addAttribute(getLinkageAttrName(), builder->getI64IntegerAttr( - static_cast(linkage))); + result.addAttribute(getLinkageAttrName(), + builder.getI64IntegerAttr(static_cast(linkage))); if (addrSpace != 0) - result.addAttribute("addr_space", builder->getI32IntegerAttr(addrSpace)); + result.addAttribute("addr_space", builder.getI32IntegerAttr(addrSpace)); result.attributes.append(attrs.begin(), attrs.end()); result.addRegion(); } @@ -1106,8 +1103,8 @@ static LogicalResult verify(GlobalOp op) { //===----------------------------------------------------------------------===// // Expects vector to be of wrapped LLVM vector type and position to be of // wrapped LLVM i32 type. -void LLVM::ShuffleVectorOp::build(Builder *b, OperationState &result, Value v1, - Value v2, ArrayAttr mask, +void LLVM::ShuffleVectorOp::build(OpBuilder &b, OperationState &result, + Value v1, Value v2, ArrayAttr mask, ArrayRef attrs) { auto wrappedContainerType1 = v1.getType().cast(); auto vType = LLVMType::getVectorTy( @@ -1170,16 +1167,16 @@ Block *LLVMFuncOp::addEntryBlock() { return entry; } -void LLVMFuncOp::build(Builder *builder, OperationState &result, StringRef name, - LLVMType type, LLVM::Linkage linkage, +void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, + StringRef name, LLVMType type, LLVM::Linkage linkage, ArrayRef attrs, - ArrayRef argAttrs) { + ArrayRef argAttrs) { result.addRegion(); result.addAttribute(SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); + builder.getStringAttr(name)); result.addAttribute("type", TypeAttr::get(type)); - result.addAttribute(getLinkageAttrName(), builder->getI64IntegerAttr( - static_cast(linkage))); + result.addAttribute(getLinkageAttrName(), + builder.getI64IntegerAttr(static_cast(linkage))); result.attributes.append(attrs.begin(), attrs.end()); if (argAttrs.empty()) return; diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt index 216586e6242d0..3e1342dcf2c9c 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt @@ -3,10 +3,8 @@ add_mlir_dialect_library(MLIRLLVMIRTransforms DEPENDS MLIRLLVMPassIncGen - ) -target_link_libraries(MLIRLLVMIRTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRIR MLIRLLVMIR MLIRPass diff --git a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt index acb2ab6d91d71..5bb56236a04d0 100644 --- a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt @@ -4,14 +4,8 @@ add_mlir_dialect_library(MLIRLinalgAnalysis ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg - DEPENDS - intrinsics_gen - ) - -target_link_libraries(MLIRLinalgAnalysis - PUBLIC + LINK_LIBS PUBLIC MLIRIR MLIRLinalgOps MLIRStandardOps - LLVMSupport ) diff 
--git a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp index 342a909df88b7..4c218503ba173 100644 --- a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp +++ b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp @@ -37,12 +37,26 @@ Value Aliases::find(Value v) { while (true) { if (v.isa()) return v; + Operation *defOp = v.getDefiningOp(); - if (auto alloc = dyn_cast_or_null(defOp)) { - if (isStrided(alloc.getType())) - return alloc.getResult(); + if (!defOp) + return v; + + if (auto memEffect = dyn_cast(defOp)) { + // Collect all memory effects on `v`. + SmallVector effects; + memEffect.getEffectsOnValue(v, effects); + + // If we have the 'Allocate' memory effect on `v`, then `v` should be the + // original buffer. + if (llvm::any_of( + effects, [](const MemoryEffects::EffectInstance &instance) { + return isa(instance.getEffect()); + })) + return v; } - if (auto viewLikeOp = dyn_cast_or_null(defOp)) { + + if (auto viewLikeOp = dyn_cast(defOp)) { auto it = aliases.insert(std::make_pair(v, find(viewLikeOp.getViewSource()))); return it.first->second; diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp index ed0b2d66da618..dac139f2baa77 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp @@ -28,19 +28,18 @@ mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(Value *iv, Value range) { auto lb = rangeOp.min(); auto ub = rangeOp.max(); auto step = rangeOp.step(); - auto forOp = OperationHandle::createOp(lb, ub, step); + ForOp forOp = OperationBuilder(lb, ub, step); *iv = forOp.getInductionVar(); auto *body = forOp.getBody(); - enter(body, /*prev=*/1); + enter(body); } mlir::edsc::LoopRangeBuilder::LoopRangeBuilder(Value *iv, SubViewOp::Range range) { - auto forOp = - OperationHandle::createOp(range.offset, range.size, range.stride); + ForOp forOp = OperationBuilder(range.offset, range.size, range.stride); *iv = forOp.getInductionVar(); auto *body = forOp.getBody(); - enter(body, /*prev=*/1); + enter(body); } Value mlir::edsc::LoopRangeBuilder::operator()(std::function fun) { @@ -53,18 +52,16 @@ Value mlir::edsc::LoopRangeBuilder::operator()(std::function fun) { mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( MutableArrayRef ivs, ArrayRef ranges) { loops.reserve(ranges.size()); - for (unsigned i = 0, e = ranges.size(); i < e; ++i) { + for (unsigned i = 0, e = ranges.size(); i < e; ++i) loops.emplace_back(&ivs[i], ranges[i]); - } assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); } mlir::edsc::LoopNestRangeBuilder::LoopNestRangeBuilder( MutableArrayRef ivs, ArrayRef ranges) { loops.reserve(ranges.size()); - for (unsigned i = 0, e = ranges.size(); i < e; ++i) { + for (unsigned i = 0, e = ranges.size(); i < e; ++i) loops.emplace_back(&ivs[i], ranges[i]); - } assert(loops.size() == ivs.size() && "Mismatch loops vs ivs size"); } @@ -131,7 +128,7 @@ Operation *mlir::edsc::makeGenericLinalgOp( assert(!(outputs[i].getType().isa() && outputs[i + 1].getType().isa()) && "output tensors must be passed after output buffers"); - auto &builder = edsc::ScopedContext::getBuilder(); + auto &builder = edsc::ScopedContext::getBuilderRef(); auto *ctx = builder.getContext(); unsigned nInputs = inputs.size(); unsigned nOutputs = outputs.size(); @@ -160,7 +157,7 @@ Operation *mlir::edsc::makeGenericLinalgOp( llvm::to_vector<8>(llvm::map_range(iteratorTypes, toString)); // clang-format off auto *op = - 
edsc::ScopedContext::getBuilder() + edsc::ScopedContext::getBuilderRef() .create( edsc::ScopedContext::getLocation(), types, diff --git a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt index 8ec3c6dd7d225..b3f3e5c0804f2 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt @@ -4,12 +4,7 @@ add_mlir_dialect_library(MLIRLinalgEDSC ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg - DEPENDS - intrinsics_gen - ) - -target_link_libraries(MLIRLinalgEDSC - PUBLIC + LINK_LIBS PUBLIC MLIREDSC MLIRIR MLIRAffineOps @@ -17,5 +12,4 @@ target_link_libraries(MLIRLinalgEDSC MLIRLinalgOps MLIRLoopOps MLIRStandardOps - LLVMSupport ) diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt index 5b4282a4977f0..64a51500d7921 100644 --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -6,15 +6,11 @@ add_mlir_dialect_library(MLIRLinalgOps ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg DEPENDS - intrinsics_gen - ) -add_dependencies(MLIRLinalgOps MLIRLinalgOpsIncGen MLIRLinalgStructuredOpsIncGen MLIRLinalgStructuredOpsInterfaceIncGen - ) -target_link_libraries(MLIRLinalgOps - PUBLIC + + LINK_LIBS PUBLIC MLIRIR MLIRSideEffects MLIRViewLikeInterface diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 9fef7a92d6b8b..82ae6de83c833 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -33,10 +33,9 @@ using namespace mlir::linalg; /// Forward declarations. template -static void buildNamedStructuredOpRegion(Builder &builder, - OperationState &result, - TypeRange operandTypes, - TypeRange tensorResultTypes); +static void buildNamedStructuredOpRegionAndAttributes( + Builder &builder, OperationState &result, TypeRange operandTypes, + TypeRange tensorResultTypes); template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op); template @@ -470,7 +469,7 @@ getSymbolLessAffineMaps(ArrayRef> reassociation) { } void mlir::linalg::ReshapeOp::build( - Builder *b, OperationState &result, Value src, + OpBuilder &b, OperationState &result, Value src, ArrayRef> reassociation, ArrayRef attrs) { auto maps = getSymbolLessAffineMaps(reassociation); @@ -478,17 +477,17 @@ void mlir::linalg::ReshapeOp::build( auto resultType = computeReshapeCollapsedType(memRefType, maps); build(b, result, resultType, src, attrs); result.addAttribute(ReshapeOp::getReassociationAttrName(), - b->getAffineMapArrayAttr(maps)); + b.getAffineMapArrayAttr(maps)); } void mlir::linalg::ReshapeOp::build( - Builder *b, OperationState &result, Type resultType, Value src, + OpBuilder &b, OperationState &result, Type resultType, Value src, ArrayRef> reassociation, ArrayRef attrs) { auto maps = getSymbolLessAffineMaps(reassociation); build(b, result, resultType, src, attrs); result.addAttribute(ReshapeOp::getReassociationAttrName(), - b->getAffineMapArrayAttr(maps)); + b.getAffineMapArrayAttr(maps)); } // Common verifier for reshape-like types. 
Fills `expandedType` and @@ -572,7 +571,7 @@ computeTensorReshapeCollapsedType(RankedTensorType type, } void mlir::linalg::TensorReshapeOp::build( - Builder *b, OperationState &result, Value src, + OpBuilder &b, OperationState &result, Value src, ArrayRef> reassociation, ArrayRef attrs) { auto maps = getSymbolLessAffineMaps(reassociation); @@ -580,17 +579,17 @@ void mlir::linalg::TensorReshapeOp::build( src.getType().cast(), maps); build(b, result, resultType, src, attrs); result.addAttribute(TensorReshapeOp::getReassociationAttrName(), - b->getAffineMapArrayAttr(maps)); + b.getAffineMapArrayAttr(maps)); } void mlir::linalg::TensorReshapeOp::build( - Builder *b, OperationState &result, Type resultType, Value src, + OpBuilder &b, OperationState &result, Type resultType, Value src, ArrayRef> reassociation, ArrayRef attrs) { auto maps = getSymbolLessAffineMaps(reassociation); build(b, result, resultType, src, attrs); result.addAttribute(TensorReshapeOp::getReassociationAttrName(), - b->getAffineMapArrayAttr(maps)); + b.getAffineMapArrayAttr(maps)); } static LogicalResult verify(TensorReshapeOp op) { @@ -611,7 +610,7 @@ static LogicalResult verify(TensorReshapeOp op) { //===----------------------------------------------------------------------===// // SliceOp //===----------------------------------------------------------------------===// -void mlir::linalg::SliceOp::build(Builder *b, OperationState &result, +void mlir::linalg::SliceOp::build(OpBuilder &b, OperationState &result, Value base, ValueRange indexings) { result.addOperands(base); result.addOperands(indexings); @@ -629,7 +628,7 @@ void mlir::linalg::SliceOp::build(Builder *b, OperationState &result, result.addTypes({MemRefType::Builder(memRefType) .setShape(sizes) .setAffineMaps(makeStridedLinearLayoutMap( - strides, offset, b->getContext()))}); + strides, offset, b.getContext()))}); } static void print(OpAsmPrinter &p, SliceOp op) { @@ -688,7 +687,7 @@ Value SliceOp::getViewSource() { return view(); } //===----------------------------------------------------------------------===// // TransposeOp //===----------------------------------------------------------------------===// -void mlir::linalg::TransposeOp::build(Builder *b, OperationState &result, +void mlir::linalg::TransposeOp::build(OpBuilder &b, OperationState &result, Value view, AffineMapAttr permutation, ArrayRef attrs) { auto permutationMap = permutation.getValue(); @@ -709,7 +708,7 @@ void mlir::linalg::TransposeOp::build(Builder *b, OperationState &result, auto res = getStridesAndOffset(memRefType, strides, offset); assert(succeeded(res) && strides.size() == static_cast(rank)); (void)res; - auto map = makeStridedLinearLayoutMap(strides, offset, b->getContext()); + auto map = makeStridedLinearLayoutMap(strides, offset, b.getContext()); map = permutationMap ? map.compose(permutationMap) : map; // Compute result type. 
MemRefType resultType = @@ -1085,9 +1084,10 @@ OpFoldResult TransposeOp::fold(ArrayRef) { //===----------------------------------------------------------------------===// template -void buildNamedStructuredOpRegion(Builder &builder, OperationState &result, - TypeRange operandTypes, - TypeRange tensorResultTypes) { +void buildNamedStructuredOpRegionAndAttributes(Builder &builder, + OperationState &result, + TypeRange operandTypes, + TypeRange tensorResultTypes) { Region ®ion = *result.addRegion(); Block *body = new Block(); // TODO: atm all operands go through getElementTypeOrSelf, @@ -1102,12 +1102,24 @@ void buildNamedStructuredOpRegion(Builder &builder, OperationState &result, opBuilder.setInsertionPointToStart(®ion.front()); mlir::edsc::ScopedContext scope(opBuilder, builder.getUnknownLoc()); NamedStructuredOpType::regionBuilder(*body); + + auto indexingMaps = builder.getAffineMapArrayAttr( + NamedStructuredOpType::referenceIndexingMaps(operandTypes, + tensorResultTypes)); + result.addAttribute(getIndexingMapsAttrName(), indexingMaps); + + auto iterators = + builder.getStrArrayAttr(NamedStructuredOpType::referenceIterators( + operandTypes, tensorResultTypes)); + result.addAttribute(getIteratorTypesAttrName(), iterators); } template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op) { + std::array silentAttrNames{getIndexingMapsAttrName(), + getIteratorTypesAttrName()}; p << op.getOperationName() << ' '; - p.printOptionalAttrDict(op.getAttrs()); + p.printOptionalAttrDict(op.getAttrs(), silentAttrNames); p << ' ' << op.getOperands(); p << ": (" << op.getOperandTypes() << ")"; auto outputTensorTypes = op.getResultTypes(); @@ -1139,7 +1151,7 @@ static ParseResult parseNamedStructuredOp(OpAsmParser &parser, if (!tensorResultTypes.empty()) result.addTypes(tensorResultTypes); - buildNamedStructuredOpRegion( + buildNamedStructuredOpRegionAndAttributes( parser.getBuilder(), result, operandTypes, tensorResultTypes); return parser.resolveOperands(operandsInfo, operandTypes, diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index c8e74ea30e8d9..dcd28c0152713 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,20 +1,20 @@ add_mlir_dialect_library(MLIRLinalgTransforms Fusion.cpp - LinalgTransforms.cpp - LinalgToLoops.cpp + Interchange.cpp + Loops.cpp Promotion.cpp + TensorsToBuffers.cpp Tiling.cpp + Transforms.cpp + Vectorization.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg DEPENDS - intrinsics_gen MLIRLinalgPassIncGen - MLIRLinalgTransformPatternsIncGen - ) -target_link_libraries(MLIRLinalgTransforms - PUBLIC + + LINK_LIBS PUBLIC MLIRAffineOps MLIRAnalysis MLIREDSC diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index cd6301ae249c6..b85c586633cbf 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" -#include "mlir/Analysis/Dominance.h" #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" @@ -21,6 +20,7 @@ #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" +#include "mlir/IR/Dominance.h" #include 
"mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/FoldUtils.h" @@ -161,17 +161,6 @@ static LinalgOp fuse(Value producedView, LinalgOp producer, LinalgOp consumer, assert(consumer.hasBufferSemantics() && "expected linalg op with buffer semantics"); - if (auto convOp = dyn_cast(producer.getOperation())) { - // TODO(ntv): add a level of indirection to linalg.generic. - if (convOp.padding()) - llvm_unreachable("Unexpected conv with padding"); - } - if (auto convOp = dyn_cast(consumer.getOperation())) { - // TODO(ntv): add a level of indirection to linalg.generic. - if (convOp.padding()) - llvm_unreachable("Unexpected conv with padding"); - } - auto subView = dyn_cast_or_null( consumer.getBuffer(consumerIdx).getDefiningOp()); auto slice = dyn_cast_or_null( @@ -287,6 +276,16 @@ bool mlir::linalg::isFusableInto(const LinalgDependenceGraph &graph, << *producer.getOperation()); return false; } + if (auto convOp = dyn_cast(producer.getOperation())) { + // TODO(ntv): add a level of indirection to linalg.generic. + if (convOp.padding()) + return false; + } + if (auto convOp = dyn_cast(consumer.getOperation())) { + // TODO(ntv): add a level of indirection to linalg.generic. + if (convOp.padding()) + return false; + } return true; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp new file mode 100644 index 0000000000000..71e4969d46570 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp @@ -0,0 +1,85 @@ +//===- Interchange.cpp - Linalg interchange transformation ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the linalg interchange transformation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/EDSC/Intrinsics.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +#define DEBUG_TYPE "linalg-interchange" + +using namespace mlir; +using namespace mlir::linalg; + +LogicalResult mlir::linalg::interchangeGenericLinalgOpPrecondition( + Operation *op, ArrayRef interchangeVector) { + if (interchangeVector.empty()) + return failure(); + // Transformation applies to generic ops only. + if (!isa(op) && !isa(op)) + return failure(); + LinalgOp linOp = cast(op); + // Transformation applies to buffers only. + if (!linOp.hasBufferSemantics()) + return failure(); + // Permutation must be applicable. + if (linOp.getIndexingMap(0).getNumInputs() != interchangeVector.size()) + return failure(); + // Permutation map must be invertible. 
+ if (!inversePermutation( + AffineMap::getPermutationMap(interchangeVector, op->getContext()))) + return failure(); + return success(); +} + +LinalgOp mlir::linalg::interchange(LinalgOp op, + ArrayRef interchangeVector) { + if (interchangeVector.empty()) + return op; + + MLIRContext *context = op.getContext(); + auto permutationMap = inversePermutation( + AffineMap::getPermutationMap(interchangeVector, context)); + assert(permutationMap && "expected permutation to be invertible"); + SmallVector newIndexingMaps; + auto indexingMaps = op.indexing_maps().getValue(); + for (unsigned i = 0, e = op.getNumInputsAndOutputs(); i != e; ++i) { + AffineMap m = indexingMaps[i].cast().getValue(); + if (!permutationMap.isEmpty()) + m = m.compose(permutationMap); + newIndexingMaps.push_back(AffineMapAttr::get(m)); + } + auto itTypes = op.iterator_types().getValue(); + SmallVector itTypesVector; + for (unsigned i = 0, e = itTypes.size(); i != e; ++i) + itTypesVector.push_back(itTypes[i]); + applyPermutationToVector(itTypesVector, interchangeVector); + + op.setAttr(getIndexingMapsAttrName(), + ArrayAttr::get(newIndexingMaps, context)); + op.setAttr(getIteratorTypesAttrName(), + ArrayAttr::get(itTypesVector, context)); + + return op; +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgTransforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgTransforms.cpp deleted file mode 100644 index 423c1c10596aa..0000000000000 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgTransforms.cpp +++ /dev/null @@ -1,381 +0,0 @@ -//===- LinalgTransforms.cpp - Linalg transformations as patterns ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements logic for transforming Linalg operations. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Linalg/Transforms/LinalgTransforms.h" -#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" -#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" -#include "mlir/Dialect/Utils/StructuredOpsUtils.h" -#include "mlir/Dialect/Vector/EDSC/Intrinsics.h" -#include "mlir/Dialect/Vector/VectorOps.h" -#include "mlir/IR/AffineExpr.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Support/LLVM.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include - -#define DEBUG_TYPE "linalg-transforms" - -using namespace mlir; -using namespace mlir::edsc; -using namespace mlir::edsc::intrinsics; -using namespace mlir::linalg; - -using llvm::dbgs; -using llvm::SetVector; - -// Marker used as attribute name in generated Linalg rewriting transformations. 
-const StringLiteral mlir::linalg::LinalgTransforms::kLinalgTransformMarker = - "__internal_linalg_transform__"; - -using TileFn = Optional(OpBuilder &, LinalgOp, ArrayRef, - ArrayRef, OperationFolder *); - -static LogicalResult -tileLinalgOpAndSetMarkerImpl(TileFn tileFn, PatternRewriter &rewriter, - Operation *op, ArrayRef sizes, - StringRef linalgMarker, - ArrayRef permutation) { - assert(permutation.empty() || permutation.size() == sizes.size()); - auto tileRes = tileFn(rewriter, op, sizes, permutation, /*folder=*/nullptr); - if (!tileRes) - return failure(); - tileRes->op.setAttr(LinalgTransforms::kLinalgTransformMarker, - rewriter.getStringAttr(linalgMarker)); - return success(); -} - -LogicalResult mlir::linalg::tileLinalgOpAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - StringRef linalgMarker, ArrayRef permutation) { - return tileLinalgOpAndSetMarkerImpl(tileLinalgOp, rewriter, op, sizes, - linalgMarker, permutation); -} -LogicalResult mlir::linalg::tileLinalgOpToParallelLoopsAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - StringRef linalgMarker, ArrayRef permutation) { - return tileLinalgOpAndSetMarkerImpl(tileLinalgOpToParallelLoops, rewriter, op, - sizes, linalgMarker, permutation); -} - -static LogicalResult -tileAndFuseLinalgOpAndSetMarkerImpl(TileFn tileFn, PatternRewriter &rewriter, - Operation *op, ArrayRef sizes, - ArrayRef operandIndicesToFuse, - StringRef linalgMarker) { - auto tileRes = - tileFn(rewriter, op, sizes, /*permutation=*/{}, /*folder=*/nullptr); - if (!tileRes) - return failure(); - tileRes->op.setAttr(LinalgTransforms::kLinalgTransformMarker, - rewriter.getStringAttr(linalgMarker)); - Aliases aliases; - auto G = LinalgDependenceGraph::buildDependenceGraph( - aliases, op->getParentOfType()); - SmallVector originalProducers; - for (auto operandIdx : operandIndicesToFuse) { - auto fusionRes = fuseProducerOf(rewriter, tileRes->op, operandIdx, G); - if (!fusionRes) { - // Linalg fusion requires tiled loops to even determine whether it is - // possible to fuse. As a consequence, the pattern may fail even though a - // tiled version of op has already been introduced. - // So we need to remove the tiled version ourselves in case of failure. - // Another possibility is to ensure the constraints on the pattern - // guarantee that fusion will occur and just assert here. As we develop - // more complex patterns we can choose what is best. - rewriter.eraseOp(tileRes->loops[0]); - return failure(); - } - fusionRes->fusedProducer.setAttr(LinalgTransforms::kLinalgTransformMarker, - rewriter.getStringAttr(linalgMarker)); - originalProducers.push_back(fusionRes->originalProducer); - } - - // The originalProducers can now be safely erased. This is similar to - // SSA-value use-def but in the world of buffer + structured ops. 
- for (auto *originalProducer : originalProducers) - rewriter.eraseOp(originalProducer); - return success(); -} - -LogicalResult mlir::linalg::tileAndFuseLinalgOpAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - ArrayRef operandIndicesToFuse, StringRef linalgMarker) { - return tileAndFuseLinalgOpAndSetMarkerImpl( - tileLinalgOp, rewriter, op, sizes, operandIndicesToFuse, linalgMarker); -} -LogicalResult mlir::linalg::tileAndFuseLinalgOpToParallelLoopsAndSetMarker( - PatternRewriter &rewriter, Operation *op, ArrayRef sizes, - ArrayRef operandIndicesToFuse, StringRef linalgMarker) { - return tileAndFuseLinalgOpAndSetMarkerImpl( - tileLinalgOpToParallelLoops, rewriter, op, sizes, operandIndicesToFuse, - linalgMarker); -} - -bool mlir::linalg::detail::isProducedByOpOfTypeImpl( - Operation *consumerOp, Value consumedView, - function_ref isaOpType) { - LinalgOp consumer = dyn_cast(consumerOp); - assert(consumer.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - if (!consumer) - return false; - - auto maybeConsumerIndex = consumer.getIndexOfInput(consumedView); - if (!maybeConsumerIndex) - return false; - - Aliases aliases; - auto G = LinalgDependenceGraph::buildDependenceGraph( - aliases, consumer.getParentOfType()); - for (auto dependence : G.getDependencesInto( - consumer, LinalgDependenceGraph::DependenceType::RAW)) { - auto producer = cast(dependence.dependentOpView.op); - if (!isProducerLastWriteOfView(G, consumer, consumedView, producer)) - continue; - if (isaOpType(dependence.dependentOpView.op)) - return true; - } - return false; -} - -//============================================================================// -// Precondition and transformation for vectorization of Linalg generic ops. -//============================================================================// -static bool hasMultiplyAddBody(linalg::GenericOp op) { - auto &r = op.region(); - if (r.empty()) - return false; - if (r.getBlocks().size() != 1) - return false; - auto &ops = r.front().getOperations(); - if (ops.size() != 3) - return false; - - using mlir::matchers::m_Val; - auto a = m_Val(r.front().getArgument(0)); - auto b = m_Val(r.front().getArgument(1)); - auto c = m_Val(r.front().getArgument(2)); - // TODO(ntv) Update this detection once we have matcher support for - // specifying that any permutation of operands matches. - auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); - auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); - auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); - auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); - return pattern1.match(&ops.back()) || pattern2.match(&ops.back()) || - pattern3.match(&ops.back()) || pattern4.match(&ops.back()); -} - -// TODO(ntv) should be Tablegen'd from a single source that generates the op -// itself. -static bool isRowMajorMatmul(linalg::GenericOp genericOp) { - return genericOp.getNumInputs() == 2 && genericOp.getNumOutputs() == 1 && - isRowMajorMatmul(genericOp.indexing_maps()) && - hasMultiplyAddBody(genericOp); -} - -// TODO(ntv, ataei): This is in fact much more general than just vectorization -// for matmul and fill ops. -LogicalResult mlir::linalg::vectorizeLinalgOpPrecondition(Operation *op) { - auto linalgOp = cast(op); - // All types must be static shape to go to vector. 
- for (Value operand : linalgOp.getInputsAndOutputBuffers()) - if (!operand.getType().cast().hasStaticShape()) - return failure(); - for (Type outputTensorType : linalgOp.getOutputTensorTypes()) - if (!outputTensorType.cast().hasStaticShape()) - return failure(); - if (isa(op) || isa(op)) - return success(); - - auto genericOp = dyn_cast(op); - if (!genericOp || !::isRowMajorMatmul(genericOp)) - return failure(); - - // TODO(ntv): non-identity layout. - auto isStaticMemRefWithIdentityLayout = [](Value v) { - auto m = v.getType().dyn_cast(); - if (!m || !m.hasStaticShape() || !m.getAffineMaps().empty()) - return false; - return true; - }; - if (!llvm::all_of(genericOp.getInputsAndOutputBuffers(), - isStaticMemRefWithIdentityLayout)) - return failure(); - return success(); -} - -SmallVector mlir::linalg::vectorizeLinalgOp(PatternRewriter &rewriter, - Operation *op) { - assert(succeeded(vectorizeLinalgOpPrecondition(op)) && - "DRR failure case must be a precondition"); - auto linalgOp = cast(op); - assert(linalgOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - if (auto convOp = dyn_cast(op)) { - // TODO(ntv): add a level of indirection to linalg.generic. - if (convOp.padding()) - llvm_unreachable("Unexpected conv with padding"); - } - - edsc::ScopedContext scope(rewriter, op->getLoc()); - - if (auto fillOp = dyn_cast(op)) { - // Vectorize fill as a vector.broadcast. - LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE - "]: Rewrite linalg.fill as vector.broadcast: " - << *op << ":\n"); - auto dstMemrefVec = vector_type_cast(fillOp.getOutputBuffer(0)); - Value dstVec = std_load(dstMemrefVec); - auto resVec = vector_broadcast(dstVec.getType(), fillOp.value()); - std_store(resVec, dstMemrefVec); - } else { - // Vectorize other ops as vector contraction (currently only matmul). - LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE - "]: Rewrite linalg op as vector.contract: " - << *op << ":\n"); - auto vA = std_load(vector_type_cast(linalgOp.getInput(0))); - auto vB = std_load(vector_type_cast(linalgOp.getInput(1))); - auto vectorMemRefC = vector_type_cast(linalgOp.getOutputBuffer(0)); - auto vC = std_load(vectorMemRefC); - auto vRes = vector_contract(vA, vB, vC, linalgOp.indexing_maps(), - linalgOp.iterator_types()); - std_store(vRes, vectorMemRefC); - } - return {}; -} - -//============================================================================// -// Precondition and transformation for permutation of Linalg generic ops. -//============================================================================// -LogicalResult mlir::linalg::permuteGenericLinalgOpPrecondition( - Operation *op, ArrayRef permutation) { - if (permutation.empty()) - return failure(); - // Transformation applies to generic ops only. - if (!isa(op) && !isa(op)) - return failure(); - LinalgOp linOp = cast(op); - // Transformation applies to buffers only. 
- if (!linOp.hasBufferSemantics()) - return failure(); - return success(); -} - -SmallVector -mlir::linalg::permuteGenericLinalgOp(PatternRewriter &rewriter, Operation *op, - ArrayRef permutation, - StringRef linalgMarker) { - LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: Permute dims for linalg op: " << *op - << ":\n"); - - assert(succeeded(permuteGenericLinalgOpPrecondition(op, permutation)) && - "DRR failure case must be a precondition"); - - auto linOp = cast(op); - auto permutationMap = inversePermutation( - AffineMap::getPermutationMap(permutation, rewriter.getContext())); - assert(permutationMap && "expected permutation to be invertible"); - SmallVector newIndexingMap; - auto indexingMaps = linOp.indexing_maps().getValue(); - for (unsigned i = 0, e = linOp.getNumInputsAndOutputs(); i != e; ++i) { - AffineMap m = indexingMaps[i].cast().getValue(); - if (!permutationMap.isEmpty()) - m = m.compose(permutationMap); - newIndexingMap.push_back(m); - } - auto itTypes = linOp.iterator_types().getValue(); - SmallVector itTypesVector; - for (unsigned i = 0, e = itTypes.size(); i != e; ++i) - itTypesVector.push_back(itTypes[i]); - applyPermutationToVector(itTypesVector, permutation); - op->setAttr(getIndexingMapsAttrName(), - rewriter.getAffineMapArrayAttr(newIndexingMap)); - op->setAttr(getIteratorTypesAttrName(), rewriter.getArrayAttr(itTypesVector)); - op->setAttr(LinalgTransforms::kLinalgTransformMarker, - rewriter.getStringAttr(linalgMarker)); - linOp.clone(rewriter, linOp.getLoc(), op->getOperands()); - return {}; -} - -//============================================================================// -// Precondition and transformation for Linalg subview promotion. -//============================================================================// -LogicalResult mlir::linalg::promoteSubviewsLinalgOpPrecondition(Operation *op) { - LinalgOp linOp = dyn_cast(op); - // Transformation applies to buffers only. - if (!linOp || !linOp.hasBufferSemantics()) - return failure(); - if (llvm::none_of(linOp.getInputsAndOutputBuffers(), [](Value v) { - return isa_and_nonnull(v.getDefiningOp()); - })) - return failure(); - return success(); -} - -SmallVector -mlir::linalg::promoteSubviewsLinalgOp(PatternRewriter &rewriter, - Operation *op) { - LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: Promote subviews for linalg op: " - << *op << ":\n"); - - assert(succeeded(promoteSubviewsLinalgOpPrecondition(op)) && - "DRR failure case must be a precondition"); - - LinalgOp linOp = cast(op); - SmallVector toPromote; - int64_t nBuffers = linOp.getNumInputsAndOutputBuffers(); - toPromote.reserve(nBuffers); - for (int64_t i = 0; i < nBuffers; ++i) - toPromote.push_back(i); - return promoteSelectedSubviewsLinalgOpAndSetMarker(rewriter, op, toPromote); -} - -SmallVector mlir::linalg::promoteSelectedSubviewsLinalgOpAndSetMarker( - PatternRewriter &rewriter, Operation *op, - ArrayRef operandIndicesToPromote, StringRef linalgMarker, - int64_t alignment) { - LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: Promote subviews for linalg op: " - << *op << ":\n"); - - assert(succeeded(promoteSubviewsLinalgOpPrecondition(op)) && - "DRR failure case must be a precondition"); - - if (auto convOp = dyn_cast(op)) { - // TODO(ntv): add a level of indirection to linalg.generic. 
- if (convOp.padding()) - llvm_unreachable("Unexpected conv with padding"); - } - - LinalgOp linOp = cast(op); - assert(linOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - SetVector subViews; - for (int64_t index : operandIndicesToPromote) - if (auto sv = - dyn_cast_or_null(linOp.getBuffer(index).getDefiningOp())) - subViews.insert(sv); - - if (!subViews.empty()) { - auto newOp = - promoteSubViewOperands(rewriter, linOp, subViews, false, alignment); - if (!linalgMarker.empty()) - newOp.setAttr(LinalgTransforms::kLinalgTransformMarker, - rewriter.getStringAttr(linalgMarker)); - return {}; - } - llvm_unreachable("DRR failure case must be a precondition"); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp similarity index 73% rename from mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp rename to mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index 88678e5d085c7..c5e7958b84a1c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -1,4 +1,4 @@ -//===- LinalgToLoops.cpp - conversion from Linalg library ops to loops-----===// +//===- Loops.cpp - conversion from Linalg named and generic ops to loops --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,7 +12,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/LinalgTransforms.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/LoopOps/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" @@ -52,7 +52,7 @@ static SmallVector makeCanonicalAffineApplies(OpBuilder &b, static SmallVector permuteIvs(ArrayRef ivs, Optional permutation) { - return permutation ? applyMapToValues(ScopedContext::getBuilder(), + return permutation ? 
applyMapToValues(ScopedContext::getBuilderRef(), ScopedContext::getLocation(), permutation.getValue(), ivs) : SmallVector(ivs.begin(), ivs.end()); @@ -78,12 +78,11 @@ SmallVector emitLoopRanges(OpBuilder &b, Location loc, AffineMap map, return res; } -template -static void -inlineRegionAndEmitStdStore(OpType op, ArrayRef indexedValues, - ArrayRef> indexing, - ArrayRef outputBuffers) { - auto &b = ScopedContext::getBuilder(); +template +static void inlineRegionAndEmitStore(OpType op, ArrayRef indexedValues, + ArrayRef> indexing, + ArrayRef outputBuffers) { + auto &b = ScopedContext::getBuilderRef(); auto &block = op.region().front(); BlockAndValueMapping map; map.map(block.getArguments(), indexedValues); @@ -95,10 +94,10 @@ inlineRegionAndEmitStdStore(OpType op, ArrayRef indexedValues, Operation &terminator = block.back(); assert(isa(terminator) && - "expected an yield op in the end of the region"); + "expected a yield op in the end of the region"); for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) { - std_store(map.lookupOrDefault(terminator.getOperand(i)), outputBuffers[i], - ArrayRef{indexing[i].begin(), indexing[i].end()}); + IndexedValueType O(outputBuffers[i]); + O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i)); } } @@ -111,7 +110,7 @@ struct InputAndOutputIndices { template static InputAndOutputIndices getInputAndOutputIndices(ArrayRef allIvs, SingleInputPoolingOp op) { - auto &b = ScopedContext::getBuilder(); + auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); auto mapsRange = op.indexing_maps().template getAsRange(); auto maps = llvm::to_vector<8>( @@ -123,9 +122,36 @@ static InputAndOutputIndices getInputAndOutputIndices(ArrayRef allIvs, namespace { -// Generic loop emitter, to be specialized on an op-per op basis. -// TODO: Hook up to named ops interface and, later, retire when all named ops -// are auto-generated. +/// Emits the MLIR for the scalar part of the generic op by: +/// 1. Emitting load ops for each input and output view in order. This is +/// achieved by applying the appropriate input or output map to the +/// enclosing induction variables. +/// 2. Emitting a call to `op.fun()` that takes as arguments the scalars +/// from point 1. above. +/// 3. Emitting store ops to store the results of 2. to the output +/// views. 
+/// +/// An example output may resemble: +/// +/// ``` +/// loop.for %i = %c0 to %0 step %c1 { +/// loop.for %j = %c0 to %1 step %c1 { +/// loop.for %k = %c0 to %4 step %c1 { +/// %11 = load %arg0[%i, %j] : +/// memref +/// %12 = load %arg1[%i, %j, %k] : +/// memref +/// %13 = load %arg2[%i, %k, %j] : +/// memref +/// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32) +/// store %14#0, %arg1[%i, %j, %k] : +/// memref +/// store %14#1, %arg2[%i, %k, %j] : +/// memref +/// } +/// } +/// } +/// ``` template class LinalgScopedEmitter { public: @@ -133,9 +159,43 @@ class LinalgScopedEmitter { LinalgOpType linalgOp) { assert(linalgOp.hasBufferSemantics() && "expected linalg op with buffer semantics"); - llvm_unreachable("NYI"); - linalgOp.emitScalarImplementation()(ScopedContext::getBuilder(), - ScopedContext::getLocation(), allIvs); + auto &b = ScopedContext::getBuilderRef(); + auto loc = ScopedContext::getLocation(); + unsigned nInputs = linalgOp.getNumInputs(); + unsigned nOutputs = linalgOp.getNumOutputs(); + SmallVector indexedValues; + indexedValues.reserve(nInputs + nOutputs); + + // TODO(mravishankar): Avoid the loads if the corresponding argument of the + // region has no uses. + // 1.a. Emit load from input views. + for (unsigned i = 0; i < nInputs; ++i) { + auto indexing = makeCanonicalAffineApplies( + b, loc, linalgOp.getInputIndexingMap(i), allIvs); + // Passing through IndexedValueType emits the proper load operation. + indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing)); + } + // 1.b. Emit load from output views. + for (unsigned i = 0; i < nOutputs; ++i) { + auto indexing = makeCanonicalAffineApplies( + b, loc, linalgOp.getOutputIndexingMap(i), allIvs); + // Passing through IndexedValueType emits the proper load operation. + indexedValues.push_back( + IndexedValueType(linalgOp.getOutputBuffer(i))(indexing)); + } + + // TODO(ntv): When a region inliner exists, use it. + // 2. Inline region, currently only works for a single basic block. + // 3. Emit store. + SmallVector, 8> indexing; + SmallVector outputBuffers; + for (unsigned i = 0; i < nOutputs; ++i) { + indexing.push_back(makeCanonicalAffineApplies( + b, loc, linalgOp.getOutputIndexingMap(i), allIvs)); + outputBuffers.push_back(linalgOp.getOutputBuffer(i)); + } + inlineRegionAndEmitStore(linalgOp, indexedValues, + indexing, outputBuffers); } }; @@ -231,7 +291,7 @@ class LinalgScopedEmitter { public: /// Returns the input value of convOp. If the indices in `imIdx` is out of /// boundary, returns 0 instead. - static Value getConvOpInput(ConvOp convOp, IndexedValueType im, + static Value getConvOpInput(ConvOp convOp, StdIndexedValue im, MutableArrayRef imIdx) { // TODO(ntv): add a level of indirection to linalg.generic. 
if (!convOp.padding()) @@ -271,7 +331,7 @@ class LinalgScopedEmitter { affine_max(dim.getType(), maxMap, ValueRange{dim})); } - auto b = ScopedContext::getBuilder(); + auto &b = ScopedContext::getBuilderRef(); Type type = convOp.input().getType().cast().getElementType(); Value zero = std_constant(type, b.getZeroAttr(type)); Value readInput = im(clampedImIdx); @@ -282,7 +342,7 @@ class LinalgScopedEmitter { static void emitScalarImplementation(ArrayRef allIvs, ConvOp convOp) { assert(convOp.hasBufferSemantics() && "expected linalg op with buffer semantics"); - auto b = ScopedContext::getBuilder(); + auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); auto mapsRange = convOp.indexing_maps().getAsRange(); auto maps = llvm::to_vector<8>(llvm::map_range( @@ -293,7 +353,11 @@ class LinalgScopedEmitter { makeCanonicalAffineApplies(b, loc, maps[1], allIvs)); SmallVector oIdx( makeCanonicalAffineApplies(b, loc, maps[2], allIvs)); - IndexedValueType F(convOp.filter()), I(convOp.input()), O(convOp.output()); + + // Padded conv involves an affine.max in the memory access which is not + // allowed by affine.load. Override to always use an StdIndexedValue. + StdIndexedValue I(convOp.input()); + IndexedValueType F(convOp.filter()), O(convOp.output()); // Emit scalar form. Value paddedInput = getConvOpInput(convOp, I, imIdx); @@ -344,111 +408,36 @@ class LinalgScopedEmitter { } }; -// Emits the MLIR for the scalar part of the generic op by: -// 1. Emitting std_load and std_store ops for each input and output -// view in order. This is achieved by applying the appropriate input or -// output map to the enclosing induction variables. -// 2. Emitting a call to `op.fun()` that takes as arguments the scalars -// from point 1. above. -// 3. Emitting std_store to store the results of 2. to the output -// views. -// -// An example output may resemble: -// -// ``` -// loop.for %i = %c0 to %0 step %c1 { -// loop.for %j = %c0 to %1 step %c1 { -// loop.for %k = %c0 to %4 step %c1 { -// %11 = load %arg0[%i, %j] : -// memref -// %12 = load %arg1[%i, %j, %k] : -// memref -// %13 = load %arg2[%i, %k, %j] : -// memref -// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32) -// store %14#0, %arg1[%i, %j, %k] : -// memref -// store %14#1, %arg2[%i, %k, %j] : -// memref -// } -// } -// } -// ``` -template -class LinalgScopedEmitter { -public: - static void emitScalarImplementation(ArrayRef allIvs, - GenericOp genericOp) { - assert(genericOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - auto b = ScopedContext::getBuilder(); - auto loc = ScopedContext::getLocation(); - unsigned nInputs = genericOp.getNumInputs(); - unsigned nOutputs = genericOp.getNumOutputs(); - SmallVector indexedValues(nInputs + nOutputs); - - // 1.a. Emit std_load from input views. - for (unsigned i = 0; i < nInputs; ++i) { - auto indexing = makeCanonicalAffineApplies( - b, loc, genericOp.getInputIndexingMap(i), allIvs); - indexedValues[i] = std_load(genericOp.getInput(i), indexing); - } - - // 1.b. Emit std_load from output views. - // TODO(mravishankar): Avoid the loads if the corresponding argument of the - // region has no uses. - for (unsigned i = 0; i < nOutputs; ++i) { - Value output = genericOp.getOutputBuffer(i); - auto indexing = makeCanonicalAffineApplies( - b, loc, genericOp.getOutputIndexingMap(i), allIvs); - indexedValues[nInputs + i] = std_load(output, indexing); - } - - // TODO(ntv): When a region inliner exists, use it. - // 2. 
Inline region, currently only works for a single basic block. - // 3. Emit std_store. - SmallVector, 8> indexing; - SmallVector outputBuffers; - for (unsigned i = 0; i < nOutputs; ++i) { - indexing.push_back(makeCanonicalAffineApplies( - b, loc, genericOp.getOutputIndexingMap(i), allIvs)); - outputBuffers.push_back(genericOp.getOutputBuffer(i)); - } - inlineRegionAndEmitStdStore(genericOp, indexedValues, indexing, - outputBuffers); - } -}; - -// Emits the MLIR for the scalar part of the indexed generic op by: -// 1. Emitting std_load and std_store ops for each input and output view in -// order. This is achieved by applying the appropriate input or output map -// to the enclosing induction variables. -// 2. Emitting a call to `op.fun()` that takes as arguments the induction -// variables and the scalars from point 1. above. -// 3. Emitting std_store to store the results of 2. to the output views. -// -// An example output may resemble: -// -// ``` -// loop.for %i = %c0 to %0 step %c1 { -// loop.for %j = %c0 to %1 step %c1 { -// loop.for %k = %c0 to %4 step %c1 { -// %11 = load %arg0[%i, %j] : -// memref -// %12 = load %arg1[%i, %j, %k] : -// memref -// %13 = load %arg2[%i, %k, %j] : -// memref -// %14:2 = call @foo(%i, %j, %k, %11, %12, %13) : -// (index, index, index, f32, f32, f32) -> (f32, f32) -// store %14#0, %arg1[%i, %j, %k] : -// memref -// store %14#1, %arg2[%i, %k, %j] : -// memref -// } -// } -// } -// ``` +/// Emits the MLIR for the scalar part of the indexed generic op by: +/// 1. Emitting load ops for each input and output view in order. This is +/// achieved by applying the appropriate input or output map to the +/// enclosing induction variables. +/// 2. Emitting a call to `op.fun()` that takes as arguments the induction +/// variables and the scalars from point 1. above. +/// 3. Emitting store ops to store the results of 2. to the output views. +/// +/// An example output may resemble: +/// +/// ``` +/// loop.for %i = %c0 to %0 step %c1 { +/// loop.for %j = %c0 to %1 step %c1 { +/// loop.for %k = %c0 to %4 step %c1 { +/// %11 = load %arg0[%i, %j] : +/// memref +/// %12 = load %arg1[%i, %j, %k] : +/// memref +/// %13 = load %arg2[%i, %k, %j] : +/// memref +/// %14:2 = call @foo(%i, %j, %k, %11, %12, %13) : +/// (index, index, index, f32, f32, f32) -> (f32, f32) +/// store %14#0, %arg1[%i, %j, %k] : +/// memref +/// store %14#1, %arg2[%i, %k, %j] : +/// memref +/// } +/// } +/// } +/// ``` template class LinalgScopedEmitter { public: @@ -456,36 +445,38 @@ class LinalgScopedEmitter { IndexedGenericOp indexedGenericOp) { assert(indexedGenericOp.hasBufferSemantics() && "expected linalg op with buffer semantics"); - auto b = ScopedContext::getBuilder(); + auto &b = ScopedContext::getBuilderRef(); auto loc = ScopedContext::getLocation(); unsigned nInputs = indexedGenericOp.getNumInputs(); unsigned nOutputs = indexedGenericOp.getNumOutputs(); unsigned nLoops = allIvs.size(); - SmallVector indexedValues(nLoops + nInputs + nOutputs); + SmallVector indexedValues; + indexedValues.reserve(nLoops + nInputs + nOutputs); + for (unsigned i = 0; i < nLoops; ++i) + indexedValues.push_back(allIvs[i]); - for (unsigned i = 0; i < nLoops; ++i) { - indexedValues[i] = allIvs[i]; - } - - // 1.a. Emit std_load from input views. + // TODO(mravishankar): Avoid the loads if the corresponding argument of the + // region has no uses. + // 1.a. Emit load from input views. 
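+      // IndexedValueType stands for an EDSC indexed-value abstraction (e.g.
+      // AffineIndexedValue or StdIndexedValue), so the same code emits either
+      // affine.load or std.load depending on the instantiation.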
    for (unsigned i = 0; i < nInputs; ++i) {
-      Value input = indexedGenericOp.getInput(i);
       auto indexing = makeCanonicalAffineApplies(
           b, loc, indexedGenericOp.getInputIndexingMap(i), allIvs);
-      indexedValues[nLoops + i] = std_load(input, indexing);
+      // Passing input i through IndexedValueType emits the proper load operation.
+      indexedValues.push_back(
+          IndexedValueType(indexedGenericOp.getInput(i))(indexing));
     }
-
-    // 1.b. Emit std_load from output views.
+    // 1.b. Emit load from output views.
     for (unsigned i = 0; i < nOutputs; ++i) {
-      Value output = indexedGenericOp.getOutputBuffer(i);
       auto indexing = makeCanonicalAffineApplies(
           b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs);
-      indexedValues[nLoops + nInputs + i] = std_load(output, indexing);
+      // Passing output i through IndexedValueType emits the proper load operation.
+      indexedValues.push_back(
+          IndexedValueType(indexedGenericOp.getOutputBuffer(i))(indexing));
     }

     // TODO(ntv): When a region inliner exists, use it.
     // 2. Inline region, currently only works for a single basic block.
-    // 3. Emit std_store.
+    // 3. Emit store.
     SmallVector, 8> indexing;
     SmallVector outputBuffers;
     for (unsigned i = 0; i < nOutputs; ++i) {
@@ -493,25 +484,11 @@ class LinalgScopedEmitter {
           b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs));
       outputBuffers.push_back(indexedGenericOp.getOutputBuffer(i));
     }
-    inlineRegionAndEmitStdStore(indexedGenericOp, indexedValues, indexing,
-                                outputBuffers);
+    inlineRegionAndEmitStore(indexedGenericOp, indexedValues,
+                             indexing, outputBuffers);
   }
 };

-// This struct is for factoring out the implementation and support template
-// instantiations in the following 2 cases:
-// 1. Appending to a list of patterns via RewritePatternList.
-// 2. Direct invocation via `linalgOpToLoops` and `linalgOpToAffineLoops`.
-// The implementation must work both in DRR and inside a RewritePattern. As a
-// consequence, (1) it is only allowed to emit new ops if the match is
-// guaranteed to be a success, (2) it is not allowed erase/replace, and (3) an
-// encompassing pattern must take care of the erasure logic.
-template
-class LinalgOpToLoopsImpl {
-public:
-  static Optional doit(Operation *op, PatternRewriter &rewriter);
-};
-
 namespace {
 /// Helper struct to generate the loop nest for the op. This is factored out
 /// here to be able to partially specialize it for different LoopTy.
@@ -532,7 +509,7 @@ class GenerateLoopNest {
   }
 };

-/// Generates loops nest using loop.parallel. loop.parallel is only used for the
+/// Generates loop nest using loop.parallel. loop.parallel is only used for the
 /// outer parallel loops. All other loops are generated using loop.for
 /// operation.
 template
@@ -547,10 +524,6 @@ class GenerateLoopNest {
     // TODO(ravishankarm): Generate loop.parallel for all "parallel" iterator
     // types, not just the outermost ones. Also handle "reduction" iterator
     // types.
-    auto nPar = linalgOp.getNumParallelLoops();
-    auto nRed = linalgOp.getNumReductionLoops();
-    auto nWin = linalgOp.getNumWindowLoops();
-    auto nLoops = nPar + nRed + nWin;
     auto nOuterPar = linalgOp.iterator_types()
                          .getValue()
                          .take_while([](Attribute attr) {
@@ -560,14 +533,6 @@ class GenerateLoopNest {
                          .size();
     // If there are no outer parallel loops, then the number of loop ops is the
     // same as the number of loops, and they are all loop.for ops.
-    auto nLoopOps = (nOuterPar ?
nLoops - nOuterPar + 1 : nLoops); - SmallVector allLoops(nLoopOps, OperationHandle()); - SmallVector allPLoops; - allPLoops.reserve(allLoops.size()); - for (OperationHandle &loop : allLoops) - allPLoops.push_back(&loop); - ArrayRef allPLoopsRef(allPLoops); - if (nOuterPar) { GenericLoopNestRangeBuilder( allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar))([&] { @@ -594,14 +559,12 @@ class GenerateLoopNest { } // namespace template -Optional -LinalgOpToLoopsImpl::doit(Operation *op, - PatternRewriter &rewriter) { +Optional linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) { using Impl = GenerateLoopNest; using IndexedValueTy = typename GenerateLoopNest::IndexedValueTy; - ScopedContext scope(rewriter, op->getLoc()); + ScopedContext scope(builder, op->getLoc()); // The flattened loopToOperandRangesMaps is expected to be an invertible // permutation map (which is asserted in the inverse calculation). @@ -627,8 +590,8 @@ LinalgOpToLoopsImpl::doit(Operation *op, SmallVector allIvs(nLoops); auto loopRanges = - emitLoopRanges(scope.getBuilder(), scope.getLocation(), invertedMap, - getViewSizes(rewriter, linalgOp)); + emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap, + getViewSizes(builder, linalgOp)); assert(loopRanges.size() == allIvs.size()); Impl::doit(linalgOp, loopRanges, allIvs); // Number of loop ops might be different from the number of ivs since some @@ -656,15 +619,14 @@ class LinalgRewritePattern : public RewritePattern { LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - using Impl = LinalgOpToLoopsImpl; - if (!Impl::doit(op, rewriter)) + if (!linalgOpToLoopsImpl(op, rewriter)) return failure(); rewriter.eraseOp(op); return success(); } }; -// Helper classes for type list expansion. +/// Helper classes for type list expansion. template class RewritePatternList; @@ -683,7 +645,7 @@ class RewritePatternList { } }; -/// Populate the given list with patterns that convert from Linalg to LLVM. +/// Populate the given list with patterns that convert from Linalg to loops. template void FillRewritePatterns(OwningRewritePatternList &patterns, MLIRContext *ctx) { RewritePatternList::build(patterns, ctx); } -// Local folding pattern for AffineApplyOp that we can apply greedily. -// This replaces AffineApplyOp by the proper value in cases where the associated -// map is trivial. A trivial map here is defined as a map with a single result -// and either: -// 1. Zero operand + returns a single AffineConstantExpr -// 2. One operand + returns a single AffineDimExpr -// 3. One operands + returns a single AffineSymbolExpr +/// Local folding pattern for AffineApplyOp that we can apply greedily. +/// This replaces AffineApplyOp by the proper value in cases where the +/// associated map is trivial. +/// A trivial map here is defined as a map with a single result and either: +/// 1. Zero operand + returns a single AffineConstantExpr +/// 2. One operand + returns a single AffineDimExpr +/// 3. One operand + returns a single AffineSymbolExpr // -// In the first case, the AffineApplyOp is replaced by a new constant. In the -// other cases, it is replaced by its unique operand. +/// In the first case, the AffineApplyOp is replaced by a new constant. In the +/// other cases, it is replaced by its unique operand. 
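+///
+/// For example, a map such as () -> (42) applied with no operands folds to a
+/// constant 42, and a map such as (d0) -> (d0) applied to a value %x folds to
+/// %x itself.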
struct FoldAffineOp : public RewritePattern { FoldAffineOp(MLIRContext *context) : RewritePattern(AffineApplyOp::getOperationName(), 0, context) {} @@ -780,50 +742,49 @@ mlir::createConvertLinalgToAffineLoopsPass() { /// Emits a loop nest with the proper body for `op`. template -Optional -mlir::linalg::linalgLowerOpToLoops(PatternRewriter &rewriter, Operation *op) { - return LinalgOpToLoopsImpl::doit(op, rewriter); +Optional mlir::linalg::linalgLowerOpToLoops(OpBuilder &builder, + Operation *op) { + return linalgOpToLoopsImpl(op, builder); } /// Emits a loop nest of `loop.for` with the proper body for `op`. template -LogicalResult mlir::linalg::linalgOpToLoops(PatternRewriter &rewriter, - Operation *op) { +LogicalResult mlir::linalg::linalgOpToLoops(OpBuilder &builder, Operation *op) { Optional loops = - linalgLowerOpToLoops(rewriter, op); + linalgLowerOpToLoops(builder, op); return loops ? success() : failure(); } /// Emits a loop nest of `affine.for` with the proper body for `op`. template -LogicalResult mlir::linalg::linalgOpToAffineLoops(PatternRewriter &rewriter, +LogicalResult mlir::linalg::linalgOpToAffineLoops(OpBuilder &builder, Operation *op) { Optional loops = - linalgLowerOpToLoops(rewriter, op); + linalgLowerOpToLoops(builder, op); return loops ? success() : failure(); } /// Emits a loop nest of `loop.parallel` with the proper body for `op`. template -LogicalResult mlir::linalg::linalgOpToParallelLoops(PatternRewriter &rewriter, +LogicalResult mlir::linalg::linalgOpToParallelLoops(OpBuilder &builder, Operation *op) { Optional loops = - linalgLowerOpToLoops(rewriter, op); + linalgLowerOpToLoops(builder, op); return loops ? success() : failure(); } -// TODO(ntv) Need to make these instantiations more future-proof to avoid the -// need to update as soon as we add new ops. +// TODO Need to make these instantiations more future-proof to avoid the need to +// update as soon as we add new ops. 
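// A typical call site is then a one-liner that emits the loop nest at the
// builder's current insertion point, e.g. (sketch):
//   if (failed(mlir::linalg::linalgOpToLoops(builder, op)))
//     return failure();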
#define INSTANTIATE_LINALG_OP_TO_LOOPS(OP_TYPE) \ template LogicalResult mlir::linalg::linalgOpToLoops( \ - PatternRewriter & rewriter, Operation * op); \ + OpBuilder & builder, Operation * op); \ template LogicalResult mlir::linalg::linalgOpToAffineLoops( \ - PatternRewriter & rewriter, Operation * op); \ + OpBuilder & builder, Operation * op); \ template LogicalResult mlir::linalg::linalgOpToParallelLoops( \ - PatternRewriter & rewriter, Operation * op); \ + OpBuilder & builder, Operation * op); \ template Optional \ mlir::linalg::linalgLowerOpToLoops( \ - PatternRewriter & rewriter, Operation * op); + OpBuilder & builder, Operation * op); INSTANTIATE_LINALG_OP_TO_LOOPS(CopyOp) INSTANTIATE_LINALG_OP_TO_LOOPS(FillOp) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index ca905116d71ef..5e277b1876244 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/LoopOps/LoopOps.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" @@ -264,6 +265,21 @@ static void promoteSubViews(FuncOp f, bool dynamicBuffers) { op.erase(); } +LogicalResult mlir::linalg::promoteSubviewsLinalgOpPrecondition( + Operation *op, llvm::Optional> operandIndicesToPromote) { + LinalgOp linOp = dyn_cast(op); + // Transformation applies to buffers only. + if (!linOp || !linOp.hasBufferSemantics()) + return failure(); + for (auto en : llvm::enumerate(linOp.getInputsAndOutputBuffers())) { + auto sv = isa_and_nonnull(en.value().getDefiningOp()); + if (sv && (!operandIndicesToPromote.hasValue() || + operandIndicesToPromote->count(en.index()))) + return success(); + } + return failure(); +} + namespace { struct LinalgPromotionPass : public LinalgPromotionBase { LinalgPromotionPass() = default; diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp new file mode 100644 index 0000000000000..93501011a4164 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -0,0 +1,189 @@ +//===- TensorsToBuffers.cpp - Transformation from tensors to buffers ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the conversion from tensors to buffers on Linalg +// operations. +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/BufferPlacement.h" + +using namespace mlir; +using ReturnOpConverter = + NonVoidToVoidReturnOpConverter; + +namespace { +/// A pattern to convert Generic Linalg operations which work on tensors to +/// use buffers. A buffer is allocated using BufferAssignmentPlacer for +/// each operation result. 
BufferPlacement pass should be later used to move +/// Alloc operations to the correct positions and insert the missing Dealloc +/// operations in the correct places. +class GenericOpConverter + : public BufferAssignmentOpConversionPattern { +public: + using BufferAssignmentOpConversionPattern< + linalg::GenericOp>::BufferAssignmentOpConversionPattern; + + LogicalResult + matchAndRewrite(linalg::GenericOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + ResultRange results = op.getOperation()->getResults(); + SmallVector newArgs, newResults; + newArgs.reserve(operands.size() + results.size()); + newArgs.append(operands.begin(), operands.end()); + newResults.reserve(results.size()); + + // Update all types to memref types. + for (auto result : results) { + auto type = result.getType().cast(); + assert(type && "tensor to buffer conversion expects ranked results"); + if (!type.hasStaticShape()) + return rewriter.notifyMatchFailure( + op, "dynamic shapes not currently supported"); + auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); + + // Compute alloc position and insert a custom allocation node. + OpBuilder::InsertionGuard guard(rewriter); + rewriter.restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result)); + auto alloc = rewriter.create(loc, memrefType); + newArgs.push_back(alloc); + newResults.push_back(alloc); + } + + // Generate a new linalg operation that works on buffers. + auto linalgOp = rewriter.create( + loc, llvm::None, newArgs, rewriter.getI64IntegerAttr(operands.size()), + rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(), + op.iterator_types(), op.docAttr(), op.library_callAttr()); + + // Create a new block in the region of the new Generic Op. + Block &oldBlock = op.getRegion().front(); + Region &newRegion = linalgOp.region(); + Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), + oldBlock.getArgumentTypes()); + + // Add the result arguments to the new block. + for (auto result : newResults) + newBlock->addArgument( + result.getType().cast().getElementType()); + + // Clone the body of the old block to the new block. + BlockAndValueMapping mapping; + for (unsigned i = 0; i < oldBlock.getNumArguments(); i++) + mapping.map(oldBlock.getArgument(i), newBlock->getArgument(i)); + rewriter.setInsertionPointToEnd(newBlock); + for (auto &op : oldBlock.getOperations()) { + Operation *clonedOp = rewriter.clone(op, mapping); + mapping.map(op.getResults(), clonedOp->getResults()); + } + + // Replace the results of the old Generic Op with the results of the new + // one. + rewriter.replaceOp(op, newResults); + return success(); + } +}; + +/// Populate the given list with patterns to convert Linalg operations on +/// tensors to buffers. +static void populateConvertLinalgOnTensorsToBuffersPattern( + MLIRContext *context, BufferAssignmentPlacer *placer, + TypeConverter *converter, OwningRewritePatternList *patterns) { + // clang-format off + patterns->insert(context, placer, converter); + // clang-format on +} + +/// Converts Linalg operations that work on tensor-type operands or results to +/// work on buffers. +struct ConvertLinalgOnTensorsToBuffers + : public LinalgOnTensorsToBuffersBase { + void runOnOperation() override { + MLIRContext &context = getContext(); + ConversionTarget target(context); + BufferAssignmentTypeConverter converter; + + // Mark all Standard operations legal. 
+ target.addLegalDialect(); + + // Mark all Linalg operations illegal as long as they work on tensors. + auto isIllegalType = [&](Type type) { return !converter.isLegal(type); }; + auto isLegalOperation = [&](Operation *op) { + return llvm::none_of(op->getOperandTypes(), isIllegalType) && + llvm::none_of(op->getResultTypes(), isIllegalType); + }; + target.addDynamicallyLegalDialect( + Optional( + isLegalOperation)); + + // TODO: Considering the following dynamic legality checks, the current + // implementation of FunctionAndBlockSignatureConverter of Buffer Assignment + // will convert the function signature incorrectly. This converter moves + // all the return values of the function to the input argument list without + // considering the return value types and creates a void function. However, + // the NonVoidToVoidReturnOpConverter doesn't change the return operation if + // its operands are not tensors. The following example leaves the IR in a + // broken state. + // + // @function(%arg0: f32, %arg1: tensor<4xf32>) -> (f32, f32) { + // %0 = mulf %arg0, %arg0 : f32 + // return %0, %0 : f32, f32 + // } + // + // broken IR after conversion: + // + // func @function(%arg0: f32, %arg1: memref<4xf32>, f32, f32) { + // %0 = mulf %arg0, %arg0 : f32 + // return %0, %0 : f32, f32 + // } + // + // This issue must be fixed in FunctionAndBlockSignatureConverter and + // NonVoidToVoidReturnOpConverter. + + // Mark Standard Return operations illegal as long as one operand is tensor. + target.addDynamicallyLegalOp([&](mlir::ReturnOp returnOp) { + return llvm::none_of(returnOp.getOperandTypes(), isIllegalType); + }); + + // Mark the function operation illegal as long as an argument is tensor. + target.addDynamicallyLegalOp([&](FuncOp funcOp) { + return converter.isSignatureLegal(funcOp.getType()) && + llvm::none_of(funcOp.getType().getResults(), + [&](Type type) { return type.isa(); }); + }); + + // Walk over all the functions to apply buffer assignment. + getOperation().walk([&](FuncOp function) { + OwningRewritePatternList patterns; + BufferAssignmentPlacer placer(function); + populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer, + &converter, &patterns); + + // Applying full conversion + return WalkResult( + applyFullConversion(function, target, patterns, &converter)); + }); + } +}; +} // end anonymous namespace + +std::unique_ptr> +mlir::createConvertLinalgOnTensorsToBuffersPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index 4dd8c5ad0e509..b6977e01266f6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/LoopOps/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" @@ -320,10 +321,9 @@ makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp, } template -Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op, - ArrayRef tileSizes, - ArrayRef permutation, - OperationFolder *folder) { +Optional static tileLinalgOpImpl( + OpBuilder &b, LinalgOp op, ArrayRef tileSizes, + ArrayRef interchangeVector, OperationFolder *folder) { assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); // 1. 
Enforce the convention that "tiling by zero" skips tiling a particular
  // dimension. This convention is significantly simpler to handle than
  // adjusting affine maps to account for missing dimensions.
@@ -342,13 +342,13 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
     return llvm::None;
   }

-  // If permutation is empty, use the identity. Build the permutation map
+  // If interchangeVector is empty, use the identity. Build the permutation map
   // otherwise.
   auto invPermutationMap = AffineMap::getMultiDimIdentityMap(
       tileSizes.size(), ScopedContext::getContext());
-  if (!permutation.empty())
-    invPermutationMap = inversePermutation(
-        AffineMap::getPermutationMap(permutation, ScopedContext::getContext()));
+  if (!interchangeVector.empty())
+    invPermutationMap = inversePermutation(AffineMap::getPermutationMap(
+        interchangeVector, ScopedContext::getContext()));
   if (!invPermutationMap)
     return llvm::None;
@@ -371,8 +371,8 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
   std::tie(loopRanges, loopIndexToRangeIndex) =
       makeTiledLoopRanges(b, scope.getLocation(), viewSizesToLoopsMap,
                           viewSizes, tileSizes, folder);
-  if (!permutation.empty())
-    applyPermutationToVector(loopRanges, permutation);
+  if (!interchangeVector.empty())
+    applyPermutationToVector(loopRanges, interchangeVector);

   // 3. Create the tiled loops.
   LinalgOp res = op;
@@ -384,7 +384,7 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
         linalg_range(range.offset, range.size, range.stride));
   }
   GenericLoopNestRangeBuilder(ivs, linalgRanges)([&] {
-    auto b = ScopedContext::getBuilder();
+    auto &b = ScopedContext::getBuilderRef();
     auto loc = ScopedContext::getLocation();
     SmallVector ivValues(ivs.begin(), ivs.end());
@@ -393,7 +393,7 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
     // assuming that loopRanges have previously been permuted by
     // (i,j,k)->(k,i,j). So this permutation should be the inversePermutation
     // of that one: (d0,d1,d2)->(d2,d0,d1).
-    if (!permutation.empty())
+    if (!interchangeVector.empty())
       ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues, folder);

     auto views =
@@ -409,8 +409,10 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
   // 5. Gather the newly created loops and return them with the new op.
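  // An induction variable is a block argument of its loop body region, so the
  // parent op of the argument's owner block is the enclosing loop operation;
  // unlike loop::getForInductionVarOwner, this works for loop.for and
  // loop.parallel induction variables alike.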
SmallVector loops; loops.reserve(ivs.size()); - for (auto iv : ivs) - loops.push_back(loop::getForInductionVarOwner(iv)); + for (auto iv : ivs) { + loops.push_back(iv.cast().getOwner()->getParentOp()); + assert(loops.back() && "no owner found for induction variable!"); + } return TiledLinalgOp{res, loops}; } @@ -418,7 +420,8 @@ Optional static tileLinalgOpImpl(OpBuilder &b, LinalgOp op, template static Optional tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation, OperationFolder *folder) { + ArrayRef interchangeVector, + OperationFolder *folder) { assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics"); if (tileSizes.empty()) return llvm::None; @@ -457,33 +460,36 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, tileSizeValues.push_back(folded_std_constant_index(folder, 0)); } - return tileLinalgOpImpl(b, op, tileSizeValues, permutation, folder); + return tileLinalgOpImpl(b, op, tileSizeValues, interchangeVector, + folder); } Optional mlir::linalg::tileLinalgOp(OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation, + ArrayRef interchangeVector, OperationFolder *folder) { - return tileLinalgOpImpl(b, op, tileSizes, permutation, folder); + return tileLinalgOpImpl(b, op, tileSizes, interchangeVector, + folder); } Optional mlir::linalg::tileLinalgOpToParallelLoops( OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation, OperationFolder *folder) { - return tileLinalgOpImpl(b, op, tileSizes, permutation, + ArrayRef interchangeVector, OperationFolder *folder) { + return tileLinalgOpImpl(b, op, tileSizes, interchangeVector, folder); } Optional mlir::linalg::tileLinalgOp( OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation, OperationFolder *folder) { - return tileLinalgOpImpl(b, op, tileSizes, permutation, folder); + ArrayRef interchangeVector, OperationFolder *folder) { + return tileLinalgOpImpl(b, op, tileSizes, interchangeVector, + folder); } Optional mlir::linalg::tileLinalgOpToParallelLoops( OpBuilder &b, LinalgOp op, ArrayRef tileSizes, - ArrayRef permutation, OperationFolder *folder) { - return tileLinalgOpImpl(b, op, tileSizes, permutation, + ArrayRef interchangeVector, OperationFolder *folder) { + return tileLinalgOpImpl(b, op, tileSizes, interchangeVector, folder); } @@ -494,8 +500,8 @@ static void tileLinalgOps(FuncOp f, ArrayRef tileSizes) { f.walk([tileSizes, &b, &folder](LinalgOp op) { if (!op.hasBufferSemantics()) return; - auto opLoopsPair = - tileLinalgOpImpl(b, op, tileSizes, /*permutation=*/{}, &folder); + auto opLoopsPair = tileLinalgOpImpl( + b, op, tileSizes, /*interchangeVector=*/{}, &folder); // If tiling occurred successfully, erase old op. if (opLoopsPair) op.erase(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp new file mode 100644 index 0000000000000..e229b10072f0c --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -0,0 +1,228 @@ +//===- LinalgTransforms.cpp - Linalg transformations as patterns ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic and helpers to expose Linalg transforms as rewrite +// patterns. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Vector/VectorOps.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include
+
+#define DEBUG_TYPE "linalg-transforms"
+
+using namespace mlir;
+using namespace mlir::edsc;
+using namespace mlir::edsc::intrinsics;
+using namespace mlir::linalg;
+
+using llvm::dbgs;
+
+//===----------------------------------------------------------------------===//
+// Transformations exposed as rewrite patterns.
+//===----------------------------------------------------------------------===//
+// Marker used as attribute name in generated Linalg rewriting transformations.
+const StringLiteral mlir::linalg::LinalgTransforms::kLinalgTransformMarker =
+    "__internal_linalg_transform__";
+
+mlir::linalg::LinalgMarker::LinalgMarker(ArrayRef matchDisjunction,
+                                         llvm::Optional replacement)
+    : matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()),
+      replacement(replacement) {}
+
+mlir::linalg::LinalgMarker::LinalgMarker(ArrayRef matchDisjunction,
+                                         StringRef replacement)
+    : LinalgMarker(matchDisjunction, llvm::Optional{replacement}) {}
+
+LogicalResult
+mlir::linalg::LinalgMarker::checkAndNotify(PatternRewriter &rewriter,
+                                           Operation *op) const {
+  auto attr = op->template getAttrOfType(
+      LinalgTransforms::kLinalgTransformMarker);
+
+  if (!attr) {
+    // 1. Has no marker and matchDisjunction is empty.
+    if (matchDisjunction.empty())
+      return success();
+
+    // 2. Has no marker and matchDisjunction matches the no-marker case.
+    for (auto marker : matchDisjunction)
+      if (marker.empty())
+        return success();
+
+    // 3. Has no marker but was expecting a marker.
+    return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) {
+      diag << " does not have any marker from list: ";
+      llvm::interleaveComma(matchDisjunction, diag);
+    });
+  }
+
+  // 4. Match explicit marker.
+  for (auto marker : matchDisjunction)
+    if (attr.getValue() == marker)
+      return success();
+
+  // 5. Fail to match.
+  return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) {
+    diag << " does not have any marker from list: ";
+    llvm::interleaveComma(matchDisjunction, diag);
+  });
+}
+
+void mlir::linalg::LinalgMarker::replaceLinalgMarker(PatternRewriter &rewriter,
+                                                     Operation *op) const {
+  if (replacement.hasValue())
+    op->setAttr(LinalgTransforms::kLinalgTransformMarker,
+                rewriter.getStringAttr(replacement.getValue()));
+  else
+    op->removeAttr(Identifier::get(LinalgTransforms::kLinalgTransformMarker,
+                                   rewriter.getContext()));
+}
+
+/// Linalg base tiling pattern.
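+/// The pattern is constructed with the name of the op to match and the tiling
+/// options; e.g. a minimal sketch of the options, assuming the default marker
+/// and benefit:
+///   LinalgTilingOptions options;
+///   options.tileSizes = {8, 8, 4};
+///   options.loopType = LinalgTilingLoopType::ParallelLoops;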
+mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern( + StringRef opName, MLIRContext *context, LinalgTilingOptions options, + LinalgMarker marker, PatternBenefit benefit) + : RewritePattern(opName, {}, benefit, context), marker(marker), + options(options) {} + +LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + LinalgOp linalgOp = dyn_cast(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + Optional res; + if (options.loopType == LinalgTilingLoopType::Loops) + res = tileLinalgOp(rewriter, linalgOp, options.tileSizes, + options.interchangeVector); + else if (options.loopType == LinalgTilingLoopType::ParallelLoops) + res = tileLinalgOpToParallelLoops(rewriter, linalgOp, options.tileSizes, + options.interchangeVector); + // TODO: Impl tiling to affine loops when it makes sense. + + if (!res) + return failure(); + + // New marker if specified. + marker.replaceLinalgMarker(rewriter, res->op.getOperation()); + + rewriter.eraseOp(op); + return success(); +} + +/// Linalg base interchange pattern. +mlir::linalg::LinalgBaseInterchangePattern::LinalgBaseInterchangePattern( + StringRef opName, MLIRContext *context, + ArrayRef interchangeVector, LinalgMarker marker, + PatternBenefit benefit) + : RewritePattern(opName, {}, benefit, context), marker(marker), + interchangeVector(interchangeVector.begin(), interchangeVector.end()) {} + +LogicalResult mlir::linalg::LinalgBaseInterchangePattern::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + LinalgOp linalgOp = dyn_cast(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + if (failed(interchangeGenericLinalgOpPrecondition(op, interchangeVector))) + return failure(); + + // TODO: figure out how this interplays with named ops. In particular this + // should break the named op property. + rewriter.updateRootInPlace(op, [&]() { + interchange(linalgOp, interchangeVector); + // New marker if specified. 
+ marker.replaceLinalgMarker(rewriter, op); + }); + return success(); +} + +mlir::linalg::LinalgBasePromotionPattern::LinalgBasePromotionPattern( + StringRef opName, MLIRContext *context, + ArrayRef operandsToPromote, unsigned alignment, + LinalgMarker marker, PatternBenefit benefit) + : RewritePattern(opName, {}, benefit, context), marker(marker), + operandsToPromote(operandsToPromote.begin(), operandsToPromote.end()), + alignment(alignment) {} + +LogicalResult mlir::linalg::LinalgBasePromotionPattern::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + LinalgOp linalgOp = dyn_cast(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + if (operandsToPromote.empty()) { + if (failed(promoteSubviewsLinalgOpPrecondition(op, llvm::None))) + return failure(); + } else { + DenseSet set; + set.insert(operandsToPromote.begin(), operandsToPromote.end()); + if (failed(promoteSubviewsLinalgOpPrecondition(op, set))) + return failure(); + } + + llvm::SetVector subViews; + if (!operandsToPromote.empty()) { + for (unsigned idx : operandsToPromote) { + auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) + subViews.insert(sv); + } + } else { + unsigned nBuffers = linalgOp.getNumInputsAndOutputBuffers(); + for (unsigned idx = 0; idx < nBuffers; ++idx) { + auto *op = linalgOp.getBuffer(idx).getDefiningOp(); + if (auto sv = dyn_cast_or_null(op)) + subViews.insert(sv); + } + } + + auto promotedOp = + promoteSubViewOperands(rewriter, op, subViews, /*dynamicBuffers=*/false, + /*alignment=*/alignment); + marker.replaceLinalgMarker(rewriter, promotedOp.getOperation()); + rewriter.eraseOp(op); + return success(); +} + +mlir::linalg::LinalgBaseVectorizationPattern::LinalgBaseVectorizationPattern( + StringRef opName, MLIRContext *context, LinalgMarker marker, + PatternBenefit benefit) + : RewritePattern(opName, {}, benefit, context), marker(marker) {} + +LogicalResult mlir::linalg::LinalgBaseVectorizationPattern::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + LinalgOp linalgOp = dyn_cast(op); + if (!linalgOp) + return failure(); + if (failed(marker.checkAndNotify(rewriter, linalgOp))) + return failure(); + if (failed(vectorizeLinalgOpPrecondition(op))) + return failure(); + vectorizeLinalgOp(rewriter, op); + rewriter.eraseOp(op); + return success(); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp new file mode 100644 index 0000000000000..f27baa3c662a9 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -0,0 +1,131 @@ +//===- Vectorization.cpp - Implementation of linalg Vectorization ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the linalg dialect Vectorization transformations. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/EDSC/Intrinsics.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace mlir; +using namespace mlir::edsc; +using namespace mlir::edsc::intrinsics; +using namespace mlir::linalg; + +using llvm::dbgs; + +#define DEBUG_TYPE "linalg-vectorization" + +static bool hasMultiplyAddBody(linalg::GenericOp op) { + auto &r = op.region(); + if (!llvm::hasSingleElement(r)) + return false; + if (!llvm::hasNItems(r.front().begin(), r.front().end(), 3)) + return false; + + using mlir::matchers::m_Val; + auto a = m_Val(r.front().getArgument(0)); + auto b = m_Val(r.front().getArgument(1)); + auto c = m_Val(r.front().getArgument(2)); + // TODO: Update this detection once we have matcher support for specifying + // that any permutation of operands matches. + auto pattern1 = m_Op(m_Op(m_Op(a, b), c)); + auto pattern2 = m_Op(m_Op(c, m_Op(a, b))); + auto pattern3 = m_Op(m_Op(m_Op(b, a), c)); + auto pattern4 = m_Op(m_Op(c, m_Op(b, a))); + return pattern1.match(&r.front().back()) || + pattern2.match(&r.front().back()) || + pattern3.match(&r.front().back()) || pattern4.match(&r.front().back()); +} + +// TODO: Should be Tablegen'd from a single source that generates the op itself. +static bool isRowMajorMatmul(linalg::GenericOp genericOp) { + return genericOp.getNumInputs() == 2 && genericOp.getNumOutputs() == 1 && + isRowMajorMatmul(genericOp.indexing_maps()) && + hasMultiplyAddBody(genericOp); +} + +// TODO: This is in fact much more general than just vectorization for matmul +// and fill ops. +LogicalResult mlir::linalg::vectorizeLinalgOpPrecondition(Operation *op) { + auto linalgOp = cast(op); + // All types must be static shape to go to vector. + for (Value operand : linalgOp.getInputsAndOutputBuffers()) + if (!operand.getType().cast().hasStaticShape()) + return failure(); + for (Type outputTensorType : linalgOp.getOutputTensorTypes()) + if (!outputTensorType.cast().hasStaticShape()) + return failure(); + if (isa(op) || isa(op)) + return success(); + + auto genericOp = dyn_cast(op); + if (!genericOp || !::isRowMajorMatmul(genericOp)) + return failure(); + + // TODO(ntv): non-identity layout. + auto isStaticMemRefWithIdentityLayout = [](Value v) { + auto m = v.getType().dyn_cast(); + if (!m || !m.hasStaticShape() || !m.getAffineMaps().empty()) + return false; + return true; + }; + return success(llvm::all_of(genericOp.getInputsAndOutputBuffers(), + isStaticMemRefWithIdentityLayout)); +} + +void mlir::linalg::vectorizeLinalgOp(OpBuilder &builder, Operation *op) { + assert(succeeded(vectorizeLinalgOpPrecondition(op))); + + if (auto convOp = dyn_cast(op)) { + // TODO: add a level of indirection to linalg.generic. + if (convOp.padding()) + llvm_unreachable("Unexpected conv with padding"); + } + + edsc::ScopedContext scope(builder, op->getLoc()); + if (auto fillOp = dyn_cast(op)) { + // Vectorize fill as a vector.broadcast. 
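+    // For a memref<4x4xf32> output buffer this emits roughly:
+    //   %m = vector.type_cast %out : memref<4x4xf32> to memref<vector<4x4xf32>>
+    //   %d = load %m[] : memref<vector<4x4xf32>>  (used only for its type)
+    //   %r = vector.broadcast %val : f32 to vector<4x4xf32>
+    //   store %r, %m[] : memref<vector<4x4xf32>>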
+ LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE + "]: Rewrite linalg.fill as vector.broadcast: " + << *op << ":\n"); + Value memref = vector_type_cast(fillOp.getOutputBuffer(0)); + Value dst = std_load(memref); + Value res = vector_broadcast(dst.getType(), fillOp.value()); + std_store(res, memref); + return; + } + + // Vectorize other ops as vector contraction (currently only matmul). + LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE + "]: Rewrite linalg op as vector.contract: " + << *op << ":\n"); + auto linalgOp = cast(op); + Value a = std_load(vector_type_cast(linalgOp.getInput(0))); + Value b = std_load(vector_type_cast(linalgOp.getInput(1))); + Value memref = vector_type_cast(linalgOp.getOutputBuffer(0)); + Value c = std_load(memref); + Value res = vector_contract(a, b, c, linalgOp.indexing_maps(), + linalgOp.iterator_types()); + std_store(res, memref); +} diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt index 681a47d31271e..3b740748c62c6 100644 --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -3,12 +3,8 @@ add_mlir_dialect_library(MLIRLinalgUtils ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg - DEPENDS - intrinsics_gen - ) -target_link_libraries(MLIRLinalgUtils - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIREDSC MLIRIR diff --git a/mlir/lib/Dialect/LoopOps/CMakeLists.txt b/mlir/lib/Dialect/LoopOps/CMakeLists.txt index 44de8ada21500..ea72f25a303ad 100644 --- a/mlir/lib/Dialect/LoopOps/CMakeLists.txt +++ b/mlir/lib/Dialect/LoopOps/CMakeLists.txt @@ -8,15 +8,13 @@ add_mlir_dialect_library(MLIRLoopOps DEPENDS MLIRLoopOpsIncGen - ) -target_link_libraries(MLIRLoopOps - PUBLIC + + LINK_LIBS PUBLIC MLIREDSC MLIRIR MLIRLoopLikeInterface MLIRSideEffects MLIRStandardOps - LLVMSupport ) add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp index fec803deaec2b..af1f39a000855 100644 --- a/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/LoopOps/EDSC/Builders.cpp @@ -82,14 +82,14 @@ LoopBuilder mlir::edsc::makeParallelLoopBuilder(MutableArrayRef ivs, ArrayRef lbs, ArrayRef ubs, ArrayRef steps) { - LoopBuilder result; - auto opHandle = OperationHandle::create(lbs, ubs, steps); - - loop::ParallelOp parallelOp = - cast(*opHandle.getOperation()); + loop::ParallelOp parallelOp = OperationBuilder( + SmallVector(lbs.begin(), lbs.end()), + SmallVector(ubs.begin(), ubs.end()), + SmallVector(steps.begin(), steps.end())); for (size_t i = 0, e = ivs.size(); i < e; ++i) ivs[i] = parallelOp.getBody()->getArgument(i); - result.enter(parallelOp.getBody(), /*prev=*/1); + LoopBuilder result; + result.enter(parallelOp.getBody()); return result; } @@ -98,15 +98,15 @@ mlir::edsc::makeLoopBuilder(Value *iv, Value lb, Value ub, Value step, MutableArrayRef iterArgsHandles, ValueRange iterArgsInitValues) { mlir::edsc::LoopBuilder result; - auto forOp = - OperationHandle::createOp(lb, ub, step, iterArgsInitValues); - *iv = forOp.getInductionVar(); + loop::ForOp forOp = + OperationBuilder(lb, ub, step, iterArgsInitValues); + *iv = Value(forOp.getInductionVar()); auto *body = loop::getForInductionVarOwner(*iv).getBody(); for (size_t i = 0, e = iterArgsHandles.size(); i < e; ++i) { // Skipping the induction variable. 
iterArgsHandles[i] = body->getArgument(i + 1); } result.setOp(forOp); - result.enter(body, /*prev=*/1); + result.enter(body); return result; } diff --git a/mlir/lib/Dialect/LoopOps/LoopOps.cpp b/mlir/lib/Dialect/LoopOps/LoopOps.cpp index 80f8120a81b61..fb3945d33c9c6 100644 --- a/mlir/lib/Dialect/LoopOps/LoopOps.cpp +++ b/mlir/lib/Dialect/LoopOps/LoopOps.cpp @@ -39,8 +39,8 @@ LoopOpsDialect::LoopOpsDialect(MLIRContext *context) // ForOp //===----------------------------------------------------------------------===// -void ForOp::build(Builder *builder, OperationState &result, Value lb, Value ub, - Value step, ValueRange iterArgs) { +void ForOp::build(OpBuilder &builder, OperationState &result, Value lb, + Value ub, Value step, ValueRange iterArgs) { result.addOperands({lb, ub, step}); result.addOperands(iterArgs); for (Value v : iterArgs) @@ -48,8 +48,8 @@ void ForOp::build(Builder *builder, OperationState &result, Value lb, Value ub, Region *bodyRegion = result.addRegion(); bodyRegion->push_back(new Block()); if (iterArgs.empty()) - ForOp::ensureTerminator(*bodyRegion, *builder, result.location); - bodyRegion->front().addArgument(builder->getIndexType()); + ForOp::ensureTerminator(*bodyRegion, builder, result.location); + bodyRegion->front().addArgument(builder.getIndexType()); for (Value v : iterArgs) bodyRegion->front().addArgument(v.getType()); } @@ -233,12 +233,12 @@ void ForOp::getSuccessorRegions(Optional index, // IfOp //===----------------------------------------------------------------------===// -void IfOp::build(Builder *builder, OperationState &result, Value cond, +void IfOp::build(OpBuilder &builder, OperationState &result, Value cond, bool withElseRegion) { build(builder, result, /*resultTypes=*/llvm::None, cond, withElseRegion); } -void IfOp::build(Builder *builder, OperationState &result, +void IfOp::build(OpBuilder &builder, OperationState &result, TypeRange resultTypes, Value cond, bool withElseRegion) { result.addOperands(cond); result.addTypes(resultTypes); @@ -246,13 +246,13 @@ void IfOp::build(Builder *builder, OperationState &result, Region *thenRegion = result.addRegion(); thenRegion->push_back(new Block()); if (resultTypes.empty()) - IfOp::ensureTerminator(*thenRegion, *builder, result.location); + IfOp::ensureTerminator(*thenRegion, builder, result.location); Region *elseRegion = result.addRegion(); if (withElseRegion) { elseRegion->push_back(new Block()); if (resultTypes.empty()) - IfOp::ensureTerminator(*elseRegion, *builder, result.location); + IfOp::ensureTerminator(*elseRegion, builder, result.location); } } @@ -371,22 +371,23 @@ void IfOp::getSuccessorRegions(Optional index, // ParallelOp //===----------------------------------------------------------------------===// -void ParallelOp::build(Builder *builder, OperationState &result, ValueRange lbs, - ValueRange ubs, ValueRange steps, ValueRange initVals) { +void ParallelOp::build(OpBuilder &builder, OperationState &result, + ValueRange lbs, ValueRange ubs, ValueRange steps, + ValueRange initVals) { result.addOperands(lbs); result.addOperands(ubs); result.addOperands(steps); result.addOperands(initVals); result.addAttribute( ParallelOp::getOperandSegmentSizeAttr(), - builder->getI32VectorAttr({static_cast(lbs.size()), - static_cast(ubs.size()), - static_cast(steps.size()), - static_cast(initVals.size())})); + builder.getI32VectorAttr({static_cast(lbs.size()), + static_cast(ubs.size()), + static_cast(steps.size()), + static_cast(initVals.size())})); Region *bodyRegion = result.addRegion(); - 
ParallelOp::ensureTerminator(*bodyRegion, *builder, result.location); + ParallelOp::ensureTerminator(*bodyRegion, builder, result.location); for (size_t i = 0, e = steps.size(); i < e; ++i) - bodyRegion->front().addArgument(builder->getIndexType()); + bodyRegion->front().addArgument(builder.getIndexType()); for (Value init : initVals) result.addTypes(init.getType()); } @@ -554,7 +555,8 @@ ParallelOp mlir::loop::getParallelForInductionVarOwner(Value val) { // ReduceOp //===----------------------------------------------------------------------===// -void ReduceOp::build(Builder *builder, OperationState &result, Value operand) { +void ReduceOp::build(OpBuilder &builder, OperationState &result, + Value operand) { auto type = operand.getType(); result.addOperands(operand); Region *bodyRegion = result.addRegion(); diff --git a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt index 13a6aa6f288aa..0339d15f1fdcf 100644 --- a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt @@ -8,14 +8,12 @@ add_mlir_dialect_library(MLIRLoopOpsTransforms DEPENDS MLIRLoopPassIncGen - ) -target_link_libraries(MLIRLoopOpsTransforms - PUBLIC + + LINK_LIBS PUBLIC MLIRAffineOps MLIRIR MLIRPass MLIRLoopOps MLIRStandardOps MLIRSupport - LLVMSupport ) diff --git a/mlir/lib/Dialect/OpenMP/CMakeLists.txt b/mlir/lib/Dialect/OpenMP/CMakeLists.txt index 68a939ed42999..23f8cd4058f7b 100644 --- a/mlir/lib/Dialect/OpenMP/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenMP/CMakeLists.txt @@ -6,8 +6,7 @@ add_mlir_dialect_library(MLIROpenMP DEPENDS MLIROpenMPOpsIncGen - ) -target_link_libraries(MLIROpenMP - PUBLIC + + LINK_LIBS PUBLIC MLIRIR ) diff --git a/mlir/lib/Dialect/Quant/CMakeLists.txt b/mlir/lib/Dialect/Quant/CMakeLists.txt index 4f83829622065..19ac787de0278 100644 --- a/mlir/lib/Dialect/Quant/CMakeLists.txt +++ b/mlir/lib/Dialect/Quant/CMakeLists.txt @@ -15,9 +15,8 @@ add_mlir_dialect_library(MLIRQuant DEPENDS MLIRQuantOpsIncGen MLIRQuantPassIncGen - ) -target_link_libraries(MLIRQuant - PUBLIC + + LINK_LIBS PUBLIC MLIRIR MLIRPass MLIRSideEffects diff --git a/mlir/lib/Dialect/SDBM/CMakeLists.txt b/mlir/lib/Dialect/SDBM/CMakeLists.txt index 6f5a119408cf5..db2b9ac85472c 100644 --- a/mlir/lib/Dialect/SDBM/CMakeLists.txt +++ b/mlir/lib/Dialect/SDBM/CMakeLists.txt @@ -5,8 +5,7 @@ add_mlir_dialect_library(MLIRSDBM ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SDBM -) -target_link_libraries(MLIRSDBM - PUBLIC + + LINK_LIBS PUBLIC MLIRIR ) diff --git a/mlir/lib/Dialect/SPIRV/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/CMakeLists.txt index e6834ca2ef3e4..8fe368aa04e62 100644 --- a/mlir/lib/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/lib/Dialect/SPIRV/CMakeLists.txt @@ -27,9 +27,8 @@ add_mlir_dialect_library(MLIRSPIRV MLIRSPIRVOpsIncGen MLIRSPIRVOpUtilsGen MLIRSPIRVTargetAndABIIncGen - ) -target_link_libraries(MLIRSPIRV - PUBLIC + + LINK_LIBS PUBLIC MLIRControlFlowInterfaces MLIRIR MLIRParser diff --git a/mlir/lib/Dialect/SPIRV/SPIRVCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/SPIRVCanonicalization.cpp index efd15d773f8f3..05f0eb4462e83 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVCanonicalization.cpp @@ -353,8 +353,8 @@ struct ConvertSelectionOpToSelect } bool isSameAttrList(spirv::StoreOp lhs, spirv::StoreOp rhs) const { - return lhs.getOperation()->getAttrList().getDictionary() == - rhs.getOperation()->getAttrList().getDictionary(); + return 
lhs.getOperation()->getMutableAttrDict().getDictionary() == + rhs.getOperation()->getMutableAttrDict().getDictionary(); } diff --git a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp index c6e15d5db4851..c9f2983e232b0 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp @@ -596,7 +596,7 @@ spirv::AccessChainOp mlir::spirv::getElementPtr( } SmallVector linearizedIndices; // Add a '0' at the start to index into the struct. - auto zero = spirv::ConstantOp::getZero(indexType, loc, &builder); + auto zero = spirv::ConstantOp::getZero(indexType, loc, builder); linearizedIndices.push_back(zero); // If it is a zero-rank memref type, extract the element directly. if (!ptrLoc) { diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index 6deae62c69871..5d4e309a2e96e 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -788,7 +788,7 @@ static Type getElementPtrType(Type type, ValueRange indices, Location baseLoc) { return spirv::PointerType::get(resultType, resultStorageClass); } -void spirv::AccessChainOp::build(Builder *builder, OperationState &state, +void spirv::AccessChainOp::build(OpBuilder &builder, OperationState &state, Value basePtr, ValueRange indices) { auto type = getElementPtrType(basePtr.getType(), indices, state.location); assert(type && "Unable to deduce return type based on basePtr and indices"); @@ -857,9 +857,9 @@ static LogicalResult verify(spirv::AccessChainOp accessChainOp) { // spv._address_of //===----------------------------------------------------------------------===// -void spirv::AddressOfOp::build(Builder *builder, OperationState &state, +void spirv::AddressOfOp::build(OpBuilder &builder, OperationState &state, spirv::GlobalVariableOp var) { - build(builder, state, var.type(), builder->getSymbolRefAttr(var)); + build(builder, state, var.type(), builder.getSymbolRefAttr(var)); } static LogicalResult verify(spirv::AddressOfOp addressOfOp) { @@ -987,26 +987,23 @@ static LogicalResult verify(spirv::BitcastOp bitcastOp) { // spv.BranchOp //===----------------------------------------------------------------------===// -Optional spirv::BranchOp::getSuccessorOperands(unsigned index) { +Optional +spirv::BranchOp::getMutableSuccessorOperands(unsigned index) { assert(index == 0 && "invalid successor index"); - return getOperands(); + return targetOperandsMutable(); } -bool spirv::BranchOp::canEraseSuccessorOperand() { return true; } - //===----------------------------------------------------------------------===// // spv.BranchConditionalOp //===----------------------------------------------------------------------===// -Optional -spirv::BranchConditionalOp::getSuccessorOperands(unsigned index) { +Optional +spirv::BranchConditionalOp::getMutableSuccessorOperands(unsigned index) { assert(index < 2 && "invalid successor index"); - return index == kTrueIndex ? getTrueBlockArguments() - : getFalseBlockArguments(); + return index == kTrueIndex ? 
trueTargetOperandsMutable() + : falseTargetOperandsMutable(); } -bool spirv::BranchConditionalOp::canEraseSuccessorOperand() { return true; } - static ParseResult parseBranchConditionalOp(OpAsmParser &parser, OperationState &state) { auto &builder = parser.getBuilder(); @@ -1163,10 +1160,10 @@ static LogicalResult verify(spirv::CompositeConstructOp compositeConstructOp) { // spv.CompositeExtractOp //===----------------------------------------------------------------------===// -void spirv::CompositeExtractOp::build(Builder *builder, OperationState &state, +void spirv::CompositeExtractOp::build(OpBuilder &builder, OperationState &state, Value composite, ArrayRef indices) { - auto indexAttr = builder->getI32ArrayAttr(indices); + auto indexAttr = builder.getI32ArrayAttr(indices); auto elementType = getElementType(composite.getType(), indexAttr, state.location); if (!elementType) { @@ -1386,28 +1383,28 @@ bool spirv::ConstantOp::isBuildableWith(Type type) { } spirv::ConstantOp spirv::ConstantOp::getZero(Type type, Location loc, - OpBuilder *builder) { + OpBuilder &builder) { if (auto intType = type.dyn_cast()) { unsigned width = intType.getWidth(); if (width == 1) - return builder->create(loc, type, - builder->getBoolAttr(false)); - return builder->create( - loc, type, builder->getIntegerAttr(type, APInt(width, 0))); + return builder.create(loc, type, + builder.getBoolAttr(false)); + return builder.create( + loc, type, builder.getIntegerAttr(type, APInt(width, 0))); } llvm_unreachable("unimplemented types for ConstantOp::getZero()"); } spirv::ConstantOp spirv::ConstantOp::getOne(Type type, Location loc, - OpBuilder *builder) { + OpBuilder &builder) { if (auto intType = type.dyn_cast()) { unsigned width = intType.getWidth(); if (width == 1) - return builder->create(loc, type, - builder->getBoolAttr(true)); - return builder->create( - loc, type, builder->getIntegerAttr(type, APInt(width, 1))); + return builder.create(loc, type, + builder.getBoolAttr(true)); + return builder.create( + loc, type, builder.getIntegerAttr(type, APInt(width, 1))); } llvm_unreachable("unimplemented types for ConstantOp::getOne()"); @@ -1417,14 +1414,14 @@ spirv::ConstantOp spirv::ConstantOp::getOne(Type type, Location loc, // spv.EntryPoint //===----------------------------------------------------------------------===// -void spirv::EntryPointOp::build(Builder *builder, OperationState &state, +void spirv::EntryPointOp::build(OpBuilder &builder, OperationState &state, spirv::ExecutionModel executionModel, spirv::FuncOp function, ArrayRef interfaceVars) { build(builder, state, - builder->getI32IntegerAttr(static_cast(executionModel)), - builder->getSymbolRefAttr(function), - builder->getArrayAttr(interfaceVars)); + builder.getI32IntegerAttr(static_cast(executionModel)), + builder.getSymbolRefAttr(function), + builder.getArrayAttr(interfaceVars)); } static ParseResult parseEntryPointOp(OpAsmParser &parser, @@ -1479,13 +1476,13 @@ static LogicalResult verify(spirv::EntryPointOp entryPointOp) { // spv.ExecutionMode //===----------------------------------------------------------------------===// -void spirv::ExecutionModeOp::build(Builder *builder, OperationState &state, +void spirv::ExecutionModeOp::build(OpBuilder &builder, OperationState &state, spirv::FuncOp function, spirv::ExecutionMode executionMode, ArrayRef params) { - build(builder, state, builder->getSymbolRefAttr(function), - builder->getI32IntegerAttr(static_cast(executionMode)), - builder->getI32ArrayAttr(params)); + build(builder, state, 
builder.getSymbolRefAttr(function), + builder.getI32IntegerAttr(static_cast(executionMode)), + builder.getI32ArrayAttr(params)); } static ParseResult parseExecutionModeOp(OpAsmParser &parser, @@ -1632,16 +1629,15 @@ LogicalResult spirv::FuncOp::verifyBody() { return failure(walkResult.wasInterrupted()); } -void spirv::FuncOp::build(Builder *builder, OperationState &state, +void spirv::FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name, FunctionType type, spirv::FunctionControl control, ArrayRef attrs) { state.addAttribute(SymbolTable::getSymbolAttrName(), - builder->getStringAttr(name)); + builder.getStringAttr(name)); state.addAttribute(getTypeAttrName(), TypeAttr::get(type)); - state.addAttribute( - spirv::attributeName(), - builder->getI32IntegerAttr(static_cast(control))); + state.addAttribute(spirv::attributeName(), + builder.getI32IntegerAttr(static_cast(control))); state.attributes.append(attrs.begin(), attrs.end()); state.addRegion(); } @@ -1725,27 +1721,27 @@ Operation::operand_range spirv::FunctionCallOp::getArgOperands() { // spv.globalVariable //===----------------------------------------------------------------------===// -void spirv::GlobalVariableOp::build(Builder *builder, OperationState &state, +void spirv::GlobalVariableOp::build(OpBuilder &builder, OperationState &state, Type type, StringRef name, unsigned descriptorSet, unsigned binding) { - build(builder, state, TypeAttr::get(type), builder->getStringAttr(name), + build(builder, state, TypeAttr::get(type), builder.getStringAttr(name), nullptr); state.addAttribute( spirv::SPIRVDialect::getAttributeName(spirv::Decoration::DescriptorSet), - builder->getI32IntegerAttr(descriptorSet)); + builder.getI32IntegerAttr(descriptorSet)); state.addAttribute( spirv::SPIRVDialect::getAttributeName(spirv::Decoration::Binding), - builder->getI32IntegerAttr(binding)); + builder.getI32IntegerAttr(binding)); } -void spirv::GlobalVariableOp::build(Builder *builder, OperationState &state, +void spirv::GlobalVariableOp::build(OpBuilder &builder, OperationState &state, Type type, StringRef name, spirv::BuiltIn builtin) { - build(builder, state, TypeAttr::get(type), builder->getStringAttr(name), + build(builder, state, TypeAttr::get(type), builder.getStringAttr(name), nullptr); state.addAttribute( spirv::SPIRVDialect::getAttributeName(spirv::Decoration::BuiltIn), - builder->getStringAttr(spirv::stringifyBuiltIn(builtin))); + builder.getStringAttr(spirv::stringifyBuiltIn(builtin))); } static ParseResult parseGlobalVariableOp(OpAsmParser &parser, @@ -1849,10 +1845,10 @@ static LogicalResult verify(spirv::GroupNonUniformBallotOp ballotOp) { // spv.GroupNonUniformElectOp //===----------------------------------------------------------------------===// -void spirv::GroupNonUniformElectOp::build(Builder *builder, +void spirv::GroupNonUniformElectOp::build(OpBuilder &builder, OperationState &state, spirv::Scope scope) { - build(builder, state, builder->getI1Type(), scope); + build(builder, state, builder.getI1Type(), scope); } static LogicalResult verify(spirv::GroupNonUniformElectOp groupOp) { @@ -1868,7 +1864,7 @@ static LogicalResult verify(spirv::GroupNonUniformElectOp groupOp) { // spv.LoadOp //===----------------------------------------------------------------------===// -void spirv::LoadOp::build(Builder *builder, OperationState &state, +void spirv::LoadOp::build(OpBuilder &builder, OperationState &state, Value basePtr, IntegerAttr memory_access, IntegerAttr alignment) { auto ptrType = basePtr.getType().cast(); @@ -1926,9 
+1922,9 @@ static LogicalResult verify(spirv::LoadOp loadOp) { // spv.loop //===----------------------------------------------------------------------===// -void spirv::LoopOp::build(Builder *builder, OperationState &state) { +void spirv::LoopOp::build(OpBuilder &builder, OperationState &state) { state.addAttribute("loop_control", - builder->getI32IntegerAttr( + builder.getI32IntegerAttr( static_cast(spirv::LoopControl::None))); state.addRegion(); } @@ -2104,19 +2100,19 @@ static LogicalResult verify(spirv::MergeOp mergeOp) { // spv.module //===----------------------------------------------------------------------===// -void spirv::ModuleOp::build(Builder *builder, OperationState &state) { - ensureTerminator(*state.addRegion(), *builder, state.location); +void spirv::ModuleOp::build(OpBuilder &builder, OperationState &state) { + ensureTerminator(*state.addRegion(), builder, state.location); } -void spirv::ModuleOp::build(Builder *builder, OperationState &state, +void spirv::ModuleOp::build(OpBuilder &builder, OperationState &state, spirv::AddressingModel addressing_model, spirv::MemoryModel memory_model) { state.addAttribute( "addressing_model", - builder->getI32IntegerAttr(static_cast(addressing_model))); - state.addAttribute("memory_model", builder->getI32IntegerAttr( + builder.getI32IntegerAttr(static_cast(addressing_model))); + state.addAttribute("memory_model", builder.getI32IntegerAttr( static_cast(memory_model))); - ensureTerminator(*state.addRegion(), *builder, state.location); + ensureTerminator(*state.addRegion(), builder, state.location); } static ParseResult parseModuleOp(OpAsmParser &parser, OperationState &state) { @@ -2272,8 +2268,8 @@ static LogicalResult verify(spirv::ReturnValueOp retValOp) { // spv.Select //===----------------------------------------------------------------------===// -void spirv::SelectOp::build(Builder *builder, OperationState &state, Value cond, - Value trueValue, Value falseValue) { +void spirv::SelectOp::build(OpBuilder &builder, OperationState &state, + Value cond, Value trueValue, Value falseValue) { build(builder, state, trueValue.getType(), cond, trueValue, falseValue); } @@ -2381,10 +2377,10 @@ void spirv::SelectionOp::addMergeBlock() { spirv::SelectionOp spirv::SelectionOp::createIfThen( Location loc, Value condition, - function_ref thenBody, OpBuilder *builder) { - auto selectionControl = builder->getI32IntegerAttr( + function_ref thenBody, OpBuilder &builder) { + auto selectionControl = builder.getI32IntegerAttr( static_cast(spirv::SelectionControl::None)); - auto selectionOp = builder->create(loc, selectionControl); + auto selectionOp = builder.create(loc, selectionControl); selectionOp.addMergeBlock(); Block *mergeBlock = selectionOp.getMergeBlock(); @@ -2392,17 +2388,17 @@ spirv::SelectionOp spirv::SelectionOp::createIfThen( // Build the "then" block. { - OpBuilder::InsertionGuard guard(*builder); - thenBlock = builder->createBlock(mergeBlock); + OpBuilder::InsertionGuard guard(builder); + thenBlock = builder.createBlock(mergeBlock); thenBody(builder); - builder->create(loc, mergeBlock); + builder.create(loc, mergeBlock); } // Build the header block. 
{ - OpBuilder::InsertionGuard guard(*builder); - builder->createBlock(thenBlock); - builder->create( + OpBuilder::InsertionGuard guard(builder); + builder.createBlock(thenBlock); + builder.create( loc, condition, thenBlock, /*trueArguments=*/ArrayRef(), mergeBlock, /*falseArguments=*/ArrayRef()); diff --git a/mlir/lib/Dialect/SPIRV/Serialization/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/Serialization/CMakeLists.txt index 8f24491f47158..c04f801321ea2 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/CMakeLists.txt +++ b/mlir/lib/Dialect/SPIRV/Serialization/CMakeLists.txt @@ -9,9 +9,8 @@ add_mlir_dialect_library(MLIRSPIRVSerialization DEPENDS MLIRSPIRVSerializationGen - ) -target_link_libraries(MLIRSPIRVSerialization - PUBLIC + + LINK_LIBS PUBLIC MLIRIR MLIRSPIRV MLIRSupport diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp index 9ea6ac02be0ac..b46e17a8c6ab2 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp @@ -68,6 +68,16 @@ struct BlockMergeInfo { : mergeBlock(m), continueBlock(c) {} }; +/// A struct for containing OpLine instruction information. +struct DebugLine { + uint32_t fileID; + uint32_t line; + uint32_t col; + + DebugLine(uint32_t fileIDNum, uint32_t lineNum, uint32_t colNum) + : fileID(fileIDNum), line(lineNum), col(colNum) {} +}; + /// Map from a selection/loop's header block to its merge (and continue) target. using BlockMergeInfoMap = DenseMap; @@ -232,6 +242,23 @@ class Deserializer { /// Processes a SPIR-V OpConstantNull instruction with the given `operands`. LogicalResult processConstantNull(ArrayRef operands); + //===--------------------------------------------------------------------===// + // Debug + //===--------------------------------------------------------------------===// + + /// Discontinues any source-level location information that might be active + /// from a previous OpLine instruction. + LogicalResult clearDebugLine(); + + /// Creates a FileLineColLoc with the OpLine location information. + Location createFileLineColLoc(OpBuilder opBuilder); + + /// Processes a SPIR-V OpLine instruction with the given `operands`. + LogicalResult processDebugLine(ArrayRef operands); + + /// Processes a SPIR-V OpString instruction with the given `operands`. + LogicalResult processDebugString(ArrayRef operands); + //===--------------------------------------------------------------------===// // Control flow //===--------------------------------------------------------------------===// @@ -376,6 +403,10 @@ class Deserializer { /// The SPIR-V binary module. ArrayRef binary; + /// Contains the data of the OpLine instruction which precedes the current + /// processing instruction. + llvm::Optional debugLine; + /// The current word offset into the binary module. unsigned curOffset = 0; @@ -444,8 +475,11 @@ class Deserializer { // Result to name mapping. DenseMap nameMap; + // Result to debug info mapping. + DenseMap debugInfoMap; + // Result to decorations mapping. - DenseMap decorations; + DenseMap decorations; // Result to type decorations. 
   DenseMap<uint32_t, uint32_t> typeDecorations;
@@ -521,9 +555,9 @@ Optional<spirv::ModuleOp> Deserializer::collect() { return module; }
 //===----------------------------------------------------------------------===//
 spirv::ModuleOp Deserializer::createModuleOp() {
-  Builder builder(context);
+  OpBuilder builder(context);
   OperationState state(unknownLoc, spirv::ModuleOp::getOperationName());
-  spirv::ModuleOp::build(&builder, state);
+  spirv::ModuleOp::build(builder, state);
   return cast<spirv::ModuleOp>(Operation::create(state));
 }
@@ -1506,6 +1540,7 @@ LogicalResult Deserializer::processBranch(ArrayRef<uint32_t> operands) {
   auto *target = getOrCreateBlock(operands[0]);
   opBuilder.create<spirv::BranchOp>(unknownLoc, target);
+  clearDebugLine();
   return success();
 }
@@ -1536,6 +1571,7 @@ Deserializer::processBranchConditional(ArrayRef<uint32_t> operands) {
       /*trueArguments=*/ArrayRef<Value>(), falseBlock,
       /*falseArguments=*/ArrayRef<Value>(), weights);
+  clearDebugLine();
   return success();
 }
@@ -1994,6 +2030,57 @@ LogicalResult Deserializer::structurizeControlFlow() {
   return success();
 }
+//===----------------------------------------------------------------------===//
+// Debug
+//===----------------------------------------------------------------------===//
+
+Location Deserializer::createFileLineColLoc(OpBuilder opBuilder) {
+  if (!debugLine)
+    return unknownLoc;
+
+  auto fileName = debugInfoMap.lookup(debugLine->fileID).str();
+  if (fileName.empty())
+    fileName = "<unknown>";
+  return opBuilder.getFileLineColLoc(opBuilder.getIdentifier(fileName),
+                                     debugLine->line, debugLine->col);
+}
+
+LogicalResult Deserializer::processDebugLine(ArrayRef<uint32_t> operands) {
+  // According to SPIR-V spec:
+  // "This location information applies to the instructions physically
+  // following this instruction, up to the first occurrence of any of the
+  // following: the next end of block, the next OpLine instruction, or the next
+  // OpNoLine instruction."
+  if (operands.size() != 3)
+    return emitError(unknownLoc, "OpLine must have 3 operands");
+  debugLine = DebugLine(operands[0], operands[1], operands[2]);
+  return success();
+}
+
+LogicalResult Deserializer::clearDebugLine() {
+  debugLine = llvm::None;
+  return success();
+}
+
+LogicalResult Deserializer::processDebugString(ArrayRef<uint32_t> operands) {
+  if (operands.size() < 2)
+    return emitError(unknownLoc, "OpString needs at least 2 operands");
+
+  if (!debugInfoMap.lookup(operands[0]).empty())
+    return emitError(unknownLoc,
+                     "duplicate debug string found for result <id> ")
+           << operands[0];
+
+  unsigned wordIndex = 1;
+  StringRef debugString = decodeStringLiteral(operands, wordIndex);
+  if (wordIndex != operands.size())
+    return emitError(unknownLoc,
+                     "unexpected trailing words in OpString instruction");
+
+  debugInfoMap[operands[0]] = debugString;
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction
 //===----------------------------------------------------------------------===//
@@ -2085,10 +2172,15 @@ LogicalResult Deserializer::processInstruction(spirv::Opcode opcode,
       return processGlobalVariable(operands);
     }
     break;
+  case spirv::Opcode::OpLine:
+    return processDebugLine(operands);
+  case spirv::Opcode::OpNoLine:
+    return clearDebugLine();
   case spirv::Opcode::OpName:
     return processName(operands);
-  case spirv::Opcode::OpModuleProcessed:
   case spirv::Opcode::OpString:
+    return processDebugString(operands);
+  case spirv::Opcode::OpModuleProcessed:
   case spirv::Opcode::OpSource:
   case spirv::Opcode::OpSourceContinued:
   case spirv::Opcode::OpSourceExtension:
diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp
index 9db503ea0bdfa..cc29bae39c1e8 100644
--- a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp
+++ b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp
@@ -41,7 +41,7 @@ static LogicalResult encodeInstructionInto(SmallVectorImpl<uint32_t> &binary,
                                            ArrayRef<uint32_t> operands) {
   uint32_t wordCount = 1 + operands.size();
   binary.push_back(spirv::getPrefixedOpcode(wordCount, op));
-  binary.append(operands.begin(), operands.end());
+  binary.append(operands.begin(), operands.end());
   return success();
 }
@@ -132,7 +132,7 @@ namespace {
 class Serializer {
 public:
   /// Creates a serializer for the given SPIR-V `module`.
-  explicit Serializer(spirv::ModuleOp module);
+  explicit Serializer(spirv::ModuleOp module, bool emitDebugInfo = false);
 
   /// Serializes the remembered SPIR-V module.
   LogicalResult serialize();
@@ -189,6 +189,8 @@ class Serializer {
   void processCapability();
 
+  void processDebugInfo();
+
   void processExtension();
 
   void processMemoryModel();
@@ -375,6 +377,10 @@ class Serializer {
   LogicalResult emitDecoration(uint32_t target, spirv::Decoration decoration,
                                ArrayRef<uint32_t> params = {});
 
+  /// Emits an OpLine instruction with the given `loc` location information into
+  /// the given `binary` vector.
+  LogicalResult emitDebugLine(SmallVectorImpl<uint32_t> &binary, Location loc);
+
 private:
   /// The SPIR-V module to be serialized.
   spirv::ModuleOp module;
@@ -382,6 +388,13 @@ class Serializer {
   /// An MLIR builder for getting MLIR constructs.
   mlir::Builder mlirBuilder;
 
+  /// A flag which indicates if the debug info should be emitted.
+  bool emitDebugInfo = false;
+
+  /// The <id> of the OpString instruction, which specifies a file name, for
+  /// use by other debug instructions.
+  uint32_t fileID = 0;
+
   /// The next available result <id>.
   uint32_t nextID = 1;
@@ -394,7 +407,7 @@ class Serializer {
   SmallVector memoryModel;
   SmallVector entryPoints;
   SmallVector executionModes;
-  // TODO(antiagainst): debug instructions
+  SmallVector debug;
   SmallVector names;
   SmallVector decorations;
   SmallVector typesGlobalValues;
@@ -482,8 +495,9 @@ class Serializer {
 };
 } // namespace
 
-Serializer::Serializer(spirv::ModuleOp module)
-    : module(module), mlirBuilder(module.getContext()) {}
+Serializer::Serializer(spirv::ModuleOp module, bool emitDebugInfo)
+    : module(module), mlirBuilder(module.getContext()),
+      emitDebugInfo(emitDebugInfo) {}
 
 LogicalResult Serializer::serialize() {
   LLVM_DEBUG(llvm::dbgs() << "+++ starting serialization +++\n");
@@ -495,6 +509,7 @@ LogicalResult Serializer::serialize() {
   processCapability();
   processExtension();
   processMemoryModel();
+  processDebugInfo();
 
   // Iterate over the module body to serialize it. Assumptions are that there is
   // only one basic block in the moduleOp
@@ -525,6 +540,7 @@ void Serializer::collect(SmallVectorImpl<uint32_t> &binary) {
   binary.append(memoryModel.begin(), memoryModel.end());
   binary.append(entryPoints.begin(), entryPoints.end());
   binary.append(executionModes.begin(), executionModes.end());
+  binary.append(debug.begin(), debug.end());
   binary.append(names.begin(), names.end());
   binary.append(decorations.begin(), decorations.end());
   binary.append(typesGlobalValues.begin(), typesGlobalValues.end());
@@ -569,6 +585,19 @@ void Serializer::processCapability() {
                           {static_cast<uint32_t>(cap)});
 }
 
+void Serializer::processDebugInfo() {
+  if (!emitDebugInfo)
+    return;
+  auto fileLoc = module.getLoc().dyn_cast<FileLineColLoc>();
+  auto fileName = fileLoc ? fileLoc.getFilename() : "<unknown>";
+  fileID = getNextID();
+  SmallVector<uint32_t, 16> operands;
+  operands.push_back(fileID);
+  spirv::encodeStringLiteralInto(operands, fileName);
+  encodeInstructionInto(debug, spirv::Opcode::OpString, operands);
+  // TODO: Encode more debug instructions.
+}
+
 void Serializer::processExtension() {
   llvm::SmallVector extName;
   for (spirv::Extension ext : module.vce_triple()->getExtensions()) {
@@ -1838,13 +1867,26 @@ LogicalResult Serializer::emitDecoration(uint32_t target,
   return success();
 }
 
+LogicalResult Serializer::emitDebugLine(SmallVectorImpl<uint32_t> &binary,
+                                        Location loc) {
+  if (!emitDebugInfo)
+    return success();
+
+  auto fileLoc = loc.dyn_cast<FileLineColLoc>();
+  if (fileLoc)
+    encodeInstructionInto(binary, spirv::Opcode::OpLine,
+                          {fileID, fileLoc.getLine(), fileLoc.getColumn()});
+  return success();
+}
+
 LogicalResult spirv::serialize(spirv::ModuleOp module,
-                               SmallVectorImpl<uint32_t> &binary) {
+                               SmallVectorImpl<uint32_t> &binary,
+                               bool emitDebugInfo) {
   if (!module.vce_triple().hasValue())
     return module.emitError(
         "module must have 'vce_triple' attribute to be serializeable");
 
-  Serializer serializer(module);
+  Serializer serializer(module, emitDebugInfo);
 
   if (failed(serializer.serialize()))
     return failure();
diff --git a/mlir/lib/Dialect/SPIRV/Serialization/TranslateRegistration.cpp b/mlir/lib/Dialect/SPIRV/Serialization/TranslateRegistration.cpp
index 4850be5ceb488..4c3fb1e8d422f 100644
--- a/mlir/lib/Dialect/SPIRV/Serialization/TranslateRegistration.cpp
+++ b/mlir/lib/Dialect/SPIRV/Serialization/TranslateRegistration.cpp
@@ -61,7 +61,7 @@ static OwningModuleRef deserializeModule(const llvm::MemoryBuffer *input,
 }
 
 namespace mlir {
-void registerToSPIRVTranslation() {
+void registerFromSPIRVTranslation() {
   TranslateToMLIRRegistration fromBinary(
       "deserialize-spirv", [](llvm::SourceMgr &sourceMgr, MLIRContext *context) {
@@ -91,7 +91,8 @@ static LogicalResult serializeModule(ModuleOp module, raw_ostream &output) {
   if (spirvModules.size() != 1)
     return module.emitError("found more than one 'spv.module' op");
 
-  if (failed(spirv::serialize(spirvModules[0], binary)))
+  if (failed(
+          spirv::serialize(spirvModules[0], binary, /*emitDebugInfo=*/false)))
     return failure();
 
   output.write(reinterpret_cast<char *>(binary.data()),
@@ -101,7 +102,7 @@ static LogicalResult serializeModule(ModuleOp module, raw_ostream &output) {
 }
 
 namespace mlir {
-void registerFromSPIRVTranslation() {
+void registerToSPIRVTranslation() {
   TranslateFromMLIRRegistration toBinary(
       "serialize-spirv", [](ModuleOp module, raw_ostream &output) {
         return serializeModule(module, output);
@@ -114,7 +115,7 @@ void registerFromSPIRVTranslation() {
 //===----------------------------------------------------------------------===//
 
 static LogicalResult roundTripModule(llvm::SourceMgr &sourceMgr,
-                                     raw_ostream &output,
+                                     bool emitDebugInfo, raw_ostream &output,
                                      MLIRContext *context) {
   // Parse an MLIR module from the source manager.
   auto srcModule = OwningModuleRef(parseSourceFile(sourceMgr, context));
@@ -131,7 +132,7 @@ static LogicalResult roundTripModule(llvm::SourceMgr &sourceMgr,
   if (std::next(spirvModules.begin()) != spirvModules.end())
     return srcModule->emitError("found more than one 'spv.module' op");
 
-  if (failed(spirv::serialize(*spirvModules.begin(), binary)))
+  if (failed(spirv::serialize(*spirvModules.begin(), binary, emitDebugInfo)))
    return failure();
 
   // Then deserialize to get back a SPIR-V module.
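Since the debug emission above hinges on how SPIR-V packs instructions, a brief illustration may help. The first word of every instruction carries the word count in its high 16 bits and the opcode in the low 16 bits (this is what `spirv::getPrefixedOpcode` computes), so an OpLine with three operands occupies four words. A minimal standalone sketch, using the standard SPIR-V opcode value for OpLine rather than MLIR's helpers:

#include <cstdint>
#include <vector>

// Encode an OpLine instruction: one opcode word followed by the <id> of the
// OpString holding the file name, then the line and column numbers.
static void encodeOpLine(std::vector<uint32_t> &binary, uint32_t fileID,
                         uint32_t line, uint32_t col) {
  const uint32_t kOpLineOpcode = 8; // OpLine's opcode in the SPIR-V spec.
  const uint32_t wordCount = 4;     // Opcode word + 3 operand words.
  binary.push_back((wordCount << 16) | kOpLineOpcode);
  binary.push_back(fileID);
  binary.push_back(line);
  binary.push_back(col);
}

With the flag wired through `spirv::serialize`, the round-trip path registered below exposes this as the `test-spirv-roundtrip-debug` translation.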
@@ -153,7 +154,18 @@ void registerTestRoundtripSPIRV() { TranslateRegistration roundtrip( "test-spirv-roundtrip", [](llvm::SourceMgr &sourceMgr, raw_ostream &output, MLIRContext *context) { - return roundTripModule(sourceMgr, output, context); + return roundTripModule(sourceMgr, /*emitDebugInfo=*/false, output, + context); + }); +} + +void registerTestRoundtripDebugSPIRV() { + TranslateRegistration roundtrip( + "test-spirv-roundtrip-debug", + [](llvm::SourceMgr &sourceMgr, raw_ostream &output, + MLIRContext *context) { + return roundTripModule(sourceMgr, /*emitDebugInfo=*/true, output, + context); }); } } // namespace mlir diff --git a/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt index e388069d52eea..632194f213d69 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SPIRV/Transforms/CMakeLists.txt @@ -8,10 +8,8 @@ add_mlir_dialect_library(MLIRSPIRVTransforms DEPENDS MLIRSPIRVPassIncGen - ) -target_link_libraries(MLIRSPIRVTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRPass MLIRSPIRV ) diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp index daee70976ac2c..139b6bc093d0a 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp @@ -194,7 +194,7 @@ LogicalResult ProcessInterfaceVarABI::matchAndRewrite( if (argType.value().cast().isScalarOrVector()) { auto indexType = SPIRVTypeConverter::getIndexType(funcOp.getContext()); auto zero = - spirv::ConstantOp::getZero(indexType, funcOp.getLoc(), &rewriter); + spirv::ConstantOp::getZero(indexType, funcOp.getLoc(), rewriter); auto loadPtr = rewriter.create( funcOp.getLoc(), replacement, zero.constant()); replacement = rewriter.create(funcOp.getLoc(), loadPtr); diff --git a/mlir/lib/Dialect/Shape/CMakeLists.txt b/mlir/lib/Dialect/Shape/CMakeLists.txt index 4ed02acc3d465..1f198f7b81489 100644 --- a/mlir/lib/Dialect/Shape/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/CMakeLists.txt @@ -6,12 +6,10 @@ add_mlir_dialect_library(MLIRShape DEPENDS MLIRShapeOpsIncGen - ) -target_link_libraries(MLIRShape - PUBLIC + + LINK_LIBS PUBLIC MLIRDialect MLIRInferTypeOpInterface MLIRIR MLIRSideEffects - LLVMSupport ) diff --git a/mlir/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/CMakeLists.txt index 471674c0ca764..a2f496c7ab93f 100644 --- a/mlir/lib/Dialect/StandardOps/CMakeLists.txt +++ b/mlir/lib/Dialect/StandardOps/CMakeLists.txt @@ -8,14 +8,14 @@ add_mlir_dialect_library(MLIRStandardOps DEPENDS MLIRStandardOpsIncGen - ) -target_link_libraries(MLIRStandardOps - PUBLIC + + LINK_LIBS PUBLIC MLIRCallInterfaces MLIRControlFlowInterfaces MLIREDSC MLIRIR MLIRSideEffects MLIRViewLikeInterface - LLVMSupport ) + +add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp index 612235d27f9ff..30a1a8ca30abc 100644 --- a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp @@ -30,7 +30,7 @@ static SmallVector getMemRefSizes(Value memRef) { return res; } -mlir::edsc::MemRefBoundsCapture::MemRefBoundsCapture(Value v) : base(v) { +mlir::edsc::MemRefBoundsCapture::MemRefBoundsCapture(Value v) { auto memrefSizeValues = getMemRefSizes(v); for (auto s : memrefSizeValues) { lbs.push_back(std_constant_index(0)); @@ -39,11 +39,13 @@ mlir::edsc::MemRefBoundsCapture::MemRefBoundsCapture(Value v) : 
base(v) { } } -mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(Value v) : base(v) { - auto vectorType = v.getType().cast(); - for (auto s : vectorType.getShape()) { +mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(VectorType t) { + for (auto s : t.getShape()) { lbs.push_back(std_constant_index(0)); ubs.push_back(std_constant_index(s)); steps.push_back(1); } } + +mlir::edsc::VectorBoundsCapture::VectorBoundsCapture(Value v) + : VectorBoundsCapture(v.getType().cast()) {} diff --git a/mlir/lib/Dialect/StandardOps/EDSC/Intrinsics.cpp b/mlir/lib/Dialect/StandardOps/EDSC/Intrinsics.cpp index cd8ad74ed4fd1..614dfa926a0cd 100644 --- a/mlir/lib/Dialect/StandardOps/EDSC/Intrinsics.cpp +++ b/mlir/lib/Dialect/StandardOps/EDSC/Intrinsics.cpp @@ -12,47 +12,43 @@ using namespace mlir; using namespace mlir::edsc; -OperationHandle mlir::edsc::intrinsics::std_br(BlockHandle bh, - ArrayRef operands) { +BranchOp mlir::edsc::intrinsics::std_br(BlockHandle bh, ValueRange operands) { assert(bh && "Expected already captured BlockHandle"); - for (auto &o : operands) { - (void)o; - assert(o && "Expected already captured Value"); - } SmallVector ops(operands.begin(), operands.end()); - return OperationHandle::create(bh.getBlock(), ops); + return OperationBuilder(bh.getBlock(), ops); } -OperationHandle mlir::edsc::intrinsics::std_br(BlockHandle *bh, - ArrayRef types, - MutableArrayRef captures, - ArrayRef operands) { +BranchOp mlir::edsc::intrinsics::std_br(BlockHandle *bh, ArrayRef types, + MutableArrayRef captures, + ValueRange operands) { assert(!*bh && "Unexpected already captured BlockHandle"); BlockBuilder(bh, types, captures)(/* no body */); SmallVector ops(operands.begin(), operands.end()); - return OperationHandle::create(bh->getBlock(), ops); + return OperationBuilder(bh->getBlock(), ops); } -OperationHandle mlir::edsc::intrinsics::std_cond_br( - Value cond, BlockHandle trueBranch, ArrayRef trueOperands, - BlockHandle falseBranch, ArrayRef falseOperands) { +CondBranchOp mlir::edsc::intrinsics::std_cond_br(Value cond, + BlockHandle trueBranch, + ValueRange trueOperands, + BlockHandle falseBranch, + ValueRange falseOperands) { SmallVector trueOps(trueOperands.begin(), trueOperands.end()); SmallVector falseOps(falseOperands.begin(), falseOperands.end()); - return OperationHandle::create( - cond, trueBranch.getBlock(), trueOps, falseBranch.getBlock(), falseOps); + return OperationBuilder(cond, trueBranch.getBlock(), trueOps, + falseBranch.getBlock(), falseOps); } -OperationHandle mlir::edsc::intrinsics::std_cond_br( +CondBranchOp mlir::edsc::intrinsics::std_cond_br( Value cond, BlockHandle *trueBranch, ArrayRef trueTypes, - MutableArrayRef trueCaptures, ArrayRef trueOperands, + MutableArrayRef trueCaptures, ValueRange trueOperands, BlockHandle *falseBranch, ArrayRef falseTypes, - MutableArrayRef falseCaptures, ArrayRef falseOperands) { + MutableArrayRef falseCaptures, ValueRange falseOperands) { assert(!*trueBranch && "Unexpected already captured BlockHandle"); assert(!*falseBranch && "Unexpected already captured BlockHandle"); BlockBuilder(trueBranch, trueTypes, trueCaptures)(/* no body */); BlockBuilder(falseBranch, falseTypes, falseCaptures)(/* no body */); SmallVector trueOps(trueOperands.begin(), trueOperands.end()); SmallVector falseOps(falseOperands.begin(), falseOperands.end()); - return OperationHandle::create( - cond, trueBranch->getBlock(), trueOps, falseBranch->getBlock(), falseOps); + return OperationBuilder(cond, trueBranch->getBlock(), trueOps, + falseBranch->getBlock(), falseOps); } 
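The EDSC intrinsics now hand back the concrete op types instead of an opaque `OperationHandle`, which lets call sites keep working with op-specific APIs. A hedged sketch of a caller, assuming an active EDSC `ScopedContext` as these builders require (the function and value names are illustrative):

#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;

// Create an empty then/else diamond rooted at the current insertion point.
static void buildDiamondSketch(OpBuilder &b, Location loc, Value cond) {
  ScopedContext scope(b, loc);
  BlockHandle thenB, elseB;
  // std_cond_br now returns a CondBranchOp, so the branch can be inspected
  // or further rewritten immediately after creation.
  CondBranchOp br =
      std_cond_br(cond, &thenB, /*trueTypes=*/{}, /*trueCaptures=*/{},
                  /*trueOperands=*/{}, &elseB, /*falseTypes=*/{},
                  /*falseCaptures=*/{}, /*falseOperands=*/{});
  (void)br;
}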
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index b46abd637e36f..972a37d20f97b 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -484,7 +484,7 @@ static LogicalResult verify(AtomicRMWOp op) { // GenericAtomicRMWOp //===----------------------------------------------------------------------===// -void GenericAtomicRMWOp::build(Builder *builder, OperationState &result, +void GenericAtomicRMWOp::build(OpBuilder &builder, OperationState &result, Value memref, ValueRange ivs) { result.addOperands(memref); result.addOperands(ivs); @@ -677,13 +677,12 @@ void BranchOp::getCanonicalizationPatterns(OwningRewritePatternList &results, context); } -Optional BranchOp::getSuccessorOperands(unsigned index) { +Optional +BranchOp::getMutableSuccessorOperands(unsigned index) { assert(index == 0 && "invalid successor index"); - return getOperands(); + return destOperandsMutable(); } -bool BranchOp::canEraseSuccessorOperand() { return true; } - Block *BranchOp::getSuccessorForOperands(ArrayRef) { return dest(); } //===----------------------------------------------------------------------===// @@ -775,13 +774,12 @@ static Type getI1SameShape(Type type) { // CmpIOp //===----------------------------------------------------------------------===// -static void buildCmpIOp(Builder *build, OperationState &result, +static void buildCmpIOp(OpBuilder &build, OperationState &result, CmpIPredicate predicate, Value lhs, Value rhs) { result.addOperands({lhs, rhs}); result.types.push_back(getI1SameShape(lhs.getType())); - result.addAttribute( - CmpIOp::getPredicateAttrName(), - build->getI64IntegerAttr(static_cast(predicate))); + result.addAttribute(CmpIOp::getPredicateAttrName(), + build.getI64IntegerAttr(static_cast(predicate))); } // Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer @@ -830,13 +828,12 @@ OpFoldResult CmpIOp::fold(ArrayRef operands) { // CmpFOp //===----------------------------------------------------------------------===// -static void buildCmpFOp(Builder *build, OperationState &result, +static void buildCmpFOp(OpBuilder &build, OperationState &result, CmpFPredicate predicate, Value lhs, Value rhs) { result.addOperands({lhs, rhs}); result.types.push_back(getI1SameShape(lhs.getType())); - result.addAttribute( - CmpFOp::getPredicateAttrName(), - build->getI64IntegerAttr(static_cast(predicate))); + result.addAttribute(CmpFOp::getPredicateAttrName(), + build.getI64IntegerAttr(static_cast(predicate))); } /// Compute `lhs` `pred` `rhs`, where `pred` is one of the known floating point @@ -1023,13 +1020,13 @@ void CondBranchOp::getCanonicalizationPatterns( SimplifyCondBranchIdenticalSuccessors>(context); } -Optional CondBranchOp::getSuccessorOperands(unsigned index) { +Optional +CondBranchOp::getMutableSuccessorOperands(unsigned index) { assert(index < getNumSuccessors() && "invalid successor index"); - return index == trueIndex ? getTrueOperands() : getFalseOperands(); + return index == trueIndex ? trueDestOperandsMutable() + : falseDestOperandsMutable(); } -bool CondBranchOp::canEraseSuccessorOperand() { return true; } - Block *CondBranchOp::getSuccessorForOperands(ArrayRef operands) { if (BoolAttr condAttr = operands.front().dyn_cast_or_null()) return condAttr.getValue() ? 
trueDest() : falseDest(); @@ -1180,9 +1177,9 @@ bool ConstantOp::isBuildableWith(Attribute value, Type type) { value.isa(); } -void ConstantFloatOp::build(Builder *builder, OperationState &result, +void ConstantFloatOp::build(OpBuilder &builder, OperationState &result, const APFloat &value, FloatType type) { - ConstantOp::build(builder, result, type, builder->getFloatAttr(type, value)); + ConstantOp::build(builder, result, type, builder.getFloatAttr(type, value)); } bool ConstantFloatOp::classof(Operation *op) { @@ -1195,21 +1192,19 @@ bool ConstantIntOp::classof(Operation *op) { op->getResult(0).getType().isSignlessInteger(); } -void ConstantIntOp::build(Builder *builder, OperationState &result, +void ConstantIntOp::build(OpBuilder &builder, OperationState &result, int64_t value, unsigned width) { - Type type = builder->getIntegerType(width); - ConstantOp::build(builder, result, type, - builder->getIntegerAttr(type, value)); + Type type = builder.getIntegerType(width); + ConstantOp::build(builder, result, type, builder.getIntegerAttr(type, value)); } /// Build a constant int op producing an integer with the specified type, /// which must be an integer type. -void ConstantIntOp::build(Builder *builder, OperationState &result, +void ConstantIntOp::build(OpBuilder &builder, OperationState &result, int64_t value, Type type) { assert(type.isSignlessInteger() && "ConstantIntOp can only have signless integer type"); - ConstantOp::build(builder, result, type, - builder->getIntegerAttr(type, value)); + ConstantOp::build(builder, result, type, builder.getIntegerAttr(type, value)); } /// ConstantIndexOp only matches values whose result type is Index. @@ -1217,11 +1212,10 @@ bool ConstantIndexOp::classof(Operation *op) { return ConstantOp::classof(op) && op->getResult(0).getType().isIndex(); } -void ConstantIndexOp::build(Builder *builder, OperationState &result, +void ConstantIndexOp::build(OpBuilder &builder, OperationState &result, int64_t value) { - Type type = builder->getIndexType(); - ConstantOp::build(builder, result, type, - builder->getIntegerAttr(type, value)); + Type type = builder.getIndexType(); + ConstantOp::build(builder, result, type, builder.getIntegerAttr(type, value)); } //===----------------------------------------------------------------------===// @@ -1363,7 +1357,7 @@ OpFoldResult DimOp::fold(ArrayRef operands) { // DmaStartOp // --------------------------------------------------------------------------- -void DmaStartOp::build(Builder *builder, OperationState &result, +void DmaStartOp::build(OpBuilder &builder, OperationState &result, Value srcMemRef, ValueRange srcIndices, Value destMemRef, ValueRange destIndices, Value numElements, Value tagMemRef, ValueRange tagIndices, Value stride, @@ -1450,49 +1444,82 @@ ParseResult DmaStartOp::parse(OpAsmParser &parser, OperationState &result) { parser.resolveOperands(tagIndexInfos, indexType, result.operands)) return failure(); - auto memrefType0 = types[0].dyn_cast(); - if (!memrefType0) - return parser.emitError(parser.getNameLoc(), - "expected source to be of memref type"); - - auto memrefType1 = types[1].dyn_cast(); - if (!memrefType1) - return parser.emitError(parser.getNameLoc(), - "expected destination to be of memref type"); - - auto memrefType2 = types[2].dyn_cast(); - if (!memrefType2) - return parser.emitError(parser.getNameLoc(), - "expected tag to be of memref type"); - if (isStrided) { if (parser.resolveOperands(strideInfo, indexType, result.operands)) return failure(); } - // Check that source/destination index list 
size matches associated rank. - if (static_cast(srcIndexInfos.size()) != memrefType0.getRank() || - static_cast(dstIndexInfos.size()) != memrefType1.getRank()) - return parser.emitError(parser.getNameLoc(), - "memref rank not equal to indices count"); - if (static_cast(tagIndexInfos.size()) != memrefType2.getRank()) - return parser.emitError(parser.getNameLoc(), - "tag memref rank not equal to indices count"); return success(); } LogicalResult DmaStartOp::verify() { + unsigned numOperands = getNumOperands(); + + // Mandatory non-variadic operands are: src memref, dst memref, tag memref and + // the number of elements. + if (numOperands < 4) + return emitOpError("expected at least 4 operands"); + + // Check types of operands. The order of these calls is important: the later + // calls rely on some type properties to compute the operand position. + // 1. Source memref. + if (!getSrcMemRef().getType().isa()) + return emitOpError("expected source to be of memref type"); + if (numOperands < getSrcMemRefRank() + 4) + return emitOpError() << "expected at least " << getSrcMemRefRank() + 4 + << " operands"; + if (!getSrcIndices().empty() && + !llvm::all_of(getSrcIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected source indices to be of index type"); + + // 2. Destination memref. + if (!getDstMemRef().getType().isa()) + return emitOpError("expected destination to be of memref type"); + unsigned numExpectedOperands = getSrcMemRefRank() + getDstMemRefRank() + 4; + if (numOperands < numExpectedOperands) + return emitOpError() << "expected at least " << numExpectedOperands + << " operands"; + if (!getDstIndices().empty() && + !llvm::all_of(getDstIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected destination indices to be of index type"); + + // 3. Number of elements. + if (!getNumElements().getType().isIndex()) + return emitOpError("expected num elements to be of index type"); + + // 4. Tag memref. + if (!getTagMemRef().getType().isa()) + return emitOpError("expected tag to be of memref type"); + numExpectedOperands += getTagMemRefRank(); + if (numOperands < numExpectedOperands) + return emitOpError() << "expected at least " << numExpectedOperands + << " operands"; + if (!getTagIndices().empty() && + !llvm::all_of(getTagIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected tag indices to be of index type"); + // DMAs from different memory spaces supported. if (getSrcMemorySpace() == getDstMemorySpace()) return emitOpError("DMA should be between different memory spaces"); - if (getNumOperands() != getTagMemRefRank() + getSrcMemRefRank() + - getDstMemRefRank() + 3 + 1 && - getNumOperands() != getTagMemRefRank() + getSrcMemRefRank() + - getDstMemRefRank() + 3 + 1 + 2) { + // Optional stride-related operands must be either both present or both + // absent. + if (numOperands != numExpectedOperands && + numOperands != numExpectedOperands + 2) return emitOpError("incorrect number of operands"); + + // 5. Strides. 
+  if (isStrided()) {
+    if (!getStride().getType().isIndex() ||
+        !getNumElementsPerStride().getType().isIndex())
+      return emitOpError(
+          "expected stride and num elements per stride to be of type index");
   }
+
   return success();
 }
@@ -1506,8 +1533,9 @@ LogicalResult DmaStartOp::fold(ArrayRef<Attribute> cstOperands,
 // DmaWaitOp
 // ---------------------------------------------------------------------------
 
-void DmaWaitOp::build(Builder *builder, OperationState &result, Value tagMemRef,
-                      ValueRange tagIndices, Value numElements) {
+void DmaWaitOp::build(OpBuilder &builder, OperationState &result,
+                      Value tagMemRef, ValueRange tagIndices,
+                      Value numElements) {
   result.addOperands(tagMemRef);
   result.addOperands(tagIndices);
   result.addOperands(numElements);
@@ -1541,15 +1569,6 @@ ParseResult DmaWaitOp::parse(OpAsmParser &parser, OperationState &result) {
       parser.resolveOperand(numElementsInfo, indexType, result.operands))
     return failure();
 
-  auto memrefType = type.dyn_cast<MemRefType>();
-  if (!memrefType)
-    return parser.emitError(parser.getNameLoc(),
-                            "expected tag to be of memref type");
-
-  if (static_cast<int64_t>(tagIndexInfos.size()) != memrefType.getRank())
-    return parser.emitError(parser.getNameLoc(),
-                            "tag memref rank not equal to indices count");
-
   return success();
 }
@@ -1559,6 +1578,32 @@ LogicalResult DmaWaitOp::fold(ArrayRef<Attribute> cstOperands,
   return foldMemRefCast(*this);
 }
 
+LogicalResult DmaWaitOp::verify() {
+  // Mandatory non-variadic operands are tag and the number of elements.
+  if (getNumOperands() < 2)
+    return emitOpError() << "expected at least 2 operands";
+
+  // Check types of operands. The order of these calls is important: the later
+  // calls rely on some type properties to compute the operand position.
+  if (!getTagMemRef().getType().isa<MemRefType>())
+    return emitOpError() << "expected tag to be of memref type";
+
+  if (getNumOperands() != 2 + getTagMemRefRank())
+    return emitOpError() << "expected " << 2 + getTagMemRefRank()
+                         << " operands";
+
+  if (!getTagIndices().empty() &&
+      !llvm::all_of(getTagIndices().getTypes(),
+                    [](Type t) { return t.isIndex(); }))
+    return emitOpError() << "expected tag indices to be of index type";
+
+  if (!getNumElements().getType().isIndex())
+    return emitOpError()
+           << "expected the number of elements to be of index type";
+
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // ExtractElementOp
 //===----------------------------------------------------------------------===//
@@ -2009,6 +2054,16 @@ OpFoldResult SignedDivIOp::fold(ArrayRef<Attribute> operands) {
         }
         return a.sdiv_ov(b, overflowOrDiv0);
       });
+
+  // Fold out division by one. Assumes all tensors of all ones are splats.
+  if (auto rhs = operands[1].dyn_cast_or_null<IntegerAttr>()) {
+    if (rhs.getValue() == 1)
+      return lhs();
+  } else if (auto rhs = operands[1].dyn_cast_or_null<SplatElementsAttr>()) {
+    if (rhs.getSplatValue<IntegerAttr>().getValue() == 1)
+      return lhs();
+  }
+
   return overflowOrDiv0 ? Attribute() : result;
 }
@@ -2147,7 +2202,7 @@ static Type inferSubViewResultType(MemRefType memRefType) {
       .setAffineMaps(stridedLayout);
 }
 
-void mlir::SubViewOp::build(Builder *b, OperationState &result, Value source,
+void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source,
                             ValueRange offsets, ValueRange sizes,
                             ValueRange strides, Type resultType,
                             ArrayRef<NamedAttribute> attrs) {
@@ -2157,8 +2212,8 @@ void mlir::SubViewOp::build(Builder *b, OperationState &result, Value source,
   result.addAttributes(attrs);
 }
 
-void mlir::SubViewOp::build(Builder *b, OperationState &result, Type resultType,
-                            Value source) {
+void mlir::SubViewOp::build(OpBuilder &b, OperationState &result,
+                            Type resultType, Value source) {
   build(b, result, source, /*offsets=*/{}, /*sizes=*/{}, /*strides=*/{},
         resultType);
 }
@@ -2537,6 +2592,16 @@ OpFoldResult UnsignedDivIOp::fold(ArrayRef<Attribute> operands) {
         }
         return a.udiv(b);
       });
+
+  // Fold out division by one. Assumes all tensors of all ones are splats.
+  if (auto rhs = operands[1].dyn_cast_or_null<IntegerAttr>()) {
+    if (rhs.getValue() == 1)
+      return lhs();
+  } else if (auto rhs = operands[1].dyn_cast_or_null<SplatElementsAttr>()) {
+    if (rhs.getSplatValue<IntegerAttr>().getValue() == 1)
+      return lhs();
+  }
+
   return div0 ? Attribute() : result;
 }
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
new file mode 100644
index 0000000000000..5db3c370954c7
--- /dev/null
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -0,0 +1,18 @@
+add_mlir_dialect_library(MLIRStandardOpsTransforms
+  ExpandAtomic.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/StandardOps/Transforms
+
+  DEPENDS
+  MLIRStandardTransformsIncGen
+  )
+target_link_libraries(MLIRStandardOpsTransforms
+  PUBLIC
+  MLIRIR
+  MLIRPass
+  MLIRStandardOps
+  MLIRSupport
+  MLIRTransforms
+  LLVMSupport
+  )
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ExpandAtomic.cpp b/mlir/lib/Dialect/StandardOps/Transforms/ExpandAtomic.cpp
new file mode 100644
index 0000000000000..41e0ffb60bc95
--- /dev/null
+++ b/mlir/lib/Dialect/StandardOps/Transforms/ExpandAtomic.cpp
@@ -0,0 +1,93 @@
+//===- ExpandAtomic.cpp - Expand AtomicRMWOp to GenericAtomicRMWOp -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements expansion of AtomicRMWOp into GenericAtomicRMWOp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+using namespace mlir;
+
+namespace {
+
+/// Converts `atomic_rmw` that cannot be lowered to a simple atomic op with
+/// AtomicRMWOpLowering pattern, e.g. with "minf" or "maxf" attributes, to
+/// `generic_atomic_rmw` with the expanded code.
+/// +/// %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32 +/// +/// will be lowered to +/// +/// %x = std.generic_atomic_rmw %F[%i] : memref<10xf32> { +/// ^bb0(%current: f32): +/// %cmp = cmpf "ogt", %current, %fval : f32 +/// %new_value = select %cmp, %current, %fval : f32 +/// atomic_yield %new_value : f32 +/// } +struct AtomicRMWOpConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(AtomicRMWOp op, + PatternRewriter &rewriter) const final { + CmpFPredicate predicate; + switch (op.kind()) { + case AtomicRMWKind::maxf: + predicate = CmpFPredicate::OGT; + break; + case AtomicRMWKind::minf: + predicate = CmpFPredicate::OLT; + break; + default: + return failure(); + } + + auto loc = op.getLoc(); + auto genericOp = + rewriter.create(loc, op.memref(), op.indices()); + OpBuilder bodyBuilder = OpBuilder::atBlockEnd(genericOp.getBody()); + + Value lhs = genericOp.getCurrentValue(); + Value rhs = op.value(); + Value cmp = bodyBuilder.create(loc, predicate, lhs, rhs); + Value select = bodyBuilder.create(loc, cmp, lhs, rhs); + bodyBuilder.create(loc, select); + + rewriter.replaceOp(op, genericOp.getResult()); + return success(); + } +}; + +struct ExpandAtomic : public ExpandAtomicBase { + void runOnFunction() override { + OwningRewritePatternList patterns; + patterns.insert(&getContext()); + + ConversionTarget target(getContext()); + target.addLegalOp(); + target.addDynamicallyLegalOp([](AtomicRMWOp op) { + return op.kind() != AtomicRMWKind::maxf && + op.kind() != AtomicRMWKind::minf; + }); + if (failed(mlir::applyPartialConversion(getFunction(), target, patterns))) + signalPassFailure(); + } +}; + +} // namespace + +std::unique_ptr mlir::createExpandAtomicPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h new file mode 100644 index 0000000000000..4748bf83ab997 --- /dev/null +++ b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h @@ -0,0 +1,23 @@ +//===- PassDetail.h - GPU Pass class details --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef DIALECT_STANDARD_TRANSFORMS_PASSDETAIL_H_ +#define DIALECT_STANDARD_TRANSFORMS_PASSDETAIL_H_ + +#include "mlir/Pass/Pass.h" + +namespace mlir { + +class AtomicRMWOp; + +#define GEN_PASS_CLASSES +#include "mlir/Dialect/StandardOps/Transforms/Passes.h.inc" + +} // end namespace mlir + +#endif // DIALECT_STANDARD_TRANSFORMS_PASSDETAIL_H_ diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt index 3e1d8de0d3ba4..9ccbb76b7bd24 100644 --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -10,9 +10,8 @@ add_mlir_dialect_library(MLIRVector DEPENDS MLIRVectorOpsIncGen MLIRVectorTransformPatternsIncGen - ) -target_link_libraries(MLIRVector - PUBLIC + + LINK_LIBS PUBLIC MLIREDSC MLIRIR MLIRStandardOps diff --git a/mlir/lib/Dialect/Vector/EDSC/Builders.cpp b/mlir/lib/Dialect/Vector/EDSC/Builders.cpp index 0759f93edaeca..7d6d2b9432d7c 100644 --- a/mlir/lib/Dialect/Vector/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Vector/EDSC/Builders.cpp @@ -10,7 +10,6 @@ #include "mlir/Dialect/Vector/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 40b7f6a691881..5ff858dcc3a24 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1,4 +1,4 @@ -//===- VectorOps.cpp - MLIR Super Vectorizer Operations -------------------===// +//===- VectorOps.cpp - MLIR Vector Dialect Operations ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -44,8 +44,8 @@ VectorDialect::VectorDialect(MLIRContext *context) /// Materialize a single constant operation from a given attribute value with /// the desired resultant type. 
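The hook documented above is invoked by the folding infrastructure whenever a fold produces an `Attribute` that must be turned back into an SSA value. As a hedged sketch of the general pattern (with `MyDialect` and `MyConstantOp` standing in for a hypothetical dialect and its constant op, not MLIR's actual classes):

#include "mlir/IR/Builders.h"
#include "mlir/IR/Dialect.h"

using namespace mlir;

// Return an op whose single result materializes `value` with type `type`,
// or nullptr if this dialect cannot represent it as a constant.
Operation *MyDialect::materializeConstant(OpBuilder &builder, Attribute value,
                                          Type type, Location loc) {
  if (MyConstantOp::isBuildableWith(value, type))
    return builder.create<MyConstantOp>(loc, type, value);
  return nullptr;
}

The real implementation for the vector dialect follows.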
Operation *VectorDialect::materializeConstant(OpBuilder &builder, - Attribute value, Type type, - Location loc) { + Attribute value, Type type, + Location loc) { return builder.create(loc, type, value); } @@ -126,20 +126,20 @@ static void print(OpAsmPrinter &p, ReductionOp op) { // ContractionOp //===----------------------------------------------------------------------===// -void vector::ContractionOp::build(Builder *builder, OperationState &result, +void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, Value lhs, Value rhs, Value acc, ArrayRef> indexingExprs, ArrayRef iteratorTypes) { result.addOperands({lhs, rhs, acc}); result.addTypes(acc.getType()); result.addAttribute(getIndexingMapsAttrName(), - builder->getAffineMapArrayAttr( + builder.getAffineMapArrayAttr( AffineMap::inferFromExprList(indexingExprs))); result.addAttribute(getIteratorTypesAttrName(), - builder->getStrArrayAttr(iteratorTypes)); + builder.getStrArrayAttr(iteratorTypes)); } -void vector::ContractionOp::build(Builder *builder, OperationState &result, +void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, Value lhs, Value rhs, Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes) { @@ -461,10 +461,10 @@ static Type inferExtractOpResultType(VectorType vectorType, vectorType.getElementType()); } -void vector::ExtractOp::build(Builder *builder, OperationState &result, +void vector::ExtractOp::build(OpBuilder &builder, OperationState &result, Value source, ArrayRef position) { result.addOperands(source); - auto positionAttr = getVectorSubscriptAttr(*builder, position); + auto positionAttr = getVectorSubscriptAttr(builder, position); result.addTypes(inferExtractOpResultType(source.getType().cast(), positionAttr)); result.addAttribute(getPositionAttrName(), positionAttr); @@ -528,13 +528,13 @@ static LogicalResult verify(vector::ExtractOp op) { // ExtractSlicesOp //===----------------------------------------------------------------------===// -void ExtractSlicesOp::build(Builder *builder, OperationState &result, +void ExtractSlicesOp::build(OpBuilder &builder, OperationState &result, TupleType tupleType, Value vector, ArrayRef sizes, ArrayRef strides) { result.addOperands(vector); - auto sizesAttr = getVectorSubscriptAttr(*builder, sizes); - auto stridesAttr = getVectorSubscriptAttr(*builder, strides); + auto sizesAttr = getVectorSubscriptAttr(builder, sizes); + auto stridesAttr = getVectorSubscriptAttr(builder, strides); result.addTypes(tupleType); result.addAttribute(getSizesAttrName(), sizesAttr); result.addAttribute(getStridesAttrName(), stridesAttr); @@ -634,10 +634,10 @@ static LogicalResult verify(BroadcastOp op) { // ShuffleOp //===----------------------------------------------------------------------===// -void ShuffleOp::build(Builder *builder, OperationState &result, Value v1, +void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value v1, Value v2, ArrayRef mask) { result.addOperands({v1, v2}); - auto maskAttr = getVectorSubscriptAttr(*builder, mask); + auto maskAttr = getVectorSubscriptAttr(builder, mask); result.addTypes(v1.getType()); result.addAttribute(getMaskAttrName(), maskAttr); } @@ -731,10 +731,10 @@ static LogicalResult verify(InsertElementOp op) { // InsertOp //===----------------------------------------------------------------------===// -void InsertOp::build(Builder *builder, OperationState &result, Value source, +void InsertOp::build(OpBuilder &builder, OperationState &result, Value source, Value dest, ArrayRef position) { 
   result.addOperands({source, dest});
-  auto positionAttr = getVectorSubscriptAttr(*builder, position);
+  auto positionAttr = getVectorSubscriptAttr(builder, position);
   result.addTypes(dest.getType());
   result.addAttribute(getPositionAttrName(), positionAttr);
 }
@@ -795,13 +795,13 @@ void InsertSlicesOp::getStrides(SmallVectorImpl<int64_t> &results) {
 // InsertStridedSliceOp
 //===----------------------------------------------------------------------===//
 
-void InsertStridedSliceOp::build(Builder *builder, OperationState &result,
+void InsertStridedSliceOp::build(OpBuilder &builder, OperationState &result,
                                  Value source, Value dest,
                                  ArrayRef<int64_t> offsets,
                                  ArrayRef<int64_t> strides) {
   result.addOperands({source, dest});
-  auto offsetsAttr = getVectorSubscriptAttr(*builder, offsets);
-  auto stridesAttr = getVectorSubscriptAttr(*builder, strides);
+  auto offsetsAttr = getVectorSubscriptAttr(builder, offsets);
+  auto stridesAttr = getVectorSubscriptAttr(builder, strides);
   result.addTypes(dest.getType());
   result.addAttribute(getOffsetsAttrName(), offsetsAttr);
   result.addAttribute(getStridesAttrName(), stridesAttr);
@@ -1074,13 +1074,13 @@ static Type inferStridedSliceOpResultType(VectorType vectorType,
   return VectorType::get(shape, vectorType.getElementType());
 }
 
-void StridedSliceOp::build(Builder *builder, OperationState &result,
+void StridedSliceOp::build(OpBuilder &builder, OperationState &result,
                            Value source, ArrayRef<int64_t> offsets,
                            ArrayRef<int64_t> sizes, ArrayRef<int64_t> strides) {
   result.addOperands(source);
-  auto offsetsAttr = getVectorSubscriptAttr(*builder, offsets);
-  auto sizesAttr = getVectorSubscriptAttr(*builder, sizes);
-  auto stridesAttr = getVectorSubscriptAttr(*builder, strides);
+  auto offsetsAttr = getVectorSubscriptAttr(builder, offsets);
+  auto sizesAttr = getVectorSubscriptAttr(builder, sizes);
+  auto stridesAttr = getVectorSubscriptAttr(builder, strides);
   result.addTypes(
       inferStridedSliceOpResultType(source.getType().cast<VectorType>(),
                                     offsetsAttr, sizesAttr, stridesAttr));
@@ -1462,30 +1462,52 @@ static LogicalResult verify(ShapeCastOp op) {
 // TypeCastOp
 //===----------------------------------------------------------------------===//
 
-static MemRefType inferVectorTypeCastResultType(MemRefType t) {
-  return MemRefType::get({}, VectorType::get(t.getShape(), t.getElementType()));
+static SmallVector<int64_t, 8> extractShape(MemRefType memRefType) {
+  auto vectorType = memRefType.getElementType().dyn_cast<VectorType>();
+  SmallVector<int64_t, 8> res(memRefType.getShape().begin(),
+                              memRefType.getShape().end());
+  if (vectorType) {
+    res.reserve(memRefType.getRank() + vectorType.getRank());
+    for (auto s : vectorType.getShape())
+      res.push_back(s);
+  }
+  return res;
 }
 
-void TypeCastOp::build(Builder *builder, OperationState &result, Value source) {
+/// Build the canonical memRefType with a single vector.
+/// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4x5x6xf32>>.
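To make the comment above concrete before the build method below: for `memref<4 x 5 x vector<6 x f32>>`, `extractShape` concatenates the memref dims with the element vector's dims, yielding {4, 5, 6}, so the canonical result is the rank-0 `memref<vector<4x5x6xf32>>`. A tiny illustration of that concatenation, outside the MLIR API:

#include <cstdint>
#include <vector>

// Append the element-vector shape to the memref shape, mirroring what
// extractShape computes: {4, 5} ++ {6} == {4, 5, 6}.
static std::vector<int64_t> concatShapes(std::vector<int64_t> memrefShape,
                                         const std::vector<int64_t> &vecShape) {
  memrefShape.insert(memrefShape.end(), vecShape.begin(), vecShape.end());
  return memrefShape;
}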
+void TypeCastOp::build(OpBuilder &builder, OperationState &result, + Value source) { result.addOperands(source); + MemRefType memRefType = source.getType().cast(); + VectorType vectorType = + VectorType::get(extractShape(memRefType), + getElementTypeOrSelf(getElementTypeOrSelf(memRefType))); result.addTypes( - inferVectorTypeCastResultType(source.getType().cast())); -} - -static void print(OpAsmPrinter &p, TypeCastOp op) { - auto type = op.getOperand().getType().cast(); - p << op.getOperationName() << ' ' << op.memref() << " : " << type << " to " - << inferVectorTypeCastResultType(type); + MemRefType::get({}, vectorType, {}, memRefType.getMemorySpace())); } static LogicalResult verify(TypeCastOp op) { MemRefType canonicalType = canonicalizeStridedLayout(op.getMemRefType()); if (!canonicalType.getAffineMaps().empty()) return op.emitOpError("expects operand to be a memref with no layout"); - - auto resultType = inferVectorTypeCastResultType(op.getMemRefType()); - if (op.getResultMemRefType() != resultType) - return op.emitOpError("expects result type to be: ") << resultType; + if (!op.getResultMemRefType().getAffineMaps().empty()) + return op.emitOpError("expects result to be a memref with no layout"); + if (op.getResultMemRefType().getMemorySpace() != + op.getMemRefType().getMemorySpace()) + return op.emitOpError("expects result in same memory space"); + + auto sourceType = op.getMemRefType(); + auto resultType = op.getResultMemRefType(); + if (getElementTypeOrSelf(getElementTypeOrSelf(sourceType)) != + getElementTypeOrSelf(getElementTypeOrSelf(resultType))) + return op.emitOpError( + "expects result and operand with same underlying scalar type: ") + << resultType; + if (extractShape(sourceType) != extractShape(resultType)) + return op.emitOpError( + "expects concatenated result and operand shapes to be equal: ") + << resultType; return success(); } diff --git a/mlir/lib/EDSC/Builders.cpp b/mlir/lib/EDSC/Builders.cpp index 247ca4af6c1a9..ad12561d81c14 100644 --- a/mlir/lib/EDSC/Builders.cpp +++ b/mlir/lib/EDSC/Builders.cpp @@ -15,32 +15,25 @@ using namespace mlir; using namespace mlir::edsc; -mlir::edsc::ScopedContext::ScopedContext(OpBuilder &builder, Location location) - : builder(builder), location(location), - enclosingScopedContext(ScopedContext::getCurrentScopedContext()), - nestedBuilder(nullptr) { +mlir::edsc::ScopedContext::ScopedContext(OpBuilder &b, Location location) + : builder(b), guard(builder), location(location), + enclosingScopedContext(ScopedContext::getCurrentScopedContext()) { getCurrentScopedContext() = this; } /// Sets the insertion point of the builder to 'newInsertPt' for the duration /// of the scope. The existing insertion point of the builder is restored on /// destruction. 
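The constructors below drop the manual save/restore of the insertion point in favor of an `OpBuilder::InsertionGuard` member, the usual RAII idiom. A minimal sketch of the idiom on its own, assuming a builder `b` and a scratch block:

#include "mlir/IR/Builders.h"

static void withTemporaryInsertion(mlir::OpBuilder &b, mlir::Block *scratch) {
  mlir::OpBuilder::InsertionGuard guard(b); // Snapshot the insertion point.
  b.setInsertionPointToEnd(scratch);        // Redirect for this scope only.
  // ... create ops into `scratch` here ...
}                                           // Guard restores the old point.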
-mlir::edsc::ScopedContext::ScopedContext(OpBuilder &builder,
+mlir::edsc::ScopedContext::ScopedContext(OpBuilder &b,
                                          OpBuilder::InsertPoint newInsertPt,
                                          Location location)
-    : builder(builder), prevBuilderInsertPoint(builder.saveInsertionPoint()),
-      location(location),
-      enclosingScopedContext(ScopedContext::getCurrentScopedContext()),
-      nestedBuilder(nullptr) {
+    : builder(b), guard(builder), location(location),
+      enclosingScopedContext(ScopedContext::getCurrentScopedContext()) {
   getCurrentScopedContext() = this;
   builder.restoreInsertionPoint(newInsertPt);
 }

 mlir::edsc::ScopedContext::~ScopedContext() {
-  assert(!nestedBuilder &&
-         "Active NestedBuilder must have been exited at this point!");
-  if (prevBuilderInsertPoint)
-    builder.restoreInsertionPoint(*prevBuilderInsertPoint);
   getCurrentScopedContext() = enclosingScopedContext;
 }
@@ -49,7 +42,7 @@ ScopedContext *&mlir::edsc::ScopedContext::getCurrentScopedContext() {
   return context;
 }

-OpBuilder &mlir::edsc::ScopedContext::getBuilder() {
+OpBuilder &mlir::edsc::ScopedContext::getBuilderRef() {
   assert(ScopedContext::getCurrentScopedContext() &&
          "Unexpected Null ScopedContext");
   return ScopedContext::getCurrentScopedContext()->builder;
@@ -62,29 +55,15 @@ Location mlir::edsc::ScopedContext::getLocation() {
 }

 MLIRContext *mlir::edsc::ScopedContext::getContext() {
-  return getBuilder().getContext();
-}
-
-OperationHandle OperationHandle::create(StringRef name,
-                                        ArrayRef<Value> operands,
-                                        ArrayRef<Type> resultTypes,
-                                        ArrayRef<NamedAttribute> attributes) {
-  OperationState state(ScopedContext::getLocation(), name);
-  SmallVector<Value, 4> ops(operands.begin(), operands.end());
-  state.addOperands(ops);
-  state.addTypes(resultTypes);
-  for (const auto &attr : attributes) {
-    state.addAttribute(attr.first, attr.second);
-  }
-  return OperationHandle(ScopedContext::getBuilder().createOperation(state));
+  return getBuilderRef().getContext();
 }

 BlockHandle mlir::edsc::BlockHandle::create(ArrayRef<Type> argTypes) {
-  auto &currentB = ScopedContext::getBuilder();
+  auto &currentB = ScopedContext::getBuilderRef();
   auto *ib = currentB.getInsertionBlock();
   auto ip = currentB.getInsertionPoint();
   BlockHandle res;
-  res.block = ScopedContext::getBuilder().createBlock(ib->getParent());
+  res.block = ScopedContext::getBuilderRef().createBlock(ib->getParent());
   // createBlock sets the insertion point inside the block.
   // We do not want this behavior when using declarative builders with nesting.
   currentB.setInsertionPoint(ib, ip);
@@ -96,17 +75,15 @@ BlockHandle mlir::edsc::BlockHandle::create(ArrayRef<Type> argTypes) {

 BlockHandle mlir::edsc::BlockHandle::createInRegion(Region &region,
                                                     ArrayRef<Type> argTypes) {
-  auto &currentB = ScopedContext::getBuilder();
   BlockHandle res;
   region.push_back(new Block);
   res.block = &region.back();
   // createBlock sets the insertion point inside the block.
   // We do not want this behavior when using declarative builders with nesting.
-  OpBuilder::InsertionGuard g(currentB);
-  currentB.setInsertionPoint(res.block, res.block->begin());
-  for (auto t : argTypes) {
-    res.block->addArgument(t);
-  }
+  OpBuilder::InsertionGuard g(ScopedContext::getBuilderRef());
+  ScopedContext::getBuilderRef().setInsertionPoint(res.block,
+                                                   res.block->begin());
+  res.block->addArguments(argTypes);
   return res;
 }
diff --git a/mlir/lib/EDSC/CMakeLists.txt b/mlir/lib/EDSC/CMakeLists.txt
index 1435dbb22926b..6d56f263f2713 100644
--- a/mlir/lib/EDSC/CMakeLists.txt
+++ b/mlir/lib/EDSC/CMakeLists.txt
@@ -8,10 +8,8 @@ add_mlir_library(MLIREDSC
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/EDSC
-  )
-target_link_libraries(MLIREDSC
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   MLIRSupport
   )
@@ -21,9 +19,8 @@ add_mlir_library(MLIREDSCInterface
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/EDSC
-  )
-target_link_libraries(MLIREDSCInterface
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   MLIRSupport
   MLIRParser
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index df3268a49d59f..06e25dcf27b84 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -1,3 +1,6 @@
+# Exclude these from libMLIR.so because the JIT infrastructure
+# is a big dependency which most don't need.
+
 set(LLVM_OPTIONAL_SOURCES
   CRunnerUtils.cpp
   ExecutionEngine.cpp
@@ -5,43 +8,63 @@ set(LLVM_OPTIONAL_SOURCES
   OptUtils.cpp
   )

-llvm_map_components_to_libnames(outlibs "nativecodegen" "IPO")
 add_mlir_library(MLIRExecutionEngine
   ExecutionEngine.cpp
   OptUtils.cpp

+  EXCLUDE_FROM_LIBMLIR
+
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/ExecutionEngine
-  )
-target_link_libraries(MLIRExecutionEngine
-  PUBLIC
+
+  DEPENDS
+  intrinsics_gen
+
+  LINK_COMPONENTS
+  Core
+  ExecutionEngine
+  Object
+  OrcJIT
+  JITLink
+  Analysis
+  AggressiveInstCombine
+  InstCombine
+  MC
+  ScalarOpts
+  Target
+  Vectorize
+  TransformUtils
+  nativecodegen
+  IPO
+
+  LINK_LIBS PUBLIC
   MLIRLLVMIR
   MLIRTargetLLVMIR
-  LLVMExecutionEngine
-  LLVMObject
-  LLVMOrcJIT
-  LLVMJITLink
-  LLVMSupport
-  LLVMAnalysis
-  LLVMAggressiveInstCombine
-  LLVMInstCombine
-  LLVMMC
-  LLVMScalarOpts
-  LLVMTarget
-  LLVMVectorize
-  LLVMTransformUtils
-
-  ${outlibs})
-
-add_llvm_library(mlir_c_runner_utils SHARED CRunnerUtils.cpp)
+  )
+
+add_mlir_library(mlir_c_runner_utils
+  SHARED
+  CRunnerUtils.cpp
+
+  EXCLUDE_FROM_LIBMLIR
+  )
 set_property(TARGET mlir_c_runner_utils PROPERTY CXX_STANDARD 11)
-add_llvm_library(mlir_c_runner_utils_static CRunnerUtils.cpp)
+
+add_mlir_library(mlir_c_runner_utils_static
+  CRunnerUtils.cpp
+
+  EXCLUDE_FROM_LIBMLIR
+  )
 set_property(TARGET mlir_c_runner_utils_static PROPERTY CXX_STANDARD 11)
 target_compile_definitions(mlir_c_runner_utils PRIVATE mlir_c_runner_utils_EXPORTS)

-add_llvm_library(mlir_runner_utils SHARED RunnerUtils.cpp)
-target_link_libraries(mlir_runner_utils
-  PUBLIC
+add_mlir_library(mlir_runner_utils
+  SHARED
+  RunnerUtils.cpp
+
+  EXCLUDE_FROM_LIBMLIR
+
+  LINK_LIBS PUBLIC
   mlir_c_runner_utils_static
   )
 target_compile_definitions(mlir_runner_utils PRIVATE mlir_runner_utils_EXPORTS)
diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
index ad5be24378ceb..11d6c6ddbfffd 100644
--- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
@@ -23,6 +23,7 @@
 // By providing elementary printing methods only, this
 // library can remain fully unaware of low-level implementation
 // details of our vectors. Also useful for direct LLVM IR output.
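// For instance (illustrative): print_i1(true) writes '1', and print_f32(2.5f)
// writes "2.5" through the %g format used below.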
+extern "C" void print_i1(bool b) { fputc(b ? '1' : '0', stdout); } extern "C" void print_i32(int32_t i) { fprintf(stdout, "%" PRId32, i); } extern "C" void print_i64(int64_t l) { fprintf(stdout, "%" PRId64, l); } extern "C" void print_f32(float f) { fprintf(stdout, "%g", f); } diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index e05556a7cbf92..25e913aac5fda 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -95,6 +95,22 @@ AffineMap AffineMap::getConstantMap(int64_t val, MLIRContext *context) { {getAffineConstantExpr(val, context)}); } +/// Returns an identity affine map (d0, ..., dn) -> (dp, ..., dn) on the most +/// minor dimensions. +AffineMap AffineMap::getMinorIdentityMap(unsigned dims, unsigned results, + MLIRContext *context) { + assert(dims >= results && "Dimension mismatch"); + auto id = AffineMap::getMultiDimIdentityMap(dims, context); + return AffineMap::get(dims, 0, id.getResults().take_back(results), context); +} + +bool AffineMap::isMinorIdentity(AffineMap map) { + if (!map) + return false; + return map == getMinorIdentityMap(map.getNumDims(), map.getNumResults(), + map.getContext()); +}; + /// Returns an AffineMap representing a permutation. AffineMap AffineMap::getPermutationMap(ArrayRef permutation, MLIRContext *context) { diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index f17d8fde6a82f..005571afb59a5 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -2302,11 +2302,11 @@ void OperationPrinter::print(Block *block, bool printBlockArgs, // Print out some context information about the predecessors of this block. if (!block->getParent()) { - os << "\t// block is not in a region!"; + os << " // block is not in a region!"; } else if (block->hasNoPredecessors()) { - os << "\t// no predecessors"; + os << " // no predecessors"; } else if (auto *pred = block->getSinglePredecessor()) { - os << "\t// pred: "; + os << " // pred: "; printBlockName(pred); } else { // We want to print the predecessors in increasing numeric order, not in @@ -2316,7 +2316,7 @@ void OperationPrinter::print(Block *block, bool printBlockArgs, predIDs.push_back({state->getSSANameState().getBlockID(pred), pred}); llvm::array_pod_sort(predIDs.begin(), predIDs.end()); - os << "\t// " << predIDs.size() << " preds: "; + os << " // " << predIDs.size() << " preds: "; interleaveComma(predIDs, [&](std::pair pred) { printBlockName(pred.second); diff --git a/mlir/lib/IR/Attributes.cpp b/mlir/lib/IR/Attributes.cpp index a380bc7b22c5c..5cb2d5a2ea84c 100644 --- a/mlir/lib/IR/Attributes.cpp +++ b/mlir/lib/IR/Attributes.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/Types.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Endian.h" using namespace mlir; using namespace mlir::detail; @@ -101,45 +102,40 @@ static bool compareNamedAttributeWithName(const NamedAttribute &attr, return strncmp(attr.first.data(), name.data(), name.size()) < 0; } -DictionaryAttr DictionaryAttr::get(ArrayRef value, - MLIRContext *context) { - assert(llvm::all_of(value, - [](const NamedAttribute &attr) { return attr.second; }) && - "value cannot have null entries"); - - // We need to sort the element list to canonicalize it, but we also don't want - // to do a ton of work in the super common case where the element list is - // already sorted. - SmallVector storage; +/// Helper function that does either an in place sort or sorts from source array +/// into destination. 
+/// If inPlace then storage is both the source and the destination, else value
+/// is the source and storage the destination. Returns whether the source was
+/// sorted.
+template <bool inPlace>
+static bool dictionaryAttrSort(ArrayRef<NamedAttribute> value,
+                               SmallVectorImpl<NamedAttribute> &storage) {
+  // Specialize for the common case.
   switch (value.size()) {
   case 0:
-    break;
   case 1:
-    // A single element is already sorted.
+    // Zero or one elements are already sorted.
     break;
   case 2:
     assert(value[0].first != value[1].first &&
            "DictionaryAttr element names must be unique");
-
-    // Don't invoke a general sort for two element case.
     if (compareNamedAttributes(&value[0], &value[1]) > 0) {
-      storage.push_back(value[1]);
-      storage.push_back(value[0]);
-      value = storage;
+      if (inPlace)
+        std::swap(storage[0], storage[1]);
+      else
+        storage.append({value[1], value[0]});
+      return true;
     }
     break;
   default:
     // Check to see they are sorted already.
-    bool isSorted = true;
-    for (unsigned i = 0, e = value.size() - 1; i != e; ++i) {
-      if (compareNamedAttributes(&value[i], &value[i + 1]) > 0) {
-        isSorted = false;
-        break;
-      }
-    }
-    // If not, do a general sort.
+    bool isSorted =
+        llvm::is_sorted(value, [](NamedAttribute l, NamedAttribute r) {
+          return compareNamedAttributes(&l, &r) < 0;
+        });
     if (!isSorted) {
-      storage.append(value.begin(), value.end());
+      // If not, do a general sort.
+      if (!inPlace)
+        storage.append(value.begin(), value.end());
       llvm::array_pod_sort(storage.begin(), storage.end(),
                            compareNamedAttributes);
       value = storage;
@@ -151,7 +147,28 @@ DictionaryAttr DictionaryAttr::get(ArrayRef<NamedAttribute> value,
                      return l.first == r.first;
                    }) == value.end() &&
            "DictionaryAttr element names must be unique");
+    return !isSorted;
   }
+  return false;
+}
+
+/// Sorts the NamedAttributes in the array ordered by name as expected by
+/// getWithSorted.
+/// Requires: uniquely named attributes.
+void DictionaryAttr::sort(SmallVectorImpl<NamedAttribute> &array) {
+  dictionaryAttrSort</*inPlace=*/true>(array, array);
+}
+
+DictionaryAttr DictionaryAttr::get(ArrayRef<NamedAttribute> value,
+                                   MLIRContext *context) {
+  assert(llvm::all_of(value,
+                      [](const NamedAttribute &attr) { return attr.second; }) &&
+         "value cannot have null entries");
+
+  // We need to sort the element list to canonicalize it.
+  SmallVector<NamedAttribute, 8> storage;
+  if (dictionaryAttrSort</*inPlace=*/false>(value, storage))
+    value = storage;

   return Base::get(context, StandardAttributes::Dictionary, value);
 }
@@ -180,15 +197,26 @@ ArrayRef<NamedAttribute> DictionaryAttr::getValue() const {

 /// Return the specified attribute if present, null otherwise.
 Attribute DictionaryAttr::get(StringRef name) const {
+  Optional<NamedAttribute> attr = getNamed(name);
+  return attr ? attr->second : nullptr;
+}
+Attribute DictionaryAttr::get(Identifier name) const {
+  Optional<NamedAttribute> attr = getNamed(name);
+  return attr ? attr->second : nullptr;
+}
+
+/// Return the specified named attribute if present, None otherwise.
+Optional<NamedAttribute> DictionaryAttr::getNamed(StringRef name) const {
   ArrayRef<NamedAttribute> values = getValue();
   auto it = llvm::lower_bound(values, name, compareNamedAttributeWithName);
-  return it != values.end() && it->first == name ? it->second : Attribute();
+  return it != values.end() && it->first == name ? *it
+                                                 : Optional<NamedAttribute>();
 }
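// Illustrative use (editorial example): for a dictionary {a = 1 : i32},
// getNamed("a") yields the NamedAttribute ("a", 1 : i32), while getNamed("b")
// yields llvm::None.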
-Attribute DictionaryAttr::get(Identifier name) const {
+Optional<NamedAttribute> DictionaryAttr::getNamed(Identifier name) const {
   for (auto elt : getValue())
     if (elt.first == name)
-      return elt.second;
-  return nullptr;
+      return elt;
+  return llvm::None;
 }

 DictionaryAttr::iterator DictionaryAttr::begin() const {
@@ -523,6 +551,29 @@ static bool getBit(const char *rawData, size_t bitPos) {
   return (rawData[bitPos / CHAR_BIT] & (1 << (bitPos % CHAR_BIT))) != 0;
 }

+/// Get start position of actual data in `value`. Actual data is
+/// stored in last `bitWidth`/CHAR_BIT bytes in big endian.
+static char *getAPIntDataPos(APInt &value, size_t bitWidth) {
+  char *dataPos =
+      const_cast<char *>(reinterpret_cast<const char *>(value.getRawData()));
+  if (llvm::support::endian::system_endianness() ==
+      llvm::support::endianness::big)
+    dataPos = dataPos + 8 - llvm::divideCeil(bitWidth, CHAR_BIT);
+  return dataPos;
+}
+
+/// Read APInt `value` from appropriate position.
+static void readAPInt(APInt &value, size_t bitWidth, char *outData) {
+  char *dataPos = getAPIntDataPos(value, bitWidth);
+  std::copy_n(dataPos, llvm::divideCeil(bitWidth, CHAR_BIT), outData);
+}
+
+/// Write `inData` to appropriate position of APInt `value`.
+static void writeAPInt(const char *inData, size_t bitWidth, APInt &value) {
+  char *dataPos = getAPIntDataPos(value, bitWidth);
+  std::copy_n(inData, llvm::divideCeil(bitWidth, CHAR_BIT), dataPos);
+}
+
 /// Writes value to the bit position `bitPos` in array `rawData`.
 static void writeBits(char *rawData, size_t bitPos, APInt value) {
   size_t bitWidth = value.getBitWidth();
@@ -533,9 +584,7 @@ static void writeBits(char *rawData, size_t bitPos, APInt value) {
   // Otherwise, the bit position is guaranteed to be byte aligned.
   assert((bitPos % CHAR_BIT) == 0 && "expected bitPos to be 8-bit aligned");
-  std::copy_n(reinterpret_cast<const char *>(value.getRawData()),
-              llvm::divideCeil(bitWidth, CHAR_BIT),
-              rawData + (bitPos / CHAR_BIT));
+  readAPInt(value, bitWidth, rawData + (bitPos / CHAR_BIT));
 }

 /// Reads the next `bitWidth` bits from the bit position `bitPos` in array
@@ -548,9 +597,7 @@ static APInt readBits(const char *rawData, size_t bitPos, size_t bitWidth) {
   // Otherwise, the bit position must be 8-bit aligned.
   assert((bitPos % CHAR_BIT) == 0 && "expected bitPos to be 8-bit aligned");
   APInt result(bitWidth, 0);
-  std::copy_n(
-      rawData + (bitPos / CHAR_BIT), llvm::divideCeil(bitWidth, CHAR_BIT),
-      const_cast<char *>(reinterpret_cast<const char *>(result.getRawData())));
+  writeAPInt(rawData + (bitPos / CHAR_BIT), bitWidth, result);
   return result;
 }
@@ -588,6 +635,8 @@ Attribute DenseElementsAttr::AttributeElementIterator::operator*() const {
     FloatElementIterator floatIt(floatEltTy.getFloatSemantics(), intIt);
     return FloatAttr::get(eltTy, *floatIt);
   }
+  if (owner.isa<DenseStringElementsAttr>())
+    return StringAttr::get(owner.getRawStringData()[index], eltTy);
   llvm_unreachable("unexpected element type");
 }
@@ -628,11 +677,23 @@ DenseElementsAttr::FloatElementIterator::FloatElementIterator(

 DenseElementsAttr DenseElementsAttr::get(ShapedType type,
                                          ArrayRef<Attribute> values) {
-  assert(type.getElementType().isIntOrIndexOrFloat() &&
-         "expected int or index or float element type");
   assert(hasSameElementsOrSplat(type, values));
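// Illustrative use (editorial sketch; 'tensorTy' is a placeholder): with the
// string support added below, a dense attribute over a non-numeric element
// type can be built from StringAttr values, e.g.
//   DenseElementsAttr::get(tensorTy, {StringAttr::get("foo", ctx)});
// which forwards to the ArrayRef<StringRef> form.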
+  // If the element type is not based on int/float/index, assume it is a string
+  // type.
   auto eltType = type.getElementType();
+  if (!type.getElementType().isIntOrIndexOrFloat()) {
+    SmallVector<StringRef, 8> stringValues;
+    stringValues.reserve(values.size());
+    for (Attribute attr : values) {
+      assert(attr.isa<StringAttr>() &&
+             "expected string value for non integer/index/float element");
+      stringValues.push_back(attr.cast<StringAttr>().getValue());
+    }
+    return get(type, stringValues);
+  }
+
+  // Otherwise, get the raw storage width to use for the allocation.
   size_t bitWidth = getDenseElementBitWidth(eltType);
   size_t storageBitWidth = getDenseElementStorageWidth(bitWidth);
@@ -1144,19 +1205,20 @@ std::vector<ptrdiff_t> SparseElementsAttr::getFlattenedSparseIndices() const {
 }

 //===----------------------------------------------------------------------===//
-// NamedAttributeList
+// MutableDictionaryAttr
 //===----------------------------------------------------------------------===//

-NamedAttributeList::NamedAttributeList(ArrayRef<NamedAttribute> attributes) {
+MutableDictionaryAttr::MutableDictionaryAttr(
+    ArrayRef<NamedAttribute> attributes) {
   setAttrs(attributes);
 }

-ArrayRef<NamedAttribute> NamedAttributeList::getAttrs() const {
+ArrayRef<NamedAttribute> MutableDictionaryAttr::getAttrs() const {
   return attrs ? attrs.getValue() : llvm::None;
 }

 /// Replace the held attributes with ones provided in 'newAttrs'.
-void NamedAttributeList::setAttrs(ArrayRef<NamedAttribute> attributes) {
+void MutableDictionaryAttr::setAttrs(ArrayRef<NamedAttribute> attributes) {
   // Don't create an attribute list if there are no attributes.
   if (attributes.empty())
     attrs = nullptr;
@@ -1165,18 +1227,27 @@
 }

 /// Return the specified attribute if present, null otherwise.
-Attribute NamedAttributeList::get(StringRef name) const {
+Attribute MutableDictionaryAttr::get(StringRef name) const {
   return attrs ? attrs.get(name) : nullptr;
 }

 /// Return the specified attribute if present, null otherwise.
-Attribute NamedAttributeList::get(Identifier name) const {
+Attribute MutableDictionaryAttr::get(Identifier name) const {
   return attrs ? attrs.get(name) : nullptr;
 }

+/// Return the specified named attribute if present, None otherwise.
+Optional<NamedAttribute> MutableDictionaryAttr::getNamed(StringRef name) const {
+  return attrs ? attrs.getNamed(name) : Optional<NamedAttribute>();
+}
+Optional<NamedAttribute>
+MutableDictionaryAttr::getNamed(Identifier name) const {
+  return attrs ? attrs.getNamed(name) : Optional<NamedAttribute>();
+}
+
 /// If an attribute exists with the specified name, change it to the new
 /// value. Otherwise, add a new attribute with the specified name/value.
-void NamedAttributeList::set(Identifier name, Attribute value) {
+void MutableDictionaryAttr::set(Identifier name, Attribute value) {
   assert(value && "attributes may never be null");

   // Look for an existing value for the given name, and set it in-place.
@@ -1206,7 +1277,7 @@

 /// Remove the attribute with the specified name if it exists. The return
 /// value indicates whether the attribute was present or not.
-auto NamedAttributeList::remove(Identifier name) -> RemoveResult {
+auto MutableDictionaryAttr::remove(Identifier name) -> RemoveResult {
   auto origAttrs = getAttrs();
   for (unsigned i = 0, e = origAttrs.size(); i != e; ++i) {
     if (origAttrs[i].first == name) {
diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp
index 22abeb5a364fc..fcaf33aa98eb3 100644
--- a/mlir/lib/IR/Builders.cpp
+++ b/mlir/lib/IR/Builders.cpp
@@ -330,15 +330,18 @@ AffineMap Builder::getShiftedAffineMap(AffineMap map, int64_t shift) {
 }

 //===----------------------------------------------------------------------===//
-// OpBuilder.
+// OpBuilder
 //===----------------------------------------------------------------------===//

-OpBuilder::~OpBuilder() {}
+OpBuilder::Listener::~Listener() {}

 /// Insert the given operation at the current insertion point and return it.
 Operation *OpBuilder::insert(Operation *op) {
   if (block)
     block->getOperations().insert(insertPoint, op);
+
+  if (listener)
+    listener->notifyOperationInserted(op);
   return op;
 }
@@ -355,6 +358,9 @@ Block *OpBuilder::createBlock(Region *parent, Region::iterator insertPt,
   b->addArguments(argTypes);
   parent->getBlocks().insert(insertPt, b);
   setInsertionPointToEnd(b);
+
+  if (listener)
+    listener->notifyBlockCreated(b);
   return b;
 }
diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt
index 64998e4252c34..2d5f5cb9e2b45 100644
--- a/mlir/lib/IR/CMakeLists.txt
+++ b/mlir/lib/IR/CMakeLists.txt
@@ -8,9 +8,9 @@ add_mlir_library(MLIRIR
   DEPENDS
   MLIRCallInterfacesIncGen
   MLIROpAsmInterfacesIncGen
-  )
-target_link_libraries(MLIRIR
-  PUBLIC
+  MLIRSymbolInterfacesIncGen
+
+  LINK_LIBS PUBLIC
   MLIRSupport
   LLVMSupport
   )
diff --git a/mlir/lib/Analysis/Dominance.cpp b/mlir/lib/IR/Dominance.cpp
similarity index 99%
rename from mlir/lib/Analysis/Dominance.cpp
rename to mlir/lib/IR/Dominance.cpp
index ed3940535a8e3..86313adc4c0d8 100644
--- a/mlir/lib/Analysis/Dominance.cpp
+++ b/mlir/lib/IR/Dominance.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "mlir/Analysis/Dominance.h"
+#include "mlir/IR/Dominance.h"
 #include "mlir/IR/Operation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/GenericDomTreeConstruction.h"
diff --git a/mlir/lib/IR/Function.cpp b/mlir/lib/IR/Function.cpp
index 1933c4df18bd8..a26d7db7e921d 100644
--- a/mlir/lib/IR/Function.cpp
+++ b/mlir/lib/IR/Function.cpp
@@ -24,8 +24,8 @@ using namespace mlir;
 FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                       ArrayRef<NamedAttribute> attrs) {
   OperationState state(location, "func");
-  Builder builder(location->getContext());
-  FuncOp::build(&builder, state, name, type, attrs);
+  OpBuilder builder(location->getContext());
+  FuncOp::build(builder, state, name, type, attrs);
   return cast<FuncOp>(Operation::create(state));
 }
 FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
@@ -35,24 +35,24 @@ FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
 }
 FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                       ArrayRef<NamedAttribute> attrs,
-                      ArrayRef<NamedAttributeList> argAttrs) {
+                      ArrayRef<MutableDictionaryAttr> argAttrs) {
   FuncOp func = create(location, name, type, attrs);
   func.setAllArgAttrs(argAttrs);
   return func;
 }

-void FuncOp::build(Builder *builder, OperationState &result, StringRef name,
+void FuncOp::build(OpBuilder &builder, OperationState &result, StringRef name,
                    FunctionType type, ArrayRef<NamedAttribute> attrs) {
   result.addAttribute(SymbolTable::getSymbolAttrName(),
-                      builder->getStringAttr(name));
+                      builder.getStringAttr(name));
   result.addAttribute(getTypeAttrName(), TypeAttr::get(type));
   result.attributes.append(attrs.begin(), attrs.end());
   result.addRegion();
 }

-void FuncOp::build(Builder *builder, OperationState &result, StringRef name,
+void FuncOp::build(OpBuilder &builder, OperationState &result, StringRef name,
                    FunctionType type, ArrayRef<NamedAttribute> attrs,
-                   ArrayRef<NamedAttributeList> argAttrs) {
+                   ArrayRef<MutableDictionaryAttr> argAttrs) {
   build(builder, result, name, type, attrs);
   assert(type.getNumInputs() == argAttrs.size());
   SmallString<8> argAttrName;
@@ -115,7 +115,7 @@ void FuncOp::eraseArguments(ArrayRef<unsigned> argIndices) {

   // Update the function type and arg attrs.
   SmallVector<Type, 4> newInputTypes;
-  SmallVector<NamedAttributeList, 4> newArgAttrs;
+  SmallVector<MutableDictionaryAttr, 4> newArgAttrs;
   for (int i = 0; i < originalNumArgs; i++) {
     if (shouldEraseArg(i))
       continue;
diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp
index b25c5111d8dcb..c59c53567488d 100644
--- a/mlir/lib/IR/MLIRContext.cpp
+++ b/mlir/lib/IR/MLIRContext.cpp
@@ -50,6 +50,10 @@ namespace {
 /// various bits of an MLIRContext. This uses a struct wrapper to avoid the need
 /// for global command line options.
 struct MLIRContextOptions {
+  llvm::cl::opt<bool> disableThreading{
+      "mlir-disable-threading",
+      llvm::cl::desc("Disabling multi-threading within MLIR")};
+
   llvm::cl::opt<bool> printOpOnDiagnostic{
       "mlir-print-op-on-diagnostic",
       llvm::cl::desc("When a diagnostic is emitted on an operation, also print "
@@ -101,6 +105,41 @@ struct BuiltinDialect : public Dialect {
 };
 } // end anonymous namespace.

+//===----------------------------------------------------------------------===//
+// Locking Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Utility reader lock that takes a runtime flag that specifies if we really
+/// need to lock.
+struct ScopedReaderLock {
+  ScopedReaderLock(llvm::sys::SmartRWMutex<true> &mutexParam, bool shouldLock)
+      : mutex(shouldLock ? &mutexParam : nullptr) {
+    if (mutex)
+      mutex->lock_shared();
+  }
+  ~ScopedReaderLock() {
+    if (mutex)
+      mutex->unlock_shared();
+  }
+  llvm::sys::SmartRWMutex<true> *mutex;
+};
+/// Utility writer lock that takes a runtime flag that specifies if we really
+/// need to lock.
+struct ScopedWriterLock {
+  ScopedWriterLock(llvm::sys::SmartRWMutex<true> &mutexParam, bool shouldLock)
+      : mutex(shouldLock ? &mutexParam : nullptr) {
+    if (mutex)
+      mutex->lock();
+  }
+  ~ScopedWriterLock() {
+    if (mutex)
+      mutex->unlock();
+  }
+  llvm::sys::SmartRWMutex<true> *mutex;
+};
+} // end anonymous namespace.
+
 //===----------------------------------------------------------------------===//
 // AffineMap and IntegerSet hashing
 //===----------------------------------------------------------------------===//
@@ -111,8 +150,10 @@
 template <typename ValueT, typename DenseInfoT, typename KeyT,
           typename ConstructorFn>
 static ValueT safeGetOrCreate(DenseSet<ValueT, DenseInfoT> &container,
                               KeyT &&key, llvm::sys::SmartRWMutex<true> &mutex,
+                              bool threadingIsEnabled,
                               ConstructorFn &&constructorFn) {
-  { // Check for an existing instance in read-only mode.
+  // Check for an existing instance in read-only mode.
+  if (threadingIsEnabled) {
     llvm::sys::SmartScopedReader<true> instanceLock(mutex);
     auto it = container.find_as(key);
     if (it != container.end())
@@ -120,16 +161,14 @@ static ValueT safeGetOrCreate(DenseSet<ValueT, DenseInfoT> &container,
   }

   // Acquire a writer-lock so that we can safely create the new instance.
-  llvm::sys::SmartScopedWriter<true> instanceLock(mutex);
+  ScopedWriterLock instanceLock(mutex, threadingIsEnabled);
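// Illustrative call shape (editorial sketch; constructMap is a placeholder):
//   AffineMap m = safeGetOrCreate(impl.affineMaps, key, impl.affineMutex,
//                                 impl.threadingIsEnabled,
//                                 [&] { return constructMap(); });
// Readers skip the shared lock entirely when threading is disabled.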
   // Check for an existing instance again here, because another writer thread
-  // may have already created one.
+  // may have already created one. Otherwise, construct a new instance.
   auto existing = container.insert_as(ValueT(), key);
-  if (!existing.second)
-    return *existing.first;
-
-  // Otherwise, construct a new instance of the value.
-  return *existing.first = constructorFn();
+  if (existing.second)
+    return *existing.first = constructorFn();
+  return *existing.first;
 }

 namespace {
@@ -217,6 +256,9 @@ class MLIRContextImpl {
   /// detect such use cases
   bool allowUnregisteredDialects = false;

+  /// Enable support for multi-threading within MLIR.
+  bool threadingIsEnabled = true;
+
   /// If the operation should be attached to diagnostics printed via the
   /// Operation::emit methods.
   bool printOpOnDiagnostic = true;
@@ -288,17 +330,19 @@ class MLIRContextImpl {
   UnknownLoc unknownLocAttr;

 public:
-  MLIRContextImpl() : identifiers(identifierAllocator) {
-    // Initialize values based on the command line flags if they were provided.
-    if (clOptions.isConstructed()) {
-      printOpOnDiagnostic = clOptions->printOpOnDiagnostic;
-      printStackTraceOnDiagnostic = clOptions->printStackTraceOnDiagnostic;
-    }
-  }
+  MLIRContextImpl() : identifiers(identifierAllocator) {}
 };
 } // end namespace mlir

 MLIRContext::MLIRContext() : impl(new MLIRContextImpl()) {
+  // Initialize values based on the command line flags if they were provided.
+  if (clOptions.isConstructed()) {
+    disableMultithreading(clOptions->disableThreading);
+    printOpOnDiagnostic(clOptions->printOpOnDiagnostic);
+    printStackTraceOnDiagnostic(clOptions->printStackTraceOnDiagnostic);
+  }
+
+  // Register dialects with this context.
   new BuiltinDialect(this);
   registerAllDialects(this);
@@ -372,11 +416,10 @@ DiagnosticEngine &MLIRContext::getDiagEngine() { return getImpl().diagEngine; }
 /// Return information about all registered IR dialects.
 std::vector<Dialect *> MLIRContext::getRegisteredDialects() {
   // Lock access to the context registry.
-  llvm::sys::SmartScopedReader<true> registryLock(getImpl().contextMutex);
-
+  ScopedReaderLock registryLock(impl->contextMutex, impl->threadingIsEnabled);
   std::vector<Dialect *> result;
-  result.reserve(getImpl().dialects.size());
-  for (auto &dialect : getImpl().dialects)
+  result.reserve(impl->dialects.size());
+  for (auto &dialect : impl->dialects)
     result.push_back(dialect.get());
   return result;
 }
@@ -385,11 +428,15 @@ std::vector<Dialect *> MLIRContext::getRegisteredDialects() {
 /// then return nullptr.
 Dialect *MLIRContext::getRegisteredDialect(StringRef name) {
   // Lock access to the context registry.
-  llvm::sys::SmartScopedReader<true> registryLock(getImpl().contextMutex);
-  for (auto &dialect : getImpl().dialects)
-    if (name == dialect->getNamespace())
-      return dialect.get();
-  return nullptr;
+  ScopedReaderLock registryLock(impl->contextMutex, impl->threadingIsEnabled);
+
+  // Dialects are sorted by name, so we can use binary search for lookup.
+  auto it = llvm::lower_bound(
+      impl->dialects, name,
+      [](const auto &lhs, StringRef rhs) { return lhs->getNamespace() < rhs; });
+  return (it != impl->dialects.end() && (*it)->getNamespace() == name)
+             ? (*it).get()
+             : nullptr;
 }

 /// Register this dialect object with the specified context. The context
@@ -399,15 +446,13 @@ void Dialect::registerDialect(MLIRContext *context) {
   std::unique_ptr<Dialect> dialect(this);

   // Lock access to the context registry.
-  llvm::sys::SmartScopedWriter<true> registryLock(impl.contextMutex);
+  ScopedWriterLock registryLock(impl.contextMutex, impl.threadingIsEnabled);
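// Note (illustrative): registration below keeps impl.dialects sorted by
// namespace, which is what lets getRegisteredDialect above resolve a name
// such as "std" by binary search instead of a linear scan.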
   // Get the correct insertion position sorted by namespace.
-  auto insertPt =
-      llvm::lower_bound(impl.dialects, dialect,
-                        [](const std::unique_ptr<Dialect> &lhs,
-                           const std::unique_ptr<Dialect> &rhs) {
-                          return lhs->getNamespace() < rhs->getNamespace();
-                        });
+  auto insertPt = llvm::lower_bound(
+      impl.dialects, dialect, [](const auto &lhs, const auto &rhs) {
+        return lhs->getNamespace() < rhs->getNamespace();
+      });

   // Abort if dialect with namespace has already been registered.
   if (insertPt != impl.dialects.end() &&
@@ -426,6 +471,21 @@ void MLIRContext::allowUnregisteredDialects(bool allowing) {
   impl->allowUnregisteredDialects = allowing;
 }

+/// Return true if multi-threading is enabled by the context.
+bool MLIRContext::isMultithreadingEnabled() {
+  return impl->threadingIsEnabled && llvm::llvm_is_multithreaded();
+}
+
+/// Set the flag specifying if multi-threading is disabled by the context.
+void MLIRContext::disableMultithreading(bool disable) {
+  impl->threadingIsEnabled = !disable;
+
+  // Update the threading mode for each of the uniquers.
+  impl->affineUniquer.disableMultithreading(disable);
+  impl->attributeUniquer.disableMultithreading(disable);
+  impl->typeUniquer.disableMultithreading(disable);
+}
+
 /// Return true if we should attach the operation to diagnostics emitted via
 /// Operation::emit.
 bool MLIRContext::shouldPrintOpOnDiagnostic() {
@@ -457,13 +517,13 @@ std::vector<AbstractOperation *> MLIRContext::getRegisteredOperations() {
   std::vector<std::pair<StringRef, AbstractOperation>> opsToSort;
   {
     // Lock access to the context registry.
-    llvm::sys::SmartScopedReader<true> registryLock(getImpl().contextMutex);
+    ScopedReaderLock registryLock(impl->contextMutex, impl->threadingIsEnabled);

     // We just have the operations in a non-deterministic hash table order. Dump
     // into a temporary array, then sort it by operation name to get a stable
     // ordering.
     llvm::StringMap<AbstractOperation> &registeredOps =
-        getImpl().registeredOperations;
+        impl->registeredOperations;

     opsToSort.reserve(registeredOps.size());
     for (auto &elt : registeredOps)
@@ -487,7 +547,7 @@ void Dialect::addOperation(AbstractOperation opInfo) {
   auto &impl = context->getImpl();

   // Lock access to the context registry.
-  llvm::sys::SmartScopedWriter<true> registryLock(impl.contextMutex);
+  ScopedWriterLock registryLock(impl.contextMutex, impl.threadingIsEnabled);
   if (!impl.registeredOperations.insert({opInfo.name, opInfo}).second) {
     llvm::errs() << "error: operation named '" << opInfo.name
                  << "' is already registered.\n";
@@ -500,7 +560,7 @@ void Dialect::addSymbol(TypeID typeID) {
   auto &impl = context->getImpl();

   // Lock access to the context registry.
-  llvm::sys::SmartScopedWriter<true> registryLock(impl.contextMutex);
+  ScopedWriterLock registryLock(impl.contextMutex, impl.threadingIsEnabled);
   if (!impl.registeredDialectSymbols.insert({typeID, this}).second) {
     llvm::errs() << "error: dialect symbol already registered.\n";
     abort();
@@ -514,7 +574,7 @@ const AbstractOperation *AbstractOperation::lookup(StringRef opName,
   auto &impl = context->getImpl();

   // Lock access to the context registry.
-  llvm::sys::SmartScopedReader<true> registryLock(impl.contextMutex);
+  ScopedReaderLock registryLock(impl.contextMutex, impl.threadingIsEnabled);
   auto it = impl.registeredOperations.find(opName);
   if (it != impl.registeredOperations.end())
     return &it->second;
@@ -529,7 +589,8 @@ const AbstractOperation *AbstractOperation::lookup(StringRef opName,
 Identifier Identifier::get(StringRef str, MLIRContext *context) {
   auto &impl = context->getImpl();

-  { // Check for an existing identifier in read-only mode.
+  // Check for an existing identifier in read-only mode.
+  if (context->isMultithreadingEnabled()) {
     llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
     auto it = impl.identifiers.find(str);
     if (it != impl.identifiers.end())
@@ -544,7 +605,7 @@ Identifier Identifier::get(StringRef str, MLIRContext *context) {
          "Cannot create an identifier with a nul character");

   // Acquire a writer-lock so that we can safely create the new instance.
-  llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
+  ScopedWriterLock contextLock(impl.identifierMutex, impl.threadingIsEnabled);
   auto it = impl.identifiers.insert(str).first;
   return Identifier(&*it);
 }
@@ -696,16 +757,18 @@ AffineMap AffineMap::getImpl(unsigned dimCount, unsigned symbolCount,
   auto key = std::make_tuple(dimCount, symbolCount, results);

   // Safely get or create an AffineMap instance.
-  return safeGetOrCreate(impl.affineMaps, key, impl.affineMutex, [&] {
-    auto *res = impl.affineAllocator.Allocate<detail::AffineMapStorage>();
+  return safeGetOrCreate(
+      impl.affineMaps, key, impl.affineMutex, impl.threadingIsEnabled, [&] {
+        auto *res = impl.affineAllocator.Allocate<detail::AffineMapStorage>();

-    // Copy the results into the bump pointer.
-    results = copyArrayRefInto(impl.affineAllocator, results);
+        // Copy the results into the bump pointer.
+        results = copyArrayRefInto(impl.affineAllocator, results);

-    // Initialize the memory using placement new.
-    new (res) detail::AffineMapStorage{dimCount, symbolCount, results, context};
-    return AffineMap(res);
-  });
+        // Initialize the memory using placement new.
+        new (res)
+            detail::AffineMapStorage{dimCount, symbolCount, results, context};
+        return AffineMap(res);
+      });
 }

 AffineMap AffineMap::get(MLIRContext *context) {
@@ -760,12 +823,12 @@ IntegerSet IntegerSet::get(unsigned dimCount, unsigned symbolCount,
   if (constraints.size() < IntegerSet::kUniquingThreshold) {
     auto key = std::make_tuple(dimCount, symbolCount, constraints, eqFlags);
     return safeGetOrCreate(impl.integerSets, key, impl.affineMutex,
-                           constructorFn);
+                           impl.threadingIsEnabled, constructorFn);
   }

   // Otherwise, acquire a writer-lock so that we can safely create the new
   // instance.
-  llvm::sys::SmartScopedWriter<true> affineLock(impl.affineMutex);
+  ScopedWriterLock affineLock(impl.affineMutex, impl.threadingIsEnabled);
   return constructorFn();
 }
diff --git a/mlir/lib/IR/Module.cpp b/mlir/lib/IR/Module.cpp
index b441462821b68..1711b05c8533d 100644
--- a/mlir/lib/IR/Module.cpp
+++ b/mlir/lib/IR/Module.cpp
@@ -16,19 +16,19 @@ using namespace mlir;
 // Module Operation.
 //===----------------------------------------------------------------------===//

-void ModuleOp::build(Builder *builder, OperationState &result,
+void ModuleOp::build(OpBuilder &builder, OperationState &result,
                      Optional<StringRef> name) {
-  ensureTerminator(*result.addRegion(), *builder, result.location);
+  ensureTerminator(*result.addRegion(), builder, result.location);
   if (name)
-    result.attributes.push_back(builder->getNamedAttr(
-        mlir::SymbolTable::getSymbolAttrName(), builder->getStringAttr(*name)));
+    result.attributes.push_back(builder.getNamedAttr(
+        mlir::SymbolTable::getSymbolAttrName(), builder.getStringAttr(*name)));
 }

 /// Construct a module from the given context.
 ModuleOp ModuleOp::create(Location loc, Optional<StringRef> name) {
   OperationState state(loc, "module");
-  Builder builder(loc->getContext());
-  ModuleOp::build(&builder, state, name);
+  OpBuilder builder(loc->getContext());
+  ModuleOp::build(builder, state, name);
   return cast<ModuleOp>(Operation::create(state));
 }
@@ -83,7 +83,7 @@ LogicalResult ModuleOp::verify() {
   // Check that none of the attributes are non-dialect attributes, except for
   // the symbol related attributes.
-  for (auto attr : getOperation()->getAttrList().getAttrs()) {
+  for (auto attr : getOperation()->getMutableAttrDict().getAttrs()) {
     if (!attr.first.strref().contains('.') &&
         !llvm::is_contained(
             ArrayRef<StringRef>{mlir::SymbolTable::getSymbolAttrName(),
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index a60e6bf385f2d..5b439d67ad678 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -65,21 +65,21 @@ Operation *Operation::create(Location location, OperationName name,
                              ArrayRef<Block *> successors, unsigned numRegions) {
   return create(location, name, resultTypes, operands,
-                NamedAttributeList(attributes), successors, numRegions);
+                MutableDictionaryAttr(attributes), successors, numRegions);
 }

 /// Create a new Operation from operation state.
 Operation *Operation::create(const OperationState &state) {
-  return Operation::create(state.location, state.name, state.types,
-                           state.operands, NamedAttributeList(state.attributes),
-                           state.successors, state.regions);
+  return Operation::create(
+      state.location, state.name, state.types, state.operands,
+      MutableDictionaryAttr(state.attributes), state.successors, state.regions);
 }

 /// Create a new Operation with the specific fields.
 Operation *Operation::create(Location location, OperationName name,
                              ArrayRef<Type> resultTypes,
                              ArrayRef<Value> operands,
-                             NamedAttributeList attributes,
+                             MutableDictionaryAttr attributes,
                              ArrayRef<Block *> successors, RegionRange regions) {
   unsigned numRegions = regions.size();
@@ -91,12 +91,12 @@ Operation *Operation::create(Location location, OperationName name,
   return op;
 }

-/// Overload of create that takes an existing NamedAttributeList to avoid
+/// Overload of create that takes an existing MutableDictionaryAttr to avoid
 /// unnecessarily uniquing a list of attributes.
 Operation *Operation::create(Location location, OperationName name,
                              ArrayRef<Type> resultTypes,
                              ArrayRef<Value> operands,
-                             NamedAttributeList attributes,
+                             MutableDictionaryAttr attributes,
                              ArrayRef<Block *> successors, unsigned numRegions) {
   // We only need to allocate additional memory for a subset of results.
@@ -156,7 +156,8 @@ Operation *Operation::create(Location location, OperationName name,
 Operation::Operation(Location location, OperationName name,
                      ArrayRef<Type> resultTypes, unsigned numSuccessors,
-                     unsigned numRegions, const NamedAttributeList &attributes,
+                     unsigned numRegions,
+                     const MutableDictionaryAttr &attributes,
                      bool hasOperandStorage)
     : location(location), numSuccs(numSuccessors), numRegions(numRegions),
       hasOperandStorage(hasOperandStorage), hasSingleResult(false), name(name),
@@ -243,6 +244,25 @@ void Operation::setOperands(ValueRange operands) {
   assert(operands.empty() && "setting operands without an operand storage");
 }
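// Illustrative semantics for the range mutators added below (editorial
// sketch): starting from operands (a, b, c, d),
//   op->setOperands(/*start=*/1, /*length=*/2, {x});  // yields (a, x, d)
//   op->insertOperands(/*index=*/1, {y, z});          // yields (a, y, z, x, d)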
+/// Replace the operands beginning at 'start' and ending at 'start' + 'length'
+/// with the ones provided in 'operands'. 'operands' may be smaller or larger
+/// than the range pointed to by 'start'+'length'.
+void Operation::setOperands(unsigned start, unsigned length,
+                            ValueRange operands) {
+  assert((start + length) <= getNumOperands() &&
+         "invalid operand range specified");
+  if (LLVM_LIKELY(hasOperandStorage))
+    return getOperandStorage().setOperands(this, start, length, operands);
+  assert(operands.empty() && "setting operands without an operand storage");
+}
+
+/// Insert the given operands into the operand list at the given 'index'.
+void Operation::insertOperands(unsigned index, ValueRange operands) {
+  if (LLVM_LIKELY(hasOperandStorage))
+    return setOperands(index, /*length=*/0, operands);
+  assert(operands.empty() && "inserting operands without an operand storage");
+}
+
 //===----------------------------------------------------------------------===//
 // Diagnostics
 //===----------------------------------------------------------------------===//
@@ -984,7 +1004,7 @@ LogicalResult OpTrait::impl::verifyResultSizeAttr(Operation *op,
 // These functions are out-of-line implementations of the methods in BinaryOp,
 // which avoids them being template instantiated/duplicated.

-void impl::buildBinaryOp(Builder *builder, OperationState &result, Value lhs,
+void impl::buildBinaryOp(OpBuilder &builder, OperationState &result, Value lhs,
                          Value rhs) {
   assert(lhs.getType() == rhs.getType());
   result.addOperands({lhs, rhs});
@@ -1025,7 +1045,7 @@ void impl::printOneResultOp(Operation *op, OpAsmPrinter &p) {
 // CastOp implementation
 //===----------------------------------------------------------------------===//

-void impl::buildCastOp(Builder *builder, OperationState &result, Value source,
+void impl::buildCastOp(OpBuilder &builder, OperationState &result, Value source,
                        Type destType) {
   result.addOperands(source);
   result.addTypes(destType);
@@ -1066,7 +1086,7 @@ Value impl::foldCastOp(Operation *op) {
 /// terminator operation to insert.
 void impl::ensureRegionTerminator(
     Region &region, Location loc,
-    function_ref<Operation *()> buildTerminatorOp) {
+    function_ref<Operation *(OpBuilder &)> buildTerminatorOp) {
   if (region.empty())
     region.push_back(new Block);
@@ -1074,7 +1094,8 @@ void impl::ensureRegionTerminator(
   if (!block.empty() && block.back().isKnownTerminator())
     return;

-  block.push_back(buildTerminatorOp());
+  OpBuilder builder(loc.getContext());
+  block.push_back(buildTerminatorOp(builder));
 }

 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp
index 5f698b1bdc539..91842cf95e569 100644
--- a/mlir/lib/IR/OperationSupport.cpp
+++ b/mlir/lib/IR/OperationSupport.cpp
@@ -13,7 +13,9 @@
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/Block.h"
+#include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/IR/StandardTypes.h"
 using namespace mlir;

 //===----------------------------------------------------------------------===//
@@ -89,6 +91,55 @@ void detail::OperandStorage::setOperands(Operation *owner, ValueRange values) {
     storageOperands[i].set(values[i]);
 }

+/// Replace the operands beginning at 'start' and ending at 'start' + 'length'
+/// with the ones provided in 'operands'. 'operands' may be smaller or larger
+/// than the range pointed to by 'start'+'length'.
+void detail::OperandStorage::setOperands(Operation *owner, unsigned start,
+                                         unsigned length, ValueRange operands) {
+  // If the new size is the same, we can update inplace.
+  unsigned newSize = operands.size();
+  if (newSize == length) {
+    MutableArrayRef<OpOperand> storageOperands = getOperands();
+    for (unsigned i = 0, e = length; i != e; ++i)
+      storageOperands[start + i].set(operands[i]);
+    return;
+  }
+  // If the new size is smaller, remove the extra operands and set the rest
+  // inplace.
+  if (newSize < length) {
+    eraseOperands(start + operands.size(), length - newSize);
+    setOperands(owner, start, newSize, operands);
+    return;
+  }
+  // Otherwise, the new size is greater so we need to grow the storage.
+  auto storageOperands = resize(owner, size() + (newSize - length));
+
+  // Shift operands to the right to make space for the new operands.
+  unsigned rotateSize = storageOperands.size() - (start + length);
+  auto rbegin = storageOperands.rbegin();
+  std::rotate(rbegin, std::next(rbegin, newSize - length), rbegin + rotateSize);
+
+  // Update the operands inplace.
+  for (unsigned i = 0, e = operands.size(); i != e; ++i)
+    storageOperands[start + i].set(operands[i]);
+}
+
+/// Erase an operand held by the storage.
+void detail::OperandStorage::eraseOperands(unsigned start, unsigned length) {
+  TrailingOperandStorage &storage = getStorage();
+  MutableArrayRef<OpOperand> operands = storage.getOperands();
+  assert((start + length) <= operands.size());
+  storage.numOperands -= length;
+
+  // Shift all operands down if the operand to remove is not at the end.
+  if (start != storage.numOperands) {
+    auto indexIt = std::next(operands.begin(), start);
+    std::rotate(indexIt, std::next(indexIt, length), operands.end());
+  }
+  for (unsigned i = 0; i != length; ++i)
+    operands[storage.numOperands + i].~OpOperand();
+}
+
 /// Resize the storage to the given size. Returns the array containing the new
 /// operands.
 MutableArrayRef<OpOperand> detail::OperandStorage::resize(Operation *owner,
@@ -149,20 +200,6 @@ MutableArrayRef<OpOperand> detail::OperandStorage::resize(Operation *owner,
   return newOperands;
 }

-/// Erase an operand held by the storage.
-void detail::OperandStorage::eraseOperand(unsigned index) {
-  assert(index < size());
-  TrailingOperandStorage &storage = getStorage();
-  MutableArrayRef<OpOperand> operands = storage.getOperands();
-  --storage.numOperands;
-
-  // Shift all operands down by 1 if the operand to remove is not at the end.
-  auto indexIt = std::next(operands.begin(), index);
-  if (index != storage.numOperands)
-    std::rotate(indexIt, std::next(indexIt), operands.end());
-  operands[storage.numOperands].~OpOperand();
-}
-
 //===----------------------------------------------------------------------===//
 // ResultStorage
 //===----------------------------------------------------------------------===//
@@ -235,6 +272,95 @@ unsigned OperandRange::getBeginOperandIndex() const {
   return base->getOperandNumber();
 }

+//===----------------------------------------------------------------------===//
+// MutableOperandRange
+
+/// Construct a new mutable range from the given operand, operand start index,
+/// and range length.
+MutableOperandRange::MutableOperandRange(
+    Operation *owner, unsigned start, unsigned length,
+    ArrayRef<OperandSegment> operandSegments)
+    : owner(owner), start(start), length(length),
+      operandSegments(operandSegments.begin(), operandSegments.end()) {
+  assert((start + length) <= owner->getNumOperands() && "invalid range");
+}
+MutableOperandRange::MutableOperandRange(Operation *owner)
+    : MutableOperandRange(owner, /*start=*/0, owner->getNumOperands()) {}
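// Illustrative use (editorial sketch): a MutableOperandRange mutates the
// owning operation's operand list directly, keeping any registered
// operand-segment attribute in sync:
//   MutableOperandRange range(op);
//   range.append(extraValues);                  // grows op's operand list
//   range.erase(/*subStart=*/0, /*subLen=*/1);  // drops the first operand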
+/// Slice this range into a sub range, with the additional operand segment.
+MutableOperandRange
+MutableOperandRange::slice(unsigned subStart, unsigned subLen,
+                           Optional<OperandSegment> segment) {
+  assert((subStart + subLen) <= length && "invalid sub-range");
+  MutableOperandRange subSlice(owner, start + subStart, subLen,
+                               operandSegments);
+  if (segment)
+    subSlice.operandSegments.push_back(*segment);
+  return subSlice;
+}
+
+/// Append the given values to the range.
+void MutableOperandRange::append(ValueRange values) {
+  if (values.empty())
+    return;
+  owner->insertOperands(start + length, values);
+  updateLength(length + values.size());
+}
+
+/// Assign this range to the given values.
+void MutableOperandRange::assign(ValueRange values) {
+  owner->setOperands(start, length, values);
+  if (length != values.size())
+    updateLength(/*newLength=*/values.size());
+}
+
+/// Assign the range to the given value.
+void MutableOperandRange::assign(Value value) {
+  if (length == 1) {
+    owner->setOperand(start, value);
+  } else {
+    owner->setOperands(start, length, value);
+    updateLength(/*newLength=*/1);
+  }
+}
+
+/// Erase the operands within the given sub-range.
+void MutableOperandRange::erase(unsigned subStart, unsigned subLen) {
+  assert((subStart + subLen) <= length && "invalid sub-range");
+  if (length == 0)
+    return;
+  owner->eraseOperands(start + subStart, subLen);
+  updateLength(length - subLen);
+}
+
+/// Clear this range and erase all of the operands.
+void MutableOperandRange::clear() {
+  if (length != 0) {
+    owner->eraseOperands(start, length);
+    updateLength(/*newLength=*/0);
+  }
+}
+
+/// Allow implicit conversion to an OperandRange.
+MutableOperandRange::operator OperandRange() const {
+  return owner->getOperands().slice(start, length);
+}
+
+/// Update the length of this range to the one provided.
+void MutableOperandRange::updateLength(unsigned newLength) {
+  int32_t diff = int32_t(newLength) - int32_t(length);
+  length = newLength;
+
+  // Update any of the provided segment attributes.
+  for (OperandSegment &segment : operandSegments) {
+    auto attr = segment.second.second.cast<DenseIntElementsAttr>();
+    SmallVector<int32_t, 8> segments(attr.getValues<int32_t>());
+    segments[segment.first] += diff;
+    segment.second.second = DenseIntElementsAttr::get(attr.getType(), segments);
+    owner->setAttr(segment.second.first, segment.second.second);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // ResultRange
@@ -281,3 +407,86 @@ Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) {
   Operation *operation = reinterpret_cast<Operation *>(owner.ptr.get());
   return operation->getResult(owner.startIndex + index);
 }
+
+//===----------------------------------------------------------------------===//
+// Operation Equivalency
+//===----------------------------------------------------------------------===//
+
+llvm::hash_code OperationEquivalence::computeHash(Operation *op, Flags flags) {
+  // Hash operations based upon their:
+  //  - Operation Name
+  //  - Attributes
+  llvm::hash_code hash = llvm::hash_combine(
+      op->getName(), op->getMutableAttrDict().getDictionary());
+
+  //  - Result Types
+  ArrayRef<Type> resultTypes = op->getResultTypes();
+  switch (resultTypes.size()) {
+  case 0:
+    // We don't need to add anything to the hash.
+    break;
+  case 1:
+    // Add in the result type.
+    hash = llvm::hash_combine(hash, resultTypes.front());
+    break;
+  default:
+    // Use the type buffer as the hash, as we can guarantee it is the same for
+    // any given range of result types. This takes advantage of the fact that
+    // result types >1 are stored in a TupleType and uniqued.
+    hash = llvm::hash_combine(hash, resultTypes.data());
+    break;
+  }
+
+  //  - Operands
+  bool ignoreOperands = flags & Flags::IgnoreOperands;
+  if (!ignoreOperands) {
+    // TODO: Allow commutative operations to have different ordering.
+    hash = llvm::hash_combine(
+        hash, llvm::hash_combine_range(op->operand_begin(), op->operand_end()));
+  }
+  return hash;
+}
+
+bool OperationEquivalence::isEquivalentTo(Operation *lhs, Operation *rhs,
+                                          Flags flags) {
+  if (lhs == rhs)
+    return true;
+
+  // Compare the operation name.
+  if (lhs->getName() != rhs->getName())
+    return false;
+  // Check operand counts.
+  if (lhs->getNumOperands() != rhs->getNumOperands())
+    return false;
+  // Compare attributes.
+  if (lhs->getMutableAttrDict() != rhs->getMutableAttrDict())
+    return false;
+  // Compare result types.
+  ArrayRef<Type> lhsResultTypes = lhs->getResultTypes();
+  ArrayRef<Type> rhsResultTypes = rhs->getResultTypes();
+  if (lhsResultTypes.size() != rhsResultTypes.size())
+    return false;
+  switch (lhsResultTypes.size()) {
+  case 0:
+    break;
+  case 1:
+    // Compare the single result type.
+    if (lhsResultTypes.front() != rhsResultTypes.front())
+      return false;
+    break;
+  default:
+    // Use the type buffer for the comparison, as we can guarantee it is the
+    // same for any given range of result types. This takes advantage of the
+    // fact that result types >1 are stored in a TupleType and uniqued.
+    if (lhsResultTypes.data() != rhsResultTypes.data())
+      return false;
+    break;
+  }
+  // Compare operands.
+  bool ignoreOperands = flags & Flags::IgnoreOperands;
+  if (ignoreOperands)
+    return true;
+  // TODO: Allow commutative operations to have different ordering.
+  return std::equal(lhs->operand_begin(), lhs->operand_end(),
+                    rhs->operand_begin());
+}
diff --git a/mlir/lib/IR/Region.cpp b/mlir/lib/IR/Region.cpp
index 4f5054112a41a..aa2acc00dde4f 100644
--- a/mlir/lib/IR/Region.cpp
+++ b/mlir/lib/IR/Region.cpp
@@ -146,34 +146,32 @@ static bool isIsolatedAbove(Region &region, Region &limit,
   // Traverse all operations in the region.
   while (!pendingRegions.empty()) {
-    for (Block &block : *pendingRegions.pop_back_val()) {
-      for (Operation &op : block) {
-        for (Value operand : op.getOperands()) {
-          // operand should be non-null here if the IR is well-formed. But
-          // we don't assert here as this function is called from the verifier
-          // and so could be called on invalid IR.
-          if (!operand) {
-            if (noteLoc)
-              op.emitOpError("block's operand not defined").attachNote(noteLoc);
-            return false;
-          }
+    for (Operation &op : pendingRegions.pop_back_val()->getOps()) {
+      for (Value operand : op.getOperands()) {
+        // operand should be non-null here if the IR is well-formed. But
+        // we don't assert here as this function is called from the verifier
+        // and so could be called on invalid IR.
+        if (!operand) {
+          if (noteLoc)
+            op.emitOpError("block's operand not defined").attachNote(noteLoc);
+          return false;
+        }

-          // Check that any value that is used by an operation is defined in the
-          // same region as either an operation result or a block argument.
-          if (operand.getParentRegion()->isProperAncestor(&limit)) {
-            if (noteLoc) {
-              op.emitOpError("using value defined outside the region")
-                  .attachNote(noteLoc)
-                  << "required by region isolation constraints";
-            }
-            return false;
+        // Check that any value that is used by an operation is defined in the
+        // same region as either an operation result or a block argument.
+        if (operand.getParentRegion()->isProperAncestor(&limit)) {
+          if (noteLoc) {
+            op.emitOpError("using value defined outside the region")
+                .attachNote(noteLoc)
+                << "required by region isolation constraints";
           }
+          return false;
         }
-        // Schedule any regions the operations contain for further checking.
-        pendingRegions.reserve(pendingRegions.size() + op.getNumRegions());
-        for (Region &subRegion : op.getRegions())
-          pendingRegions.push_back(&subRegion);
       }
+      // Schedule any regions the operations contain for further checking.
+      pendingRegions.reserve(pendingRegions.size() + op.getNumRegions());
+      for (Region &subRegion : op.getRegions())
+        pendingRegions.push_back(&subRegion);
     }
   }
   return true;
@@ -219,6 +217,40 @@ void llvm::ilist_traits<::mlir::Block>::transferNodesFromList(
   first->parentValidOpOrderPair.setPointer(curParent);
 }

+//===----------------------------------------------------------------------===//
+// Region::OpIterator
+//===----------------------------------------------------------------------===//
+
+Region::OpIterator::OpIterator(Region *region, bool end)
+    : region(region), block(end ? region->end() : region->begin()) {
+  if (!region->empty())
+    skipOverBlocksWithNoOps();
+}
+
+Region::OpIterator &Region::OpIterator::operator++() {
+  // We increment over operations; when we reach the end of a block, we move
+  // on to the next block.
+  if (operation != block->end())
+    ++operation;
+  if (operation == block->end()) {
+    ++block;
+    skipOverBlocksWithNoOps();
+  }
+  return *this;
+}
+
+void Region::OpIterator::skipOverBlocksWithNoOps() {
+  while (block != region->end() && block->empty())
+    ++block;
+
+  // If we are at the last block, then set the operation to first operation of
+  // next block (sentinel value used for end).
+  if (block == region->end())
+    operation = {};
+  else
+    operation = block->begin();
+}
+
 //===----------------------------------------------------------------------===//
 // RegionRange
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp
index c61e1d9e2ab95..3f4a7ec6ef6ef 100644
--- a/mlir/lib/IR/StandardTypes.cpp
+++ b/mlir/lib/IR/StandardTypes.cpp
@@ -89,6 +89,29 @@ bool Type::isIntOrFloat() { return isa<IntegerType>() || isa<FloatType>(); }

 bool Type::isIntOrIndexOrFloat() { return isIntOrFloat() || isIndex(); }

+//===----------------------------------------------------------------------===//
+/// ComplexType
+//===----------------------------------------------------------------------===//
+
+ComplexType ComplexType::get(Type elementType) {
+  return Base::get(elementType.getContext(), StandardTypes::Complex,
+                   elementType);
+}
+
+ComplexType ComplexType::getChecked(Type elementType, Location location) {
+  return Base::getChecked(location, StandardTypes::Complex, elementType);
+}
+
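// Editorial note on the relaxed verifier below: any int-or-float element now
// verifies, so complex<f32> and complex<i32> are both accepted, while e.g.
// complex<index> is still rejected.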
+/// Verify the construction of a complex type.
+LogicalResult ComplexType::verifyConstructionInvariants(Location loc,
+                                                        Type elementType) {
+  if (!elementType.isIntOrFloat())
+    return emitError(loc, "invalid element type for complex");
+  return success();
+}
+
+Type ComplexType::getElementType() { return getImpl()->elementType; }
+
 //===----------------------------------------------------------------------===//
 // Integer Type
 //===----------------------------------------------------------------------===//
@@ -612,29 +635,6 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t,
   return success();
 }

-//===----------------------------------------------------------------------===//
-/// ComplexType
-//===----------------------------------------------------------------------===//
-
-ComplexType ComplexType::get(Type elementType) {
-  return Base::get(elementType.getContext(), StandardTypes::Complex,
-                   elementType);
-}
-
-ComplexType ComplexType::getChecked(Type elementType, Location location) {
-  return Base::getChecked(location, StandardTypes::Complex, elementType);
-}
-
-/// Verify the construction of an integer type.
-LogicalResult ComplexType::verifyConstructionInvariants(Location loc,
-                                                        Type elementType) {
-  if (!elementType.isa<FloatType>() && !elementType.isSignlessInteger())
-    return emitError(loc, "invalid element type for complex");
-  return success();
-}
-
-Type ComplexType::getElementType() { return getImpl()->elementType; }
-
 //===----------------------------------------------------------------------===//
 /// TupleType
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/SymbolTable.cpp b/mlir/lib/IR/SymbolTable.cpp
index 2b1d99b0a363f..1d2235b619368 100644
--- a/mlir/lib/IR/SymbolTable.cpp
+++ b/mlir/lib/IR/SymbolTable.cpp
@@ -146,11 +146,6 @@ void SymbolTable::insert(Operation *symbol, Block::iterator insertPt) {
   setSymbolName(symbol, nameBuffer);
 }

-/// Returns true if the given operation defines a symbol.
-bool SymbolTable::isSymbol(Operation *op) {
-  return op->hasTrait<OpTrait::Symbol>() || getNameIfSymbol(op).hasValue();
-}
-
 /// Returns the name of the given symbol operation.
 StringRef SymbolTable::getSymbolName(Operation *symbol) {
   Optional<StringRef> name = getNameIfSymbol(symbol);
@@ -212,6 +207,35 @@ Operation *SymbolTable::getNearestSymbolTable(Operation *from) {
   return from;
 }

+/// Walks all symbol table operations nested within, and including, `op`. For
+/// each symbol table operation, the provided callback is invoked with the op
+/// and a boolean signifying if the symbols within that symbol table can be
+/// treated as if all uses are visible. `allSymUsesVisible` identifies whether
+/// all of the symbol uses of symbols within `op` are visible.
+void SymbolTable::walkSymbolTables(
+    Operation *op, bool allSymUsesVisible,
+    function_ref<void(Operation *, bool)> callback) {
+  bool isSymbolTable = op->hasTrait<OpTrait::SymbolTable>();
+  if (isSymbolTable) {
+    SymbolOpInterface symbol = dyn_cast<SymbolOpInterface>(op);
+    allSymUsesVisible |= !symbol || symbol.isPrivate();
+  } else {
+    // Otherwise if 'op' is not a symbol table, any nested symbols are
+    // guaranteed to be hidden.
+    allSymUsesVisible = true;
+  }
+
+  for (Region &region : op->getRegions())
+    for (Block &block : region)
+      for (Operation &nestedOp : block)
+        walkSymbolTables(&nestedOp, allSymUsesVisible, callback);
+
+  // If 'op' had the symbol table trait, visit it after any nested symbol
+  // tables.
+  if (isSymbolTable)
+    callback(op, allSymUsesVisible);
+}
+
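// Illustrative walk (editorial sketch): symbol tables are visited post-order,
// so for a module nested inside another module the callback sees the inner
// module first:
//   SymbolTable::walkSymbolTables(outerModule, /*allSymUsesVisible=*/true,
//                                 [](Operation *symbolTable, bool visible) {
//                                   // inner module, then outer module
//                                 });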
 /// Returns the operation registered with the given symbol name with the
 /// regions of 'symbolTableOp'. 'symbolTableOp' is required to be an operation
 /// with the 'OpTrait::SymbolTable' trait. Returns nullptr if no valid symbol
@@ -221,11 +245,9 @@ Operation *SymbolTable::lookupSymbolIn(Operation *symbolTableOp,
   assert(symbolTableOp->hasTrait<OpTrait::SymbolTable>());
 
   // Look for a symbol with the given name.
-  for (auto &block : symbolTableOp->getRegion(0)) {
-    for (auto &op : block)
-      if (getNameIfSymbol(&op) == symbol)
-        return &op;
-  }
+  for (auto &op : symbolTableOp->getRegion(0).front().without_terminator())
+    if (getNameIfSymbol(&op) == symbol)
+      return &op;
   return nullptr;
 }
 Operation *SymbolTable::lookupSymbolIn(Operation *symbolTableOp,
@@ -286,7 +308,7 @@ Operation *SymbolTable::lookupNearestSymbolFrom(Operation *from,
 // SymbolTable Trait Types
 //===----------------------------------------------------------------------===//
 
-LogicalResult OpTrait::impl::verifySymbolTable(Operation *op) {
+LogicalResult detail::verifySymbolTable(Operation *op) {
   if (op->getNumRegions() != 1)
     return op->emitOpError()
            << "Operations with a 'SymbolTable' must have exactly one region";
@@ -316,7 +338,7 @@ LogicalResult OpTrait::impl::verifySymbolTable(Operation *op) {
   return success();
 }
 
-LogicalResult OpTrait::impl::verifySymbol(Operation *op) {
+LogicalResult detail::verifySymbol(Operation *op) {
   // Verify the name attribute.
   if (!op->getAttrOfType<StringAttr>(mlir::SymbolTable::getSymbolAttrName()))
     return op->emitOpError() << "requires string attribute '"
@@ -361,7 +383,7 @@ static WalkResult walkSymbolRefs(
     Operation *op,
     function_ref<WalkResult(SymbolTable::SymbolUse)> callback) {
   // Check to see if the operation has any attributes.
-  DictionaryAttr attrDict = op->getAttrList().getDictionary();
+  DictionaryAttr attrDict = op->getMutableAttrDict().getDictionary();
   if (!attrDict)
     return WalkResult::advance();
 
@@ -420,21 +442,19 @@ static Optional<WalkResult> walkSymbolUses(
     function_ref<WalkResult(SymbolTable::SymbolUse)> callback) {
   SmallVector<Region *, 1> worklist(llvm::make_pointer_range(regions));
   while (!worklist.empty()) {
-    for (Block &block : *worklist.pop_back_val()) {
-      for (Operation &op : block) {
-        if (walkSymbolRefs(&op, callback).wasInterrupted())
-          return WalkResult::interrupt();
-
-        // Check that this isn't a potentially unknown symbol table.
-        if (isPotentiallyUnknownSymbolTable(&op))
-          return llvm::None;
-
-        // If this op defines a new symbol table scope, we can't traverse. Any
-        // symbol references nested within 'op' are different semantically.
-        if (!op.hasTrait<OpTrait::SymbolTable>()) {
-          for (Region &region : op.getRegions())
-            worklist.push_back(&region);
-        }
+    for (Operation &op : worklist.pop_back_val()->getOps()) {
+      if (walkSymbolRefs(&op, callback).wasInterrupted())
+        return WalkResult::interrupt();
+
+      // Check that this isn't a potentially unknown symbol table.
+      if (isPotentiallyUnknownSymbolTable(&op))
+        return llvm::None;
+
+      // If this op defines a new symbol table scope, we can't traverse. Any
+      // symbol references nested within 'op' are different semantically.
+      if (!op.hasTrait<OpTrait::SymbolTable>()) {
+        for (Region &region : op.getRegions())
+          worklist.push_back(&region);
       }
     }
   }
@@ -779,7 +799,7 @@ replaceAllSymbolUsesImpl(SymbolT symbol, StringRef newSymbol, IRUnitT *limit) {
   // Generate a new attribute dictionary for the current operation by replacing
   // references to the old symbol.
   auto generateNewAttrDict = [&] {
-    auto oldDict = curOp->getAttrList().getDictionary();
+    auto oldDict = curOp->getMutableAttrDict().getDictionary();
     auto newDict = rebuildAttrAfterRAUW(oldDict, accessChains, /*depth=*/0);
     return newDict.cast<DictionaryAttr>();
   };
@@ -866,3 +886,10 @@ LogicalResult SymbolTable::replaceAllSymbolUses(Operation *oldSymbol,
                                                 Region *from) {
   return replaceAllSymbolUsesImpl(oldSymbol, newSymbol, from);
 }
+
+//===----------------------------------------------------------------------===//
+// Symbol Interfaces
+//===----------------------------------------------------------------------===//
+
+/// Include the generated symbol interfaces.
+#include "mlir/IR/SymbolInterfaces.cpp.inc"
diff --git a/mlir/lib/IR/Value.cpp b/mlir/lib/IR/Value.cpp
index fdc5ad6be8879..6467a7f2295b3 100644
--- a/mlir/lib/IR/Value.cpp
+++ b/mlir/lib/IR/Value.cpp
@@ -87,6 +87,13 @@ Region *Value::getParentRegion() {
   return cast<BlockArgument>().getOwner()->getParent();
 }
 
+/// Return the Block in which this Value is defined.
+Block *Value::getParentBlock() {
+  if (Operation *op = getDefiningOp())
+    return op->getBlock();
+  return cast<BlockArgument>().getOwner();
+}
+
 //===----------------------------------------------------------------------===//
 // Value::UseLists
 //===----------------------------------------------------------------------===//
@@ -134,6 +141,13 @@ void Value::replaceUsesWithIf(Value newValue,
     use.set(newValue);
 }
 
+/// Returns true if the value is used outside of the given block.
+bool Value::isUsedOutsideOfBlock(Block *block) {
+  return llvm::any_of(getUsers(), [block](Operation *user) {
+    return user->getBlock() != block;
+  });
+}
+
 //===--------------------------------------------------------------------===//
 // Uses
 
diff --git a/mlir/lib/Analysis/Verifier.cpp b/mlir/lib/IR/Verifier.cpp
similarity index 99%
rename from mlir/lib/Analysis/Verifier.cpp
rename to mlir/lib/IR/Verifier.cpp
index e15d3513b7de5..763758193c5df 100644
--- a/mlir/lib/Analysis/Verifier.cpp
+++ b/mlir/lib/IR/Verifier.cpp
@@ -24,10 +24,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Analysis/Verifier.h"
-#include "mlir/Analysis/Dominance.h"
+#include "mlir/IR/Verifier.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Dialect.h"
+#include "mlir/IR/Dominance.h"
 #include "mlir/IR/Operation.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/FormatVariadic.h"
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index 093b4faf36137..61b64148db41a 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -16,9 +16,8 @@ add_mlir_library(MLIRCallInterfaces
 
   DEPENDS
   MLIRCallInterfacesIncGen
-  )
-target_link_libraries(MLIRCallInterfaces
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
@@ -30,9 +29,8 @@ add_mlir_library(MLIRControlFlowInterfaces
 
   DEPENDS
   MLIRControlFlowInterfacesIncGen
-  )
-target_link_libraries(MLIRControlFlowInterfaces
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
@@ -44,9 +42,8 @@ add_mlir_library(MLIRDerivedAttributeOpInterface
 
   DEPENDS
   MLIRDerivedAttributeOpInterfaceIncGen
-  )
-target_link_libraries(MLIRDerivedAttributeOpInterface
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
@@ -58,9 +55,8 @@ add_mlir_library(MLIRInferTypeOpInterface
 
   DEPENDS
   MLIRInferTypeOpInterfaceIncGen
-  )
-target_link_libraries(MLIRInferTypeOpInterface
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
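
The two Value helpers added in mlir/lib/IR/Value.cpp above compose naturally; a minimal sketch (the wrapper function itself is hypothetical):

// Editor's sketch, not part of the patch.
static bool escapesDefiningBlock(mlir::Value value) {
  mlir::Block *definingBlock = value.getParentBlock();
  return value.isUsedOutsideOfBlock(definingBlock);
}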
@@ -72,9 +68,8 @@ add_mlir_library(MLIRLoopLikeInterface
 
   DEPENDS
   MLIRLoopLikeInterfaceIncGen
-  )
-target_link_libraries(MLIRLoopLikeInterface
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
@@ -86,9 +81,8 @@ add_mlir_library(MLIRSideEffects
 
   DEPENDS
   MLIRSideEffectOpInterfacesIncGen
-  )
-target_link_libraries(MLIRSideEffects
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
@@ -100,8 +94,7 @@ add_mlir_library(MLIRViewLikeInterface
 
   DEPENDS
   MLIRViewLikeInterfaceIncGen
-  )
-target_link_libraries(MLIRViewLikeInterface
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   )
diff --git a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
index 746dd402a35ac..c1fa833f26daf 100644
--- a/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
+++ b/mlir/lib/Interfaces/ControlFlowInterfaces.cpp
@@ -21,39 +21,6 @@ using namespace mlir;
 // BranchOpInterface
 //===----------------------------------------------------------------------===//
 
-/// Erase an operand from a branch operation that is used as a successor
-/// operand. 'operandIndex' is the operand within 'operands' to be erased.
-void mlir::detail::eraseBranchSuccessorOperand(OperandRange operands,
-                                               unsigned operandIndex,
-                                               Operation *op) {
-  assert(operandIndex < operands.size() &&
-         "invalid index for successor operands");
-
-  // Erase the operand from the operation.
-  size_t fullOperandIndex = operands.getBeginOperandIndex() + operandIndex;
-  op->eraseOperand(fullOperandIndex);
-
-  // If this operation has an OperandSegmentSizeAttr, keep it up to date.
-  auto operandSegmentAttr =
-      op->getAttrOfType<DenseIntElementsAttr>("operand_segment_sizes");
-  if (!operandSegmentAttr)
-    return;
-
-  // Find the segment containing the full operand index and decrement it.
-  // TODO: This seems like a general utility that could be added somewhere.
-  SmallVector<int32_t, 4> values(operandSegmentAttr.getValues<int32_t>());
-  unsigned currentSize = 0;
-  for (unsigned i = 0, e = values.size(); i != e; ++i) {
-    currentSize += values[i];
-    if (fullOperandIndex < currentSize) {
-      --values[i];
-      break;
-    }
-  }
-  op->setAttr("operand_segment_sizes",
-              DenseIntElementsAttr::get(operandSegmentAttr.getType(), values));
-}
-
 /// Returns the `BlockArgument` corresponding to operand `operandIndex` in some
 /// successor if 'operandIndex' is within the range of 'operands', or None if
 /// `operandIndex` isn't a successor operand index.
diff --git a/mlir/lib/Parser/CMakeLists.txt b/mlir/lib/Parser/CMakeLists.txt
index 019f8bafa0c00..b9ab3f33ba201 100644
--- a/mlir/lib/Parser/CMakeLists.txt
+++ b/mlir/lib/Parser/CMakeLists.txt
@@ -5,8 +5,7 @@ add_mlir_library(MLIRParser
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Parser
-  )
-target_link_libraries(MLIRParser
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
-  MLIRAnalysis)
+  )
diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp
index 8ba391de7baf7..d129b867fb0c4 100644
--- a/mlir/lib/Parser/Parser.cpp
+++ b/mlir/lib/Parser/Parser.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Parser.h"
 #include "Lexer.h"
-#include "mlir/Analysis/Verifier.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Attributes.h"
@@ -25,6 +24,7 @@
 #include "mlir/IR/Module.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/StandardTypes.h"
+#include "mlir/IR/Verifier.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1676,6 +1676,7 @@ Parser::parseAttributeDict(SmallVectorImpl<NamedAttribute> &attributes) {
   if (parseToken(Token::l_brace, "expected '{' in attribute dictionary"))
     return failure();
 
+  llvm::SmallDenseSet<Identifier> seenKeys;
   auto parseElt = [&]() -> ParseResult {
     // The name of an attribute can either be a bare identifier, or a string.
     Optional<Identifier> nameId;
@@ -1686,6 +1687,8 @@ Parser::parseAttributeDict(SmallVectorImpl<NamedAttribute> &attributes) {
       nameId = builder.getIdentifier(getTokenSpelling());
     else
       return emitError("expected attribute name");
+    if (!seenKeys.insert(*nameId).second)
+      return emitError("duplicate key in dictionary attribute");
     consumeToken();
 
     // Try to parse the '=' for the attribute value.
@@ -5027,7 +5030,7 @@ ParseResult ModuleParser::parseModule(ModuleOp module) {
     if (nested && std::next(operations.begin(), 2) == operations.end()) {
       // Merge the data of the nested module operation into 'module'.
       module.setLoc(nested.getLoc());
-      module.setAttrs(nested.getOperation()->getAttrList());
+      module.setAttrs(nested.getOperation()->getMutableAttrDict());
       bodyBlocks.splice(bodyBlocks.end(), nested.getBodyRegion().getBlocks());
 
       // Erase the original module body.
diff --git a/mlir/lib/Pass/CMakeLists.txt b/mlir/lib/Pass/CMakeLists.txt
index 7e86864cf6c46..c012b056dba39 100644
--- a/mlir/lib/Pass/CMakeLists.txt
+++ b/mlir/lib/Pass/CMakeLists.txt
@@ -4,9 +4,11 @@ add_mlir_library(MLIRPass
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Pass
-  )
-target_link_libraries(MLIRPass
-  PUBLIC
+
+  DEPENDS
+  mlir-generic-headers
+
+  LINK_LIBS PUBLIC
   MLIRAnalysis
   MLIRIR
-  LLVMSupport)
+  )
diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp
index 3398005d8bfa3..842b83c5d0f7b 100644
--- a/mlir/lib/Pass/IRPrinting.cpp
+++ b/mlir/lib/Pass/IRPrinting.cpp
@@ -33,8 +33,9 @@ class OperationFingerPrint {
       // - Operation pointer
       addDataToHash(hasher, op);
       // - Attributes
-      addDataToHash(hasher,
-                    op->getAttrList().getDictionary().getAsOpaquePointer());
+      addDataToHash(
+          hasher,
+          op->getMutableAttrDict().getDictionary().getAsOpaquePointer());
       // - Blocks in Regions
       for (Region &region : op->getRegions()) {
        for (Block &block : region) {
@@ -98,7 +99,7 @@ class IRPrinterInstrumentation : public PassInstrumentation {
 
 /// Returns true if the given pass is hidden from IR printing.
 static bool isHiddenPass(Pass *pass) {
-  return isAdaptorPass(pass) || isa<VerifierPass>(pass);
+  return isa<OpToOpPassAdaptor>(pass) || isa<VerifierPass>(pass);
 }
 
 static void printIR(Operation *op, bool printModuleScope, raw_ostream &out,
@@ -172,7 +173,7 @@ void IRPrinterInstrumentation::runAfterPass(Pass *pass, Operation *op) {
 }
 
 void IRPrinterInstrumentation::runAfterPassFailed(Pass *pass, Operation *op) {
-  if (isAdaptorPass(pass))
+  if (isa<OpToOpPassAdaptor>(pass))
     return;
   if (config->shouldPrintAfterOnlyOnChange())
     beforePassFingerPrints.erase(pass);
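
Since IR printing is now driven by an IRPrinterConfig object (see enableIRPrinting just below), clients can subclass it; a sketch under the assumption that the config exposes a printAfterIfEnabled virtual with this shape:

// Editor's sketch, not part of the patch; the virtual's exact signature is
// an assumption, only the enableIRPrinting(std::unique_ptr<IRPrinterConfig>)
// entry point is taken from this diff.
struct PrintSelectedPassesConfig : public mlir::PassManager::IRPrinterConfig {
  void printAfterIfEnabled(mlir::Pass *pass, mlir::Operation *op,
                           PrintCallbackFn printCallback) override {
    if (pass->getName().contains("canonicalize"))
      printCallback(llvm::errs());
  }
};

void configurePrinting(mlir::PassManager &pm) {
  pm.enableIRPrinting(std::make_unique<PrintSelectedPassesConfig>());
}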
@@ -256,7 +257,8 @@ struct BasicIRPrinterConfig : public PassManager::IRPrinterConfig {
 
 /// Add an instrumentation to print the IR before and after pass execution,
 /// using the provided configuration.
 void PassManager::enableIRPrinting(std::unique_ptr<IRPrinterConfig> config) {
-  if (config->shouldPrintAtModuleScope() && isMultithreadingEnabled())
+  if (config->shouldPrintAtModuleScope() &&
+      getContext()->isMultithreadingEnabled())
     llvm::report_fatal_error("IR printing can't be setup on a pass-manager "
                              "without disabling multi-threading first.");
   addInstrumentation(
diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp
index 53ccd4f005a47..83855faca7552 100644
--- a/mlir/lib/Pass/Pass.cpp
+++ b/mlir/lib/Pass/Pass.cpp
@@ -12,16 +12,18 @@
 
 #include "mlir/Pass/Pass.h"
 #include "PassDetail.h"
-#include "mlir/Analysis/Verifier.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Module.h"
+#include "mlir/IR/Verifier.h"
 #include "mlir/Support/FileUtilities.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Parallel.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/ToolOutputFile.h"
@@ -51,7 +53,7 @@ void Pass::copyOptionValuesFrom(const Pass *other) {
 /// an adaptor pass, print with the op_name(sub_pass,...) format.
 void Pass::printAsTextualPipeline(raw_ostream &os) {
   // Special case for adaptors to use the 'op_name(sub_passes)' format.
-  if (auto *adaptor = getAdaptorPassBase(this)) {
+  if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(this)) {
     llvm::interleaveComma(adaptor->getPassManagers(), os,
                           [&](OpPassManager &pm) {
                             os << pm.getOpName() << "(";
@@ -116,28 +118,39 @@ void VerifierPass::runOnOperation() {
 namespace mlir {
 namespace detail {
 struct OpPassManagerImpl {
-  OpPassManagerImpl(OperationName name, bool disableThreads, bool verifyPasses)
-      : name(name), disableThreads(disableThreads), verifyPasses(verifyPasses) {
-  }
+  OpPassManagerImpl(OperationName name, bool verifyPasses)
+      : name(name), verifyPasses(verifyPasses) {}
 
   /// Merge the passes of this pass manager into the one provided.
-  void mergeInto(OpPassManagerImpl &rhs) {
-    assert(name == rhs.name && "merging unrelated pass managers");
-    for (auto &pass : passes)
-      rhs.passes.push_back(std::move(pass));
-    passes.clear();
+  void mergeInto(OpPassManagerImpl &rhs);
+
+  /// Nest a new operation pass manager for the given operation kind under this
+  /// pass manager.
+  OpPassManager &nest(const OperationName &nestedName);
+  OpPassManager &nest(StringRef nestedName) {
+    return nest(OperationName(nestedName, getContext()));
   }
 
+  /// Add the given pass to this pass manager. If this pass has a concrete
+  /// operation type, it must be the same type as this pass manager.
+  void addPass(std::unique_ptr<Pass> pass);
+
   /// Coalesce adjacent AdaptorPasses into one large adaptor. This runs
   /// recursively through the pipeline graph.
   void coalesceAdjacentAdaptorPasses();
 
+  /// Split all of AdaptorPasses such that each adaptor only contains one leaf
+  /// pass.
+  void splitAdaptorPasses();
+
+  /// Return an instance of the context.
+  MLIRContext *getContext() const {
+    return name.getAbstractOperation()->dialect.getContext();
+  }
+
   /// The name of the operation that passes of this pass manager operate on.
   OperationName name;
 
-  /// Flag to disable multi-threading of passes.
-  bool disableThreads : 1;
-
   /// Flag that specifies if the IR should be verified after each pass has run.
   bool verifyPasses : 1;
@@ -147,20 +160,44 @@ struct OpPassManagerImpl {
 } // end namespace detail
 } // end namespace mlir
 
-/// Coalesce adjacent AdaptorPasses into one large adaptor. This runs
-/// recursively through the pipeline graph.
+void OpPassManagerImpl::mergeInto(OpPassManagerImpl &rhs) {
+  assert(name == rhs.name && "merging unrelated pass managers");
+  for (auto &pass : passes)
+    rhs.passes.push_back(std::move(pass));
+  passes.clear();
+}
+
+OpPassManager &OpPassManagerImpl::nest(const OperationName &nestedName) {
+  OpPassManager nested(nestedName, verifyPasses);
+  auto *adaptor = new OpToOpPassAdaptor(std::move(nested));
+  addPass(std::unique_ptr<Pass>(adaptor));
+  return adaptor->getPassManagers().front();
+}
+
+void OpPassManagerImpl::addPass(std::unique_ptr<Pass> pass) {
+  // If this pass runs on a different operation than this pass manager, then
+  // implicitly nest a pass manager for this operation.
+  auto passOpName = pass->getOpName();
+  if (passOpName && passOpName != name.getStringRef())
+    return nest(*passOpName).addPass(std::move(pass));
+
+  passes.emplace_back(std::move(pass));
+  if (verifyPasses)
+    passes.emplace_back(std::make_unique<VerifierPass>());
+}
+
 void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() {
   // Bail out early if there are no adaptor passes.
   if (llvm::none_of(passes, [](std::unique_ptr<Pass> &pass) {
-        return isAdaptorPass(pass.get());
+        return isa<OpToOpPassAdaptor>(pass.get());
       }))
     return;
 
   // Walk the pass list and merge adjacent adaptors.
-  OpToOpPassAdaptorBase *lastAdaptor = nullptr;
+  OpToOpPassAdaptor *lastAdaptor = nullptr;
   for (auto it = passes.begin(), e = passes.end(); it != e; ++it) {
     // Check to see if this pass is an adaptor.
-    if (auto *currentAdaptor = getAdaptorPassBase(it->get())) {
+    if (auto *currentAdaptor = dyn_cast<OpToOpPassAdaptor>(it->get())) {
       // If it is the first adaptor in a possible chain, remember it and
       // continue.
       if (!lastAdaptor) {
@@ -199,13 +236,37 @@ void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() {
   llvm::erase_if(passes, std::logical_not<std::unique_ptr<Pass>>());
 }
 
+void OpPassManagerImpl::splitAdaptorPasses() {
+  std::vector<std::unique_ptr<Pass>> oldPasses;
+  std::swap(passes, oldPasses);
+
+  for (std::unique_ptr<Pass> &pass : oldPasses) {
+    // If this pass isn't an adaptor, move it directly to the new pass list.
+    auto *currentAdaptor = dyn_cast<OpToOpPassAdaptor>(pass.get());
+    if (!currentAdaptor) {
+      passes.push_back(std::move(pass));
+      continue;
+    }
+
+    // Otherwise, split the adaptors of each manager within the adaptor.
+    for (OpPassManager &adaptorPM : currentAdaptor->getPassManagers()) {
+      adaptorPM.getImpl().splitAdaptorPasses();
+
+      // Add all non-verifier passes to this pass manager.
+      for (std::unique_ptr<Pass> &nestedPass : adaptorPM.getImpl().passes) {
+        if (!isa<VerifierPass>(nestedPass.get()))
+          nest(adaptorPM.getOpName()).addPass(std::move(nestedPass));
+      }
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // OpPassManager
 //===----------------------------------------------------------------------===//
 
-OpPassManager::OpPassManager(OperationName name, bool disableThreads,
-                             bool verifyPasses)
-    : impl(new OpPassManagerImpl(name, disableThreads, verifyPasses)) {
+OpPassManager::OpPassManager(OperationName name, bool verifyPasses)
+    : impl(new OpPassManagerImpl(name, verifyPasses)) {
   assert(name.getAbstractOperation() &&
          "OpPassManager can only operate on registered operations");
   assert(name.getAbstractOperation()->hasProperty(
@@ -216,8 +277,7 @@ OpPassManager::OpPassManager(OperationName name, bool disableThreads,
 OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {}
 OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; }
 OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) {
-  impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->disableThreads,
-                                   rhs.impl->verifyPasses));
+  impl.reset(new OpPassManagerImpl(rhs.impl->name, rhs.impl->verifyPasses));
   for (auto &pass : rhs.impl->passes)
     impl->passes.emplace_back(pass->clone());
   return *this;
@@ -242,36 +302,16 @@ LogicalResult OpPassManager::run(Operation *op, AnalysisManager am) {
 /// Nest a new operation pass manager for the given operation kind under this
 /// pass manager.
 OpPassManager &OpPassManager::nest(const OperationName &nestedName) {
-  OpPassManager nested(nestedName, impl->disableThreads, impl->verifyPasses);
-
-  /// Create an adaptor for this pass. If multi-threading is disabled, then
-  /// create a synchronous adaptor.
-  if (impl->disableThreads || !llvm::llvm_is_multithreaded()) {
-    auto *adaptor = new OpToOpPassAdaptor(std::move(nested));
-    addPass(std::unique_ptr<Pass>(adaptor));
-    return adaptor->getPassManagers().front();
-  }
-
-  auto *adaptor = new OpToOpPassAdaptorParallel(std::move(nested));
-  addPass(std::unique_ptr<Pass>(adaptor));
-  return adaptor->getPassManagers().front();
+  return impl->nest(nestedName);
 }
 OpPassManager &OpPassManager::nest(StringRef nestedName) {
-  return nest(OperationName(nestedName, getContext()));
+  return impl->nest(nestedName);
 }
 
 /// Add the given pass to this pass manager. If this pass has a concrete
 /// operation type, it must be the same type as this pass manager.
 void OpPassManager::addPass(std::unique_ptr<Pass> pass) {
-  // If this pass runs on a different operation than this pass manager, then
-  // implicitly nest a pass manager for this operation.
-  auto passOpName = pass->getOpName();
-  if (passOpName && passOpName != impl->name.getStringRef())
-    return nest(*passOpName).addPass(std::move(pass));
-
-  impl->passes.emplace_back(std::move(pass));
-  if (impl->verifyPasses)
-    impl->passes.emplace_back(std::make_unique<VerifierPass>());
+  impl->addPass(std::move(pass));
 }
 
 /// Returns the number of passes held by this manager.
@@ -281,19 +321,17 @@ size_t OpPassManager::size() const { return impl->passes.size(); }
 OpPassManagerImpl &OpPassManager::getImpl() { return *impl; }
 
 /// Return an instance of the context.
-MLIRContext *OpPassManager::getContext() const {
-  return impl->name.getAbstractOperation()->dialect.getContext();
-}
+MLIRContext *OpPassManager::getContext() const { return impl->getContext(); }
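
With addPass now deferring to OpPassManagerImpl::addPass, nesting happens implicitly; a sketch with hypothetical pass factories:

// Editor's sketch, not part of the patch; both factories are placeholders.
void buildPipeline(mlir::PassManager &pm) {
  // Operates on the top-level module, so it is added directly.
  pm.addPass(createMyModulePass());
  // Registered for 'func' ops: an OpToOpPassAdaptor with a nested 'func'
  // OpPassManager is created automatically, and a VerifierPass is appended
  // after each pass when verification is enabled.
  pm.addPass(createMyFunctionPass());
}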
 /// Return the operation name that this pass manager operates on.
 const OperationName &OpPassManager::getOpName() const { return impl->name; }
 
-/// Prints out the passes of the pass manager as the textual representation
-/// of pipelines.
-void OpPassManager::printAsTextualPipeline(raw_ostream &os) {
+/// Prints out the given passes as the textual representation of a pipeline.
+static void printAsTextualPipeline(ArrayRef<std::unique_ptr<Pass>> passes,
+                                   raw_ostream &os) {
   // Filter out passes that are not part of the public pipeline.
-  auto filteredPasses = llvm::make_filter_range(
-      impl->passes, [](const std::unique_ptr<Pass> &pass) {
+  auto filteredPasses =
+      llvm::make_filter_range(passes, [](const std::unique_ptr<Pass> &pass) {
         return !isa<VerifierPass>(pass);
       });
   llvm::interleaveComma(filteredPasses, os,
@@ -302,6 +340,12 @@
                         });
 }
 
+/// Prints out the passes of the pass manager as the textual representation
+/// of pipelines.
+void OpPassManager::printAsTextualPipeline(raw_ostream &os) {
+  ::printAsTextualPipeline(impl->passes, os);
+}
+
 //===----------------------------------------------------------------------===//
 // OpToOpPassAdaptor
 //===----------------------------------------------------------------------===//
@@ -330,12 +374,12 @@ static OpPassManager *findPassManagerFor(MutableArrayRef<OpPassManager> mgrs,
   return it == mgrs.end() ? nullptr : &*it;
 }
 
-OpToOpPassAdaptorBase::OpToOpPassAdaptorBase(OpPassManager &&mgr) {
+OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr) {
   mgrs.emplace_back(std::move(mgr));
 }
 
 /// Merge the current pass adaptor into given 'rhs'.
-void OpToOpPassAdaptorBase::mergeInto(OpToOpPassAdaptorBase &rhs) {
+void OpToOpPassAdaptor::mergeInto(OpToOpPassAdaptor &rhs) {
   for (auto &pm : mgrs) {
     // If an existing pass manager exists, then merge the given pass manager
     // into it.
@@ -357,7 +401,7 @@ void OpToOpPassAdaptorBase::mergeInto(OpToOpPassAdaptorBase &rhs) {
 }
 
 /// Returns the adaptor pass name.
-std::string OpToOpPassAdaptorBase::getName() {
+std::string OpToOpPassAdaptor::getAdaptorName() {
   std::string name = "Pipeline Collection : [";
   llvm::raw_string_ostream os(name);
   llvm::interleaveComma(getPassManagers(), os, [&](OpPassManager &pm) {
@@ -367,11 +411,16 @@ std::string OpToOpPassAdaptorBase::getName() {
   return os.str();
 }
 
-OpToOpPassAdaptor::OpToOpPassAdaptor(OpPassManager &&mgr)
-    : OpToOpPassAdaptorBase(std::move(mgr)) {}
-
 /// Run the held pipeline over all nested operations.
 void OpToOpPassAdaptor::runOnOperation() {
+  if (getContext().isMultithreadingEnabled())
+    runOnOperationAsyncImpl();
+  else
+    runOnOperationImpl();
+}
+
+/// Run this pass adaptor synchronously.
+void OpToOpPassAdaptor::runOnOperationImpl() {
   auto am = getAnalysisManager();
   PassInstrumentation::PipelineParentInfo parentInfo = {llvm::get_threadid(),
                                                         this};
@@ -397,9 +446,6 @@ void OpToOpPassAdaptor::runOnOperation() {
   }
 }
 
-OpToOpPassAdaptorParallel::OpToOpPassAdaptorParallel(OpPassManager &&mgr)
-    : OpToOpPassAdaptorBase(std::move(mgr)) {}
-
 /// Utility functor that checks if the two ranges of pass managers have a size
 /// mismatch.
 static bool hasSizeMismatch(ArrayRef<OpPassManager> lhs,
                             ArrayRef<OpPassManager> rhs) {
@@ -409,8 +455,8 @@ static bool hasSizeMismatch(ArrayRef<OpPassManager> lhs,
                       [&](size_t i) { return lhs[i].size() != rhs[i].size(); });
 }
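
Because the adaptor now chooses its strategy from the context rather than a pass-manager flag, single-threaded execution is requested on the MLIRContext; a minimal sketch:

// Editor's sketch, not part of the patch.
void runSingleThreaded(mlir::PassManager &pm, mlir::ModuleOp module) {
  // Forces nested OpToOpPassAdaptors down runOnOperationImpl() instead of
  // runOnOperationAsyncImpl().
  module.getContext()->disableMultithreading();
  (void)pm.run(module);
}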
-// Run the held pipeline asynchronously across the functions within the module.
-void OpToOpPassAdaptorParallel::runOnOperation() {
+/// Run this pass adaptor asynchronously.
+void OpToOpPassAdaptor::runOnOperationAsyncImpl() {
   AnalysisManager am = getAnalysisManager();
 
   // Create the async executors if they haven't been created, or if the main
@@ -491,78 +537,171 @@ void OpToOpPassAdaptorParallel::runOnOperation() {
     signalPassFailure();
 }
 
-/// Utility function to convert the given class to the base adaptor it is an
-/// adaptor pass, returns nullptr otherwise.
-OpToOpPassAdaptorBase *mlir::detail::getAdaptorPassBase(Pass *pass) {
-  if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass))
-    return adaptor;
-  if (auto *adaptor = dyn_cast<OpToOpPassAdaptorParallel>(pass))
-    return adaptor;
-  return nullptr;
-}
-
 //===----------------------------------------------------------------------===//
 // PassCrashReproducer
 //===----------------------------------------------------------------------===//
 
-/// Safely run the pass manager over the given module, creating a reproducer
-/// on failure or crash.
-static LogicalResult runWithCrashRecovery(OpPassManager &pm,
-                                          ModuleAnalysisManager &am,
-                                          ModuleOp module,
-                                          StringRef crashReproducerFileName) {
-  /// Enable crash recovery.
-  llvm::CrashRecoveryContext::Enable();
+namespace {
+/// This class contains all of the context for generating a recovery
+/// reproducer. Each recovery context is registered globally to allow for
+/// generating reproducers when a signal is raised, such as a segfault.
+struct RecoveryReproducerContext {
+  RecoveryReproducerContext(MutableArrayRef<std::unique_ptr<Pass>> passes,
+                            ModuleOp module, StringRef filename,
+                            bool disableThreads, bool verifyPasses);
+  ~RecoveryReproducerContext();
+
+  /// Generate a reproducer with the current context.
+  LogicalResult generate(std::string &error);
 
-  // Grab the textual pipeline executing within the pass manager first, just in
-  // case the pass manager becomes compromised.
+private:
+  /// This function is invoked in the event of a crash.
+  static void crashHandler(void *);
+
+  /// Register a signal handler to run in the event of a crash.
+  static void registerSignalHandler();
+
+  /// The textual description of the currently executing pipeline.
   std::string pipeline;
+
+  /// The MLIR module representing the IR before the crash.
+  OwningModuleRef module;
+
+  /// The filename to use when generating the reproducer.
+  StringRef filename;
+
+  /// Various pass manager and context flags.
+  bool disableThreads;
+  bool verifyPasses;
+
+  /// The current set of active reproducer contexts. This is used in the event
+  /// of a crash. This is not thread_local as the pass manager may produce any
+  /// number of child threads. This uses a set to allow for multiple MLIR pass
+  /// managers to be running at the same time.
+  static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> reproducerMutex;
+  static llvm::ManagedStatic<
+      llvm::SmallSetVector<RecoveryReproducerContext *, 1>>
+      reproducerSet;
+};
+} // end anonymous namespace
+
+llvm::ManagedStatic<llvm::sys::SmartMutex<true>>
+    RecoveryReproducerContext::reproducerMutex;
+llvm::ManagedStatic<llvm::SmallSetVector<RecoveryReproducerContext *, 1>>
+    RecoveryReproducerContext::reproducerSet;
+
+RecoveryReproducerContext::RecoveryReproducerContext(
+    MutableArrayRef<std::unique_ptr<Pass>> passes, ModuleOp module,
+    StringRef filename, bool disableThreads, bool verifyPasses)
+    : module(module.clone()), filename(filename),
+      disableThreads(disableThreads), verifyPasses(verifyPasses) {
+  // Grab the textual pipeline being executed.
   {
     llvm::raw_string_ostream pipelineOS(pipeline);
-    pm.printAsTextualPipeline(pipelineOS);
+    ::printAsTextualPipeline(passes, pipelineOS);
   }
 
-  // Clone the initial module before running it through the pass pipeline.
-  OwningModuleRef reproducerModule = module.clone();
-
-  // Safely invoke the pass manager within a recovery context.
-  LogicalResult passManagerResult = failure();
-  llvm::CrashRecoveryContext recoveryContext;
-  recoveryContext.RunSafelyOnThread(
-      [&] { passManagerResult = pm.run(module, am); });
+  // Make sure that the handler is registered, and update the current context.
+  llvm::sys::SmartScopedLock<true> producerLock(*reproducerMutex);
+  registerSignalHandler();
+  reproducerSet->insert(this);
+}
 
-  /// Disable crash recovery.
-  llvm::CrashRecoveryContext::Disable();
-  if (succeeded(passManagerResult))
-    return success();
+RecoveryReproducerContext::~RecoveryReproducerContext() {
+  llvm::sys::SmartScopedLock<true> producerLock(*reproducerMutex);
+  reproducerSet->remove(this);
+}
 
-  // The conversion failed, so generate a reproducible.
-  std::string error;
+LogicalResult RecoveryReproducerContext::generate(std::string &error) {
   std::unique_ptr<llvm::ToolOutputFile> outputFile =
-      mlir::openOutputFile(crashReproducerFileName, &error);
+      mlir::openOutputFile(filename, &error);
   if (!outputFile)
-    return emitError(UnknownLoc::get(pm.getContext()),
-                     "<MLIR-PassManager-Crash-Reproducer>: ")
-           << error;
+    return failure();
   auto &outputOS = outputFile->os();
 
   // Output the current pass manager configuration.
   outputOS << "// configuration: -pass-pipeline='" << pipeline << "'";
-  if (pm.getImpl().disableThreads)
-    outputOS << " -disable-pass-threading";
+  if (disableThreads)
+    outputOS << " -mlir-disable-threading";
 
-  // TODO(riverriddle) Should this also be configured with a pass manager flag?
-  outputOS << "\n// note: verifyPasses="
-           << (pm.getImpl().verifyPasses ? "true" : "false") << "\n";
+  // TODO: Should this also be configured with a pass manager flag?
+  outputOS << "\n// note: verifyPasses=" << (verifyPasses ? "true" : "false")
+           << "\n";
 
   // Output the .mlir module.
-  reproducerModule->print(outputOS);
+  module->print(outputOS);
   outputFile->keep();
+  return success();
+}
+
+void RecoveryReproducerContext::crashHandler(void *) {
+  // Walk the current stack of contexts and generate a reproducer for each one.
+  // We can't know for certain which one was the cause, so we need to generate
+  // a reproducer for all of them.
+  std::string ignored;
+  for (RecoveryReproducerContext *context : *reproducerSet)
+    context->generate(ignored);
+}
+
+void RecoveryReproducerContext::registerSignalHandler() {
+  // Ensure that the handler is only registered once.
+  static bool registered =
+      (llvm::sys::AddSignalHandler(crashHandler, nullptr), false);
+  (void)registered;
+}
+
+/// Run the pass manager with crash recovery enabled.
+LogicalResult PassManager::runWithCrashRecovery(ModuleOp module,
+                                                AnalysisManager am) {
+  // If this isn't a local reproducer, run all of the passes in recovery mode.
+  if (!localReproducer)
+    return runWithCrashRecovery(impl->passes, module, am);
+
+  // Split the passes within adaptors to ensure that each pass can be run in
+  // isolation.
+  impl->splitAdaptorPasses();
+
+  // If this is a local reproducer, run each of the passes individually. If the
+  // verifier is enabled, each pass will have a verifier after it; that
+  // verifier is included in the recovery run.
+  unsigned stride = impl->verifyPasses ? 2 : 1;
+  MutableArrayRef<std::unique_ptr<Pass>> passes = impl->passes;
+  for (unsigned i = 0, e = passes.size(); i != e; i += stride) {
+    if (failed(runWithCrashRecovery(passes.slice(i, stride), module, am)))
+      return failure();
+  }
+  return success();
+}
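
A sketch of the stride arithmetic above: with verification enabled, impl->passes interleaves passes and verifiers, and slicing with stride 2 keeps each pass paired with its trailing verifier when re-run in isolation (the helper is illustrative):

// Editor's sketch, not part of the patch.
// passes = [A, Verifier, B, Verifier] with stride = 2 yields the slices
// {A, Verifier} and {B, Verifier}.
void forEachRecoverySlice(
    llvm::MutableArrayRef<std::unique_ptr<mlir::Pass>> passes,
    bool verifyPasses,
    llvm::function_ref<void(llvm::MutableArrayRef<std::unique_ptr<mlir::Pass>>)>
        fn) {
  unsigned stride = verifyPasses ? 2 : 1;
  for (unsigned i = 0, e = passes.size(); i != e; i += stride)
    fn(passes.slice(i, stride));
}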
-  return reproducerModule->emitError()
+/// Run the given passes with crash recovery enabled.
+LogicalResult
+PassManager::runWithCrashRecovery(MutableArrayRef<std::unique_ptr<Pass>> passes,
+                                  ModuleOp module, AnalysisManager am) {
+  RecoveryReproducerContext context(passes, module, *crashReproducerFileName,
+                                    !getContext()->isMultithreadingEnabled(),
+                                    impl->verifyPasses);
+
+  // Safely invoke the passes within a recovery context.
+  llvm::CrashRecoveryContext::Enable();
+  LogicalResult passManagerResult = failure();
+  llvm::CrashRecoveryContext recoveryContext;
+  recoveryContext.RunSafelyOnThread([&] {
+    for (std::unique_ptr<Pass> &pass : passes)
+      if (failed(pass->run(module, am)))
+        return;
+    passManagerResult = success();
+  });
+  llvm::CrashRecoveryContext::Disable();
+  if (succeeded(passManagerResult))
+    return success();
+
+  std::string error;
+  if (failed(context.generate(error)))
+    return module.emitError("<MLIR-PassManager-Crash-Reproducer>: ") << error;
+  return module.emitError()
          << "A failure has been detected while processing the MLIR module, a "
             "reproducer has been generated in '"
-         << crashReproducerFileName << "'";
+         << *crashReproducerFileName << "'";
 }
 
 //===----------------------------------------------------------------------===//
@@ -571,8 +710,8 @@ static LogicalResult runWithCrashRecovery(OpPassManager &pm,
 
 PassManager::PassManager(MLIRContext *ctx, bool verifyPasses)
     : OpPassManager(OperationName(ModuleOp::getOperationName(), ctx),
-                    /*disableThreads=*/false, verifyPasses),
-      passTiming(false) {}
+                    verifyPasses),
+      passTiming(false), localReproducer(false) {}
 
 PassManager::~PassManager() {}
 
@@ -587,10 +726,9 @@ LogicalResult PassManager::run(ModuleOp module) {
 
   // If reproducer generation is enabled, run the pass manager with crash
   // handling enabled.
-  LogicalResult result =
-      crashReproducerFileName
-          ? runWithCrashRecovery(*this, am, module, *crashReproducerFileName)
-          : OpPassManager::run(module, am);
+  LogicalResult result = crashReproducerFileName
+                             ? runWithCrashRecovery(module, am)
+                             : OpPassManager::run(module, am);
 
   // Dump all of the pass statistics if necessary.
   if (passStatisticsMode)
@@ -598,20 +736,15 @@ LogicalResult PassManager::run(ModuleOp module) {
   return result;
 }
 
-/// Disable support for multi-threading within the pass manager.
-void PassManager::disableMultithreading(bool disable) {
-  getImpl().disableThreads = disable;
-}
-
-bool PassManager::isMultithreadingEnabled() {
-  return !getImpl().disableThreads;
-}
-
 /// Enable support for the pass manager to generate a reproducer on the event
 /// of a crash or a pass failure. `outputFile` is a .mlir filename used to write
-/// the generated reproducer.
-void PassManager::enableCrashReproducerGeneration(StringRef outputFile) {
+/// the generated reproducer. If `genLocalReproducer` is true, the pass manager
+/// will attempt to generate a local reproducer that contains the smallest
+/// pipeline.
+void PassManager::enableCrashReproducerGeneration(StringRef outputFile,
+                                                  bool genLocalReproducer) {
   crashReproducerFileName = std::string(outputFile);
+  localReproducer = genLocalReproducer;
 }
 
 /// Add the provided instrumentation to the pass manager.
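
Client-side usage of the extended entry point, mirroring what applyPassManagerCLOptions does for -pass-pipeline-local-reproducer further below; the output path is a placeholder:

// Editor's sketch, not part of the patch.
void setupReproducer(mlir::PassManager &pm) {
  pm.enableCrashReproducerGeneration("/tmp/repro.mlir",
                                     /*genLocalReproducer=*/true);
}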
diff --git a/mlir/lib/Pass/PassDetail.h b/mlir/lib/Pass/PassDetail.h
index 59d9a7a0576ff..2342a1a7af97d 100644
--- a/mlir/lib/Pass/PassDetail.h
+++ b/mlir/lib/Pass/PassDetail.h
@@ -27,69 +27,44 @@ class VerifierPass : public PassWrapper<VerifierPass, OperationPass<>> {
 // OpToOpPassAdaptor
 //===----------------------------------------------------------------------===//
 
-/// A base class for Op-to-Op adaptor passes.
-class OpToOpPassAdaptorBase {
-public:
-  OpToOpPassAdaptorBase(OpPassManager &&mgr);
-  OpToOpPassAdaptorBase(const OpToOpPassAdaptorBase &rhs) = default;
-
-  /// Merge the current pass adaptor into given 'rhs'.
-  void mergeInto(OpToOpPassAdaptorBase &rhs);
-
-  /// Returns the pass managers held by this adaptor.
-  MutableArrayRef<OpPassManager> getPassManagers() { return mgrs; }
-
-  /// Returns the adaptor pass name.
-  std::string getName();
-
-protected:
-  // A set of adaptors to run.
-  SmallVector<OpPassManager, 1> mgrs;
-};
-
-/// An adaptor pass used to run operation passes over nested operations
-/// synchronously on a single thread.
+/// An adaptor pass used to run operation passes over nested operations.
 class OpToOpPassAdaptor
-    : public PassWrapper<OpToOpPassAdaptor, OperationPass<>>,
-      public OpToOpPassAdaptorBase {
+    : public PassWrapper<OpToOpPassAdaptor, Pass> {
 public:
   OpToOpPassAdaptor(OpPassManager &&mgr);
+  OpToOpPassAdaptor(const OpToOpPassAdaptor &rhs) = default;
 
   /// Run the held pipeline over all operations.
   void runOnOperation() override;
-};
 
-/// An adaptor pass used to run operation passes over nested operations
-/// asynchronously across multiple threads.
-class OpToOpPassAdaptorParallel
-    : public PassWrapper<OpToOpPassAdaptorParallel, OperationPass<>>,
-      public OpToOpPassAdaptorBase {
-public:
-  OpToOpPassAdaptorParallel(OpPassManager &&mgr);
+  /// Merge the current pass adaptor into given 'rhs'.
+  void mergeInto(OpToOpPassAdaptor &rhs);
 
-  /// Run the held pipeline over all operations.
-  void runOnOperation() override;
+  /// Returns the pass managers held by this adaptor.
+  MutableArrayRef<OpPassManager> getPassManagers() { return mgrs; }
 
   /// Return the async pass managers held by this parallel adaptor.
   MutableArrayRef<SmallVector<OpPassManager, 1>> getParallelPassManagers() {
     return asyncExecutors;
   }
 
+  /// Returns the adaptor pass name.
+  std::string getAdaptorName();
+
 private:
-  // A set of executors, cloned from the main executor, that run asynchronously
-  // on different threads.
-  SmallVector<SmallVector<OpPassManager, 1>, 8> asyncExecutors;
-};
+  /// Run this pass adaptor synchronously.
+  void runOnOperationImpl();
+
+  /// Run this pass adaptor asynchronously.
+  void runOnOperationAsyncImpl();
 
-/// Utility function to convert the given class to the base adaptor it is an
-/// adaptor pass, returns nullptr otherwise.
-OpToOpPassAdaptorBase *getAdaptorPassBase(Pass *pass);
+  /// A set of adaptors to run.
+  SmallVector<OpPassManager, 1> mgrs;
 
-/// Utility function to return if a pass refers to an adaptor pass. Adaptor
-/// passes are those that internally execute a pipeline.
-inline bool isAdaptorPass(Pass *pass) {
-  return isa<OpToOpPassAdaptor>(pass) || isa<OpToOpPassAdaptorParallel>(pass);
-}
+  /// A set of executors, cloned from the main executor, that run
+  /// asynchronously on different threads. This is used when threading is
+  /// enabled.
+  SmallVector<SmallVector<OpPassManager, 1>, 8> asyncExecutors;
+};
 
 } // end namespace detail
 } // end namespace mlir
diff --git a/mlir/lib/Pass/PassManagerOptions.cpp b/mlir/lib/Pass/PassManagerOptions.cpp
index e0c4df56cf725..b00f992eceb9f 100644
--- a/mlir/lib/Pass/PassManagerOptions.cpp
+++ b/mlir/lib/Pass/PassManagerOptions.cpp
@@ -23,13 +23,10 @@ struct PassManagerOptions {
       "pass-pipeline-crash-reproducer",
       llvm::cl::desc("Generate a .mlir reproducer file at the given output path"
                      " if the pass manager crashes or fails")};
-
-  //===--------------------------------------------------------------------===//
-  // Multi-threading
-  //===--------------------------------------------------------------------===//
-  llvm::cl::opt<bool> disableThreads{
-      "disable-pass-threading",
-      llvm::cl::desc("Disable multithreading in the pass manager"),
+  llvm::cl::opt<bool> localReproducer{
+      "pass-pipeline-local-reproducer",
+      llvm::cl::desc("When generating a crash reproducer, attempt to generate "
+                     "a reproducer with the smallest pipeline."),
       llvm::cl::init(false)};
 
   //===--------------------------------------------------------------------===//
@@ -141,7 +138,8 @@ void PassManagerOptions::addPrinterInstrumentation(PassManager &pm) {
 /// Add a pass timing instrumentation if enabled by 'pass-timing' flags.
 void PassManagerOptions::addTimingInstrumentation(PassManager &pm) {
   if (passTiming)
-    pm.enableTiming(passTimingDisplayMode);
+    pm.enableTiming(
+        std::make_unique<PassManager::PassTimingConfig>(passTimingDisplayMode));
 }
 
 void mlir::registerPassManagerCLOptions() {
@@ -155,11 +153,8 @@ void mlir::applyPassManagerCLOptions(PassManager &pm) {
 
   // Generate a reproducer on crash/failure.
   if (options->reproducerFile.getNumOccurrences())
-    pm.enableCrashReproducerGeneration(options->reproducerFile);
-
-  // Disable multi-threading.
-  if (options->disableThreads)
-    pm.disableMultithreading();
+    pm.enableCrashReproducerGeneration(options->reproducerFile,
+                                       options->localReproducer);
 
   // Enable statistics dumping.
   if (options->passStatistics)
diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp
index 7ac54f7cf1afd..6ef0d3bbea6a8 100644
--- a/mlir/lib/Pass/PassStatistics.cpp
+++ b/mlir/lib/Pass/PassStatistics.cpp
@@ -60,7 +60,7 @@ static void printPassEntry(raw_ostream &os, unsigned indent, StringRef pass,
 static void printResultsAsList(raw_ostream &os, OpPassManager &pm) {
   llvm::StringMap<std::vector<Statistic *>> mergedStats;
   std::function<void(Pass *)> addStats = [&](Pass *pass) {
-    auto *adaptor = getAdaptorPassBase(pass);
+    auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass);
 
     // If this is not an adaptor, add the stats to the list if there are any.
     if (!adaptor) {
@@ -105,13 +105,12 @@ static void printResultsAsList(raw_ostream &os, OpPassManager &pm) {
 static void printResultsAsPipeline(raw_ostream &os, OpPassManager &pm) {
   std::function<void(unsigned, Pass *)> printPass = [&](unsigned indent,
                                                         Pass *pass) {
-    // Handle the case of an adaptor pass.
-    if (auto *adaptor = getAdaptorPassBase(pass)) {
+    if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass)) {
       // If this adaptor has more than one internal pipeline, print an entry
       // for it.
       auto mgrs = adaptor->getPassManagers();
       if (mgrs.size() > 1) {
-        printPassEntry(os, indent, adaptor->getName());
+        printPassEntry(os, indent, adaptor->getAdaptorName());
         indent += 2;
       }
 
@@ -195,8 +194,8 @@ void OpPassManager::mergeStatisticsInto(OpPassManager &other) {
     Pass &pass = std::get<0>(passPair), &otherPass = std::get<1>(passPair);
 
     // If this is an adaptor, then recursively merge the pass managers.
-    if (auto *adaptorPass = getAdaptorPassBase(&pass)) {
-      auto *otherAdaptorPass = getAdaptorPassBase(&otherPass);
+    if (auto *adaptorPass = dyn_cast<OpToOpPassAdaptor>(&pass)) {
+      auto *otherAdaptorPass = cast<OpToOpPassAdaptor>(&otherPass);
       for (auto mgrs : llvm::zip(adaptorPass->getPassManagers(),
                                  otherAdaptorPass->getPassManagers()))
         std::get<0>(mgrs).mergeStatisticsInto(std::get<1>(mgrs));
@@ -217,18 +216,16 @@ void OpPassManager::mergeStatisticsInto(OpPassManager &other) {
 /// consumption(e.g. dumping).
 static void prepareStatistics(OpPassManager &pm) {
   for (Pass &pass : pm.getPasses()) {
-    OpToOpPassAdaptorBase *adaptor = getAdaptorPassBase(&pass);
+    OpToOpPassAdaptor *adaptor = dyn_cast<OpToOpPassAdaptor>(&pass);
     if (!adaptor)
       continue;
     MutableArrayRef<OpPassManager> nestedPms = adaptor->getPassManagers();
 
-    // If this is a parallel adaptor, merge the statistics from the async
-    // pass managers into the main nested pass managers.
-    if (auto *parallelAdaptor = dyn_cast<OpToOpPassAdaptorParallel>(&pass)) {
-      for (auto &asyncPM : parallelAdaptor->getParallelPassManagers()) {
-        for (unsigned i = 0, e = asyncPM.size(); i != e; ++i)
-          asyncPM[i].mergeStatisticsInto(nestedPms[i]);
-      }
+    // Merge the statistics from the async pass managers into the main nested
+    // pass managers.
+    for (auto &asyncPM : adaptor->getParallelPassManagers()) {
+      for (unsigned i = 0, e = asyncPM.size(); i != e; ++i)
+        asyncPM[i].mergeStatisticsInto(nestedPms[i]);
     }
 
     // Prepare the statistics of each of the nested passes.
diff --git a/mlir/lib/Pass/PassTiming.cpp b/mlir/lib/Pass/PassTiming.cpp
index 663cbdad7c39d..71bf822a864bc 100644
--- a/mlir/lib/Pass/PassTiming.cpp
+++ b/mlir/lib/Pass/PassTiming.cpp
@@ -160,7 +160,8 @@ struct Timer {
 };
 
 struct PassTiming : public PassInstrumentation {
-  PassTiming(PassDisplayMode displayMode) : displayMode(displayMode) {}
+  PassTiming(std::unique_ptr<PassManager::PassTimingConfig> config)
+      : config(std::move(config)) {}
   ~PassTiming() override { print(); }
 
   /// Setup the instrumentation hooks.
@@ -231,8 +232,8 @@ struct PassTiming : public PassInstrumentation {
   /// A stack of the currently active pass timers per thread.
   DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers;
 
-  /// The display mode to use when printing the timing results.
-  PassDisplayMode displayMode;
+  /// The configuration object to use when printing the timing results.
+  std::unique_ptr<PassManager::PassTimingConfig> config;
 
   /// A mapping of pipeline timers that need to be merged into the parent
   /// collection. The timers are mapped to the parent info to merge into.
@@ -276,17 +277,17 @@ void PassTiming::runAfterPipeline(const OperationName &name,
 
 /// Start a new timer for the given pass.
 void PassTiming::startPassTimer(Pass *pass) {
-  auto kind = isAdaptorPass(pass) ? TimerKind::PipelineCollection
-                                  : TimerKind::PassOrAnalysis;
+  auto kind = isa<OpToOpPassAdaptor>(pass) ? TimerKind::PipelineCollection
+                                           : TimerKind::PassOrAnalysis;
   Timer *timer = getTimer(pass, kind, [pass]() -> std::string {
-    if (auto *adaptor = getAdaptorPassBase(pass))
-      return adaptor->getName();
+    if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass))
+      return adaptor->getAdaptorName();
     return std::string(pass->getName());
   });
 
   // We don't actually want to time the adaptor passes, they gather their total
   // from their held passes.
-  if (!isAdaptorPass(pass))
+  if (!isa<OpToOpPassAdaptor>(pass))
    timer->start();
 }
 
@@ -301,9 +302,9 @@ void PassTiming::startAnalysisTimer(StringRef name, TypeID id) {
 void PassTiming::runAfterPass(Pass *pass, Operation *) {
   Timer *timer = popLastActiveTimer();
 
-  // If this is an OpToOpPassAdaptorParallel, then we need to merge in the
-  // timing data for the pipelines running on other threads.
-  if (isa<OpToOpPassAdaptorParallel>(pass)) {
+  // If this is a pass adaptor, then we need to merge in the timing data for
+  // the pipelines running on other threads.
+  if (isa<OpToOpPassAdaptor>(pass)) {
     auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass});
     if (toMerge != pipelinesToMerge.end()) {
       for (auto &it : toMerge->second)
@@ -313,10 +314,7 @@ void PassTiming::runAfterPass(Pass *pass, Operation *) {
     return;
   }
 
-  // Adaptor passes aren't timed directly, so we don't need to stop their
-  // timers.
-  if (!isAdaptorPass(pass))
-    timer->stop();
+  timer->stop();
 }
 
 /// Stop a timer.
@@ -353,28 +351,37 @@ void PassTiming::print() {
     return;
   assert(rootTimers.size() == 1 && "expected one remaining root timer");
 
-  auto &rootTimer = rootTimers.begin()->second;
-  auto os = llvm::CreateInfoOutputFile();
-
-  // Print the timer header.
-  TimeRecord totalTime = rootTimer->getTotalTime();
-  printTimerHeader(*os, totalTime);
-
-  // Defer to a specialized printer for each display mode.
-  switch (displayMode) {
-  case PassDisplayMode::List:
-    printResultsAsList(*os, rootTimer.get(), totalTime);
-    break;
-  case PassDisplayMode::Pipeline:
-    printResultsAsPipeline(*os, rootTimer.get(), totalTime);
-    break;
-  }
-  printTimeEntry(*os, 0, "Total", totalTime, totalTime);
-  os->flush();
-
-  // Reset root timers.
-  rootTimers.clear();
-  activeThreadTimers.clear();
+  auto printCallback = [&](raw_ostream &os) {
+    auto &rootTimer = rootTimers.begin()->second;
+    // Print the timer header.
+    TimeRecord totalTime = rootTimer->getTotalTime();
+    printTimerHeader(os, totalTime);
+    // Defer to a specialized printer for each display mode.
+    switch (config->getDisplayMode()) {
+    case PassDisplayMode::List:
+      printResultsAsList(os, rootTimer.get(), totalTime);
+      break;
+    case PassDisplayMode::Pipeline:
+      printResultsAsPipeline(os, rootTimer.get(), totalTime);
+      break;
+    }
+    printTimeEntry(os, 0, "Total", totalTime, totalTime);
+    os.flush();
+
+    // Reset root timers.
+    rootTimers.clear();
+    activeThreadTimers.clear();
+  };
+
+  config->printTiming(printCallback);
+}
+
+// The default implementation for printTiming uses
+// `llvm::CreateInfoOutputFile()` as the stream; it can be overridden by
+// clients to customize the output.
+void PassManager::PassTimingConfig::printTiming(PrintCallbackFn printCallback) {
+  printCallback(*llvm::CreateInfoOutputFile());
 }
 
 /// Print the timing result in list mode.
@@ -449,16 +456,21 @@ void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root,
   printTimer(0, topLevelTimer.second.get());
 }
 
+// Out-of-line as key function.
+PassManager::PassTimingConfig::~PassTimingConfig() {}
+
 //===----------------------------------------------------------------------===//
 // PassManager
 //===----------------------------------------------------------------------===//
 
 /// Add an instrumentation to time the execution of passes and the computation
 /// of analyses.
-void PassManager::enableTiming(PassDisplayMode displayMode) {
+void PassManager::enableTiming(std::unique_ptr<PassTimingConfig> config) {
   // Check if pass timing is already enabled.
   if (passTiming)
     return;
-  addInstrumentation(std::make_unique<PassTiming>(displayMode));
+  if (!config)
+    config = std::make_unique<PassTimingConfig>();
+  addInstrumentation(std::make_unique<PassTiming>(std::move(config)));
   passTiming = true;
 }
diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt
index a21a8cc29e0f0..dcb28518b1a27 100644
--- a/mlir/lib/Support/CMakeLists.txt
+++ b/mlir/lib/Support/CMakeLists.txt
@@ -13,10 +13,11 @@ add_mlir_library(MLIRSupport
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Support
-  )
-target_link_libraries(MLIRSupport
-  PUBLIC
-  LLVMSupport
+
+  LINK_COMPONENTS
+  Support
+
+  LINK_LIBS PUBLIC
   ${LLVM_PTHREAD_LIB})
 
 add_mlir_library(MLIROptLib
@@ -24,20 +25,33 @@ add_mlir_library(MLIROptLib
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Support
-  )
-target_link_libraries(MLIROptLib
-  PUBLIC
+
+  LINK_COMPONENTS
+  Support
+
+  LINK_LIBS PUBLIC
   MLIRPass
   MLIRParser
-  LLVMSupport
   MLIRSupport
 )
 
-add_llvm_library(MLIRJitRunner
+# Exclude from libMLIR.so because the JIT infrastructure
+# is a big dependency which most don't need.
+add_mlir_library(MLIRJitRunner
   JitRunner.cpp
-)
-target_link_libraries(MLIRJitRunner
-  PUBLIC
+
+  DEPENDS
+  intrinsics_gen
+
+  EXCLUDE_FROM_LIBMLIR
+
+  LINK_COMPONENTS
+  Core
+  OrcJIT
+  JITLink
+  Support
+
+  LINK_LIBS PUBLIC
   MLIRExecutionEngine
   MLIRIR
   MLIRParser
@@ -46,6 +60,4 @@ add_llvm_library(MLIRJitRunner
   MLIRTransforms
   MLIRStandardToLLVM
   MLIRSupport
-  LLVMCore
-  LLVMSupport
 )
diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp
index 5c21e19c4bd20..25e197083b620 100644
--- a/mlir/lib/Support/MlirOptMain.cpp
+++ b/mlir/lib/Support/MlirOptMain.cpp
@@ -40,7 +40,14 @@ static LogicalResult performActions(raw_ostream &os, bool verifyDiagnostics,
                                     bool verifyPasses, SourceMgr &sourceMgr,
                                     MLIRContext *context,
                                     const PassPipelineCLParser &passPipeline) {
+  // Disable multi-threading when parsing the input file. This removes the
+  // unnecessary/costly context synchronization when parsing.
+  bool wasThreadingEnabled = context->isMultithreadingEnabled();
+  context->disableMultithreading();
+
+  // Parse the input file and reset the context threading state.
   OwningModuleRef module(parseSourceFile(sourceMgr, context));
+  context->enableMultithreading(wasThreadingEnabled);
   if (!module)
     return failure();
 
diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp
index d50c599a77764..40304a544c4fb 100644
--- a/mlir/lib/Support/StorageUniquer.cpp
+++ b/mlir/lib/Support/StorageUniquer.cpp
@@ -46,6 +46,8 @@ struct StorageUniquerImpl {
               function_ref<bool(const BaseStorage *)> isEqual,
               function_ref<BaseStorage *(StorageAllocator &)> ctorFn) {
     LookupKey lookupKey{kind, hashValue, isEqual};
+    if (!threadingIsEnabled)
+      return getOrCreateUnsafe(kind, hashValue, lookupKey, ctorFn);
 
     // Check for an existing instance in read-only mode.
     {
@@ -57,9 +59,12 @@ struct StorageUniquerImpl {
 
     // Acquire a writer-lock so that we can safely create the new type instance.
     llvm::sys::SmartScopedWriter<true> typeLock(mutex);
-
-    // Check for an existing instance again here, because another writer thread
-    // may have already created one.
+    return getOrCreateUnsafe(kind, hashValue, lookupKey, ctorFn);
+  }
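
The uniquer above uses classic double-checked locking plus a lock-free single-threaded bypass; the same shape in isolation, with illustrative names:

// Editor's sketch, not part of the patch.
template <typename MapT, typename KeyT, typename CtorT>
typename MapT::mapped_type
getOrCreateGuarded(MapT &map, const KeyT &key,
                   llvm::sys::SmartRWMutex<true> &mutex,
                   bool threadingEnabled, CtorT ctor) {
  auto getOrCreate = [&] {
    auto &slot = map[key];
    if (!slot)
      slot = ctor();
    return slot;
  };
  // Single-threaded: no synchronization needed.
  if (!threadingEnabled)
    return getOrCreate();
  // Fast path: look up under a reader lock.
  {
    llvm::sys::SmartScopedReader<true> readLock(mutex);
    auto it = map.find(key);
    if (it != map.end())
      return it->second;
  }
  // Slow path: re-check and construct under the writer lock, because another
  // writer may have created the entry in the meantime.
  llvm::sys::SmartScopedWriter<true> writeLock(mutex);
  return getOrCreate();
}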
+
+  /// Get or create an instance of a complex derived type in an unsafe fashion.
+  BaseStorage *
+  getOrCreateUnsafe(unsigned kind, unsigned hashValue, LookupKey &lookupKey,
+                    function_ref<BaseStorage *(StorageAllocator &)> ctorFn) {
     auto existing = storageTypes.insert_as({}, lookupKey);
     if (!existing.second)
       return existing.first->storage;
@@ -75,6 +80,9 @@ struct StorageUniquerImpl {
   BaseStorage *
   getOrCreate(unsigned kind,
               function_ref<BaseStorage *(StorageAllocator &)> ctorFn) {
+    if (!threadingIsEnabled)
+      return getOrCreateUnsafe(kind, ctorFn);
+
     // Check for an existing instance in read-only mode.
     {
       llvm::sys::SmartScopedReader<true> typeLock(mutex);
@@ -85,9 +93,12 @@ struct StorageUniquerImpl {
 
     // Acquire a writer-lock so that we can safely create the new type instance.
     llvm::sys::SmartScopedWriter<true> typeLock(mutex);
-
-    // Check for an existing instance again here, because another writer thread
-    // may have already created one.
+    return getOrCreateUnsafe(kind, ctorFn);
+  }
+  /// Get or create an instance of a simple derived type in an unsafe fashion.
+  BaseStorage *
+  getOrCreateUnsafe(unsigned kind,
+                    function_ref<BaseStorage *(StorageAllocator &)> ctorFn) {
     auto &result = simpleTypes[kind];
     if (result)
       return result;
@@ -152,18 +163,21 @@ struct StorageUniquerImpl {
     }
   };
 
-  // Unique types with specific hashing or storage constraints.
+  /// Unique types with specific hashing or storage constraints.
   using StorageTypeSet = DenseSet<HashedStorageType, StorageKeyInfo>;
   StorageTypeSet storageTypes;
 
-  // Unique types with just the kind.
+  /// Unique types with just the kind.
   DenseMap<unsigned, BaseStorage *> simpleTypes;
 
-  // Allocator to use when constructing derived type instances.
+  /// Allocator to use when constructing derived type instances.
   StorageUniquer::StorageAllocator allocator;
 
-  // A mutex to keep type uniquing thread-safe.
+  /// A mutex to keep type uniquing thread-safe.
   llvm::sys::SmartRWMutex<true> mutex;
+
+  /// Flag specifying if multi-threading is enabled within the uniquer.
+  bool threadingIsEnabled = true;
 };
 } // end namespace detail
 } // namespace mlir
@@ -171,6 +185,11 @@ struct StorageUniquerImpl {
 StorageUniquer::StorageUniquer() : impl(new StorageUniquerImpl()) {}
 StorageUniquer::~StorageUniquer() {}
 
+/// Set the flag specifying if multi-threading is disabled within the uniquer.
+void StorageUniquer::disableMultithreading(bool disable) {
+  impl->threadingIsEnabled = !disable;
+}
+
 /// Implementation for getting/creating an instance of a derived type with
 /// complex storage.
 auto StorageUniquer::getImpl(
diff --git a/mlir/lib/TableGen/CMakeLists.txt b/mlir/lib/TableGen/CMakeLists.txt
index a395fdb14a7af..780c98a5660d3 100644
--- a/mlir/lib/TableGen/CMakeLists.txt
+++ b/mlir/lib/TableGen/CMakeLists.txt
@@ -1,4 +1,8 @@
-add_llvm_library(LLVMMLIRTableGen
+# This library is unusual, since mlir-tblgen depends on it.
+# For non-obvious reasons, linking mlir-tblgen fails with
+# LLVM_BUILD_LLVM_DYLIB and LLVM_LINK_LLVM_DYLIB unless
+# DISABLE_LLVM_LINK_LLVM_DYLIB is set.
+llvm_add_library(LLVMMLIRTableGen STATIC
   Argument.cpp
   Attribute.cpp
   Constraint.cpp
@@ -16,10 +20,14 @@ add_llvm_library(LLVMMLIRTableGen
   Successor.cpp
   Type.cpp
 
+  DISABLE_LLVM_LINK_LLVM_DYLIB
+
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/TableGen
+
+  LINK_COMPONENTS
+  TableGen
+  Demangle
   )
-target_link_libraries(LLVMMLIRTableGen
-  PUBLIC
-  LLVMSupport
-  LLVMTableGen)
+
+mlir_check_all_link_libraries(LLVMMLIRTableGen)
diff --git a/mlir/lib/TableGen/OpInterfaces.cpp b/mlir/lib/TableGen/OpInterfaces.cpp
index c565547b2e09f..be3782c788099 100644
--- a/mlir/lib/TableGen/OpInterfaces.cpp
+++ b/mlir/lib/TableGen/OpInterfaces.cpp
@@ -92,6 +92,12 @@ llvm::Optional<StringRef> OpInterface::getExtraClassDeclaration() const {
   return value.empty() ? llvm::Optional<StringRef>() : value;
 }
 
+// Return the traits extra class declaration code.
+llvm::Optional<StringRef> OpInterface::getExtraTraitClassDeclaration() const {
+  auto value = def->getValueAsString("extraTraitClassDeclaration");
+  return value.empty() ? llvm::Optional<StringRef>() : value;
+}
+
 // Return the body for this method if it has one.
 llvm::Optional<StringRef> OpInterface::getVerify() const {
   auto value = def->getValueAsString("verify");
diff --git a/mlir/lib/TableGen/OpTrait.cpp b/mlir/lib/TableGen/OpTrait.cpp
index f8257bb3315dd..7a1e9cef0559b 100644
--- a/mlir/lib/TableGen/OpTrait.cpp
+++ b/mlir/lib/TableGen/OpTrait.cpp
@@ -63,3 +63,7 @@ llvm::StringRef InterfaceOpTrait::getTrait() const {
 bool InterfaceOpTrait::shouldDeclareMethods() const {
   return def->isSubClassOf("DeclareOpInterfaceMethods");
 }
+
+std::vector<StringRef> InterfaceOpTrait::getAlwaysDeclaredMethods() const {
+  return def->getValueAsListOfStrings("alwaysOverriddenMethods");
+}
diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt
index ab4008a717a6e..4a0af66a04b16 100644
--- a/mlir/lib/Target/CMakeLists.txt
+++ b/mlir/lib/Target/CMakeLists.txt
@@ -4,17 +4,18 @@ add_mlir_translation_library(MLIRTargetLLVMIRModuleTranslation
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
 
+  DEPENDS
   intrinsics_gen
-  )
-target_link_libraries(MLIRTargetLLVMIRModuleTranslation
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+  FrontendOpenMP
+  TransformUtils
+
+  LINK_LIBS PUBLIC
   MLIRLLVMIR
   MLIRLLVMIRTransforms
-  LLVMCore
-  LLVMIRReader
-  LLVMSupport
-  LLVMTransformUtils
   MLIRTranslation
   )
@@ -23,11 +24,14 @@ add_mlir_translation_library(MLIRTargetAVX512
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
 
+  DEPENDS
   MLIRLLVMAVX512ConversionsIncGen
-  )
-target_link_libraries(MLIRTargetAVX512
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
   MLIRIR
   MLIRLLVMAVX512
   MLIRLLVMIR
@@ -40,9 +44,12 @@ add_mlir_translation_library(MLIRTargetLLVMIR
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
-  )
-target_link_libraries(MLIRTargetLLVMIR
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+  IRReader
+
+  LINK_LIBS PUBLIC
   MLIRTargetLLVMIRModuleTranslation
   )
@@ -51,11 +58,14 @@ add_mlir_translation_library(MLIRTargetNVVMIR
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
 
+  DEPENDS
   intrinsics_gen
-  )
-target_link_libraries(MLIRTargetNVVMIR
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
   MLIRGPU
   MLIRIR
   MLIRLLVMIR
@@ -68,11 +78,14 @@ add_mlir_translation_library(MLIRTargetROCDLIR
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
 
+  DEPENDS
   intrinsics_gen
-  )
-target_link_libraries(MLIRTargetROCDLIR
-  PUBLIC
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
   MLIRGPU
   MLIRIR
   MLIRLLVMIR
diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
index ffc47fca24d90..5856c60c64d9b 100644
--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@@ -720,7 +720,7 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) {
       op = b.create<LLVM::CallOp>(loc, tys,
                                   b.getSymbolRefAttr(callee->getName()), ops);
     } else {
-      Value calledValue = processValue(ci->getCalledValue());
+      Value calledValue = processValue(ci->getCalledOperand());
       if (!calledValue)
         return failure();
       ops.insert(ops.begin(), calledValue);
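
A note on the API change in this file: llvm::CallBase::getCalledValue() was renamed to getCalledOperand() upstream, and the accessor is shared by CallInst and InvokeInst, which is why this hunk and the next one change together; a trivial sketch:

// Editor's sketch, not part of the patch.
llvm::Value *calleeOf(llvm::CallBase &call) {
  return call.getCalledOperand();
}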
processValue(ii->getCalledValue())); + ops.insert(ops.begin(), processValue(ii->getCalledOperand())); op = b.create(loc, tys, ops, blocks[ii->getNormalDest()], normalArgs, blocks[ii->getUnwindDest()], unwindArgs); diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 3b0709a838afb..74dd0d15f441e 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -110,10 +110,11 @@ DebugTranslation::translateLoc(Location loc, llvm::DILocalScope *scope, return nullptr; // Check for a cached instance. - const auto *&llvmLoc = locationToLoc[std::make_pair(loc, scope)]; - if (llvmLoc) - return llvmLoc; + auto existingIt = locationToLoc.find(std::make_pair(loc, scope)); + if (existingIt != locationToLoc.end()) + return existingIt->second; + const llvm::DILocation *llvmLoc = nullptr; switch (loc->getKind()) { case StandardAttributes::CallSiteLocation: { auto callLoc = loc.dyn_cast(); @@ -154,6 +155,7 @@ DebugTranslation::translateLoc(Location loc, llvm::DILocalScope *scope, default: llvm_unreachable("unknown location kind"); } + locationToLoc.try_emplace(std::make_pair(loc, scope), llvmLoc); return llvmLoc; } diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp new file mode 100644 index 0000000000000..24c228ec06571 --- /dev/null +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -0,0 +1,470 @@ +//===- BufferPlacement.cpp - the impl for buffer placement ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic for computing correct alloc and dealloc positions. +// The main class is the BufferPlacementPass class that implements the +// underlying algorithm. In order to put allocations and deallocations at safe +// positions, it is important to place them in the correct blocks. +// However, the liveness analysis does not pay attention to aliases, which can +// occur due to branches (and their associated block arguments) in general. For +// this purpose, BufferPlacement first finds all possible aliases for a single +// value (using the BufferPlacementAliasAnalysis class). Consider the following +// example: +// +// ^bb0(%arg0): +// cond_br %cond, ^bb1, ^bb2 +// ^bb1: +// br ^exit(%arg0) +// ^bb2: +// %new_value = ... +// br ^exit(%new_value) +// ^exit(%arg1): +// return %arg1; +// +// Using liveness information on its own would cause us to place the allocs and +// deallocs in the wrong block. This is due to the fact that %new_value will not +// be liveOut of its block. Instead, we have to place the alloc for %new_value +// in bb0 and its associated dealloc in exit. Using the class +// BufferPlacementAliasAnalysis, we will find out that %new_value has a +// potential alias %arg1. In order to find the dealloc position we have to find +// all potential aliases, iterate over their uses and find the common +// post-dominator block. In this block we can be certain that %new_value +// will die and can use liveness information to determine the exact operation +// after which we have to insert the dealloc. Finding the alloc position is +// similar and non-obvious.
Again, we have to consider all potential +// aliases and find the common dominator block to place the alloc. +// +// TODO: +// The current implementation does not support loops and the resulting code will +// be invalid with respect to program semantics. The only thing that is +// currently missing is a high-level loop analysis that allows us to move allocs +// and deallocs outside of the loop blocks. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/BufferPlacement.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; + +namespace { + +//===----------------------------------------------------------------------===// +// BufferPlacementAliasAnalysis +//===----------------------------------------------------------------------===// + +/// A straightforward alias analysis that determines all aliases of all +/// values. This is a requirement for the BufferPlacement class, since it +/// needs safe positions at which to place allocs and deallocs. +class BufferPlacementAliasAnalysis { +public: + using ValueSetT = SmallPtrSet; + +public: + /// Constructs a new alias analysis using the op provided. + BufferPlacementAliasAnalysis(Operation *op) { build(op->getRegions()); } + + /// Finds all immediate and indirect aliases this value could potentially + /// have. Note that the resulting set will also contain the value provided, as + /// it is an alias of itself. + ValueSetT resolve(Value value) const { + ValueSetT result; + resolveRecursive(value, result); + return result; + } + +private: + /// Recursively determines alias information for the given value. It stores + /// all newly found potential aliases in the given result set. + void resolveRecursive(Value value, ValueSetT &result) const { + if (!result.insert(value).second) + return; + auto it = aliases.find(value); + if (it == aliases.end()) + return; + for (Value alias : it->second) + resolveRecursive(alias, result); + } + + /// This function constructs a mapping from values to their immediate aliases. + /// It iterates over all blocks, gets their predecessors, determines the + /// values that will be passed to the corresponding block arguments and + /// inserts them into the underlying map. + void build(MutableArrayRef regions) { + for (Region &region : regions) { + for (Block &block : region) { + // Iterate over all predecessors and map the values they pass to the + // corresponding block arguments. + for (auto it = block.pred_begin(), e = block.pred_end(); it != e; + ++it) { + unsigned successorIndex = it.getSuccessorIndex(); + // Get the terminator and the values that will be passed to our block. + auto branchInterface = + dyn_cast((*it)->getTerminator()); + if (!branchInterface) + continue; + // Query the branch op interface to get the successor operands. + auto successorOperands = + branchInterface.getSuccessorOperands(successorIndex); + if (successorOperands.hasValue()) { + // Build the actual mapping of values to their immediate aliases. + for (auto argPair : llvm::zip(block.getArguments(), + successorOperands.getValue())) { + aliases[std::get<1>(argPair)].insert(std::get<0>(argPair)); + } + } + } + } + } + } + + /// Maps each value to all immediate aliases it can have.
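+  /// (Editor's illustration: for the IR example in the file header, build()
+  /// records %arg0 -> {%arg1} and %new_value -> {%arg1}, so
+  /// resolve(%new_value) returns {%new_value, %arg1}.)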
+ llvm::DenseMap aliases; +}; + +//===----------------------------------------------------------------------===// +// BufferPlacementPositions +//===----------------------------------------------------------------------===// + +/// Stores correct alloc and dealloc positions to place dialect-specific alloc +/// and dealloc operations. +struct BufferPlacementPositions { +public: + BufferPlacementPositions() + : allocPosition(nullptr), deallocPosition(nullptr) {} + + /// Creates a new positions tuple including alloc and dealloc positions. + BufferPlacementPositions(Operation *allocPosition, Operation *deallocPosition) + : allocPosition(allocPosition), deallocPosition(deallocPosition) {} + + /// Returns the alloc position before which the alloc operation has to be + /// inserted. + Operation *getAllocPosition() const { return allocPosition; } + + /// Returns the dealloc position after which the dealloc operation has to be + /// inserted. + Operation *getDeallocPosition() const { return deallocPosition; } + +private: + Operation *allocPosition; + Operation *deallocPosition; +}; + +//===----------------------------------------------------------------------===// +// BufferPlacementAnalysis +//===----------------------------------------------------------------------===// + +// The main buffer placement analysis used to place allocs and deallocs. +class BufferPlacementAnalysis { +public: + using DeallocSetT = SmallPtrSet; + +public: + BufferPlacementAnalysis(Operation *op) + : operation(op), liveness(op), dominators(op), postDominators(op), + aliases(op) {} + + /// Computes the actual positions to place allocs and deallocs for the given + /// value. + BufferPlacementPositions + computeAllocAndDeallocPositions(OpResult result) const { + if (result.use_empty()) + return BufferPlacementPositions(result.getOwner(), result.getOwner()); + // Get all possible aliases. + auto possibleValues = aliases.resolve(result); + return BufferPlacementPositions(getAllocPosition(result, possibleValues), + getDeallocPosition(result, possibleValues)); + } + + /// Finds all associated dealloc nodes for the alloc nodes using alias + /// information. + DeallocSetT findAssociatedDeallocs(OpResult allocResult) const { + DeallocSetT result; + auto possibleValues = aliases.resolve(allocResult); + for (Value alias : possibleValues) + for (Operation *op : alias.getUsers()) { + // Check for an existing memory effect interface. + auto effectInstance = dyn_cast(op); + if (!effectInstance) + continue; + // Check whether the associated value will be freed using the current + // operation. + SmallVector effects; + effectInstance.getEffectsOnValue(alias, effects); + if (llvm::any_of(effects, [=](MemoryEffects::EffectInstance &it) { + return isa(it.getEffect()); + })) + result.insert(op); + } + return result; + } + + /// Dumps the buffer placement information to the given stream. 
+ void print(raw_ostream &os) const { + os << "// ---- Buffer Placement -----\n"; + + for (Region &region : operation->getRegions()) + for (Block &block : region) + for (Operation &operation : block) + for (OpResult result : operation.getResults()) { + BufferPlacementPositions positions = + computeAllocAndDeallocPositions(result); + os << "Positions for "; + result.print(os); + os << "\n Alloc: "; + positions.getAllocPosition()->print(os); + os << "\n Dealloc: "; + positions.getDeallocPosition()->print(os); + os << "\n"; + } + } + +private: + /// Finds a correct placement block for the alloc/dealloc node according to + /// the algorithm described at the top of the file. It supports dominator and + /// post-dominator analyses via template arguments. + template + Block * + findPlacementBlock(OpResult result, + const BufferPlacementAliasAnalysis::ValueSetT &aliases, + const DominatorT &doms) const { + // Start with the current block the value is defined in. + Block *dom = result.getOwner()->getBlock(); + // Iterate over all aliases and their uses to find a safe placement block + // according to the given dominator information. + for (Value alias : aliases) + for (Operation *user : alias.getUsers()) { + // Move upwards in the dominator tree to find an appropriate + // dominator block that takes the current use into account. + dom = doms.findNearestCommonDominator(dom, user->getBlock()); + } + return dom; + } + + /// Finds a correct alloc position according to the algorithm described at + /// the top of the file. + Operation *getAllocPosition( + OpResult result, + const BufferPlacementAliasAnalysis::ValueSetT &aliases) const { + // Determine the actual block to place the alloc and get liveness + // information. + Block *placementBlock = findPlacementBlock(result, aliases, dominators); + const LivenessBlockInfo *livenessInfo = + liveness.getLiveness(placementBlock); + + // We have to ensure that the alloc will be before the first use of all + // aliases of the given value. We first assume that there are no uses in the + // placementBlock and that we can safely place the alloc before the + // terminator at the end of the block. + Operation *startOperation = placementBlock->getTerminator(); + // Iterate over all aliases and ensure that the startOperation will point to + // the first operation of all potential aliases in the placementBlock. + for (Value alias : aliases) { + Operation *aliasStartOperation = livenessInfo->getStartOperation(alias); + // Check whether the aliasStartOperation lies in the desired block and + // whether it is before the current startOperation. If yes, this will be + // the new startOperation. + if (aliasStartOperation->getBlock() == placementBlock && + aliasStartOperation->isBeforeInBlock(startOperation)) + startOperation = aliasStartOperation; + } + // startOperation is the first operation before which we can safely insert + // the alloc, taking all potential aliases into account. + return startOperation; + } + + /// Finds a correct dealloc position according to the algorithm described at + /// the top of the file. + Operation *getDeallocPosition( + OpResult result, + const BufferPlacementAliasAnalysis::ValueSetT &aliases) const { + // Determine the actual block to place the dealloc and get liveness + // information.
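+    // (Editor's note: the steps below mirror getAllocPosition above, but walk
+    // the post-dominator tree and take the latest end operation among all
+    // aliases instead of the earliest start operation.)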
+ Block *placementBlock = findPlacementBlock(result, aliases, postDominators); + const LivenessBlockInfo *livenessInfo = + liveness.getLiveness(placementBlock); + + // We have to ensure that the dealloc will be after the last use of all + // aliases of the given value. We first assume that there are no uses in the + // placementBlock and that we can safely place the dealloc at the beginning. + Operation *endOperation = &placementBlock->front(); + // Iterate over all aliases and ensure that the endOperation will point to + // the last operation of all potential aliases in the placementBlock. + for (Value alias : aliases) { + Operation *aliasEndOperation = + livenessInfo->getEndOperation(alias, endOperation); + // Check whether the aliasEndOperation lies in the desired block and + // whether it is after the current endOperation. If yes, this will be the + // new endOperation. + if (aliasEndOperation->getBlock() == placementBlock && + endOperation->isBeforeInBlock(aliasEndOperation)) + endOperation = aliasEndOperation; + } + // endOperation is the last operation after which we can safely insert the + // dealloc, taking all potential aliases into account. + return endOperation; + } + + /// The operation this transformation was constructed from. + Operation *operation; + + /// The underlying liveness analysis to compute fine-grained information about + /// alloc and dealloc positions. + Liveness liveness; + + /// The dominator analysis to place allocs in the appropriate blocks. + DominanceInfo dominators; + + /// The post-dominator analysis to place deallocs in the appropriate blocks. + PostDominanceInfo postDominators; + + /// The internal alias analysis to ensure that allocs and deallocs take all + /// their potential aliases into account. + BufferPlacementAliasAnalysis aliases; +}; + +//===----------------------------------------------------------------------===// +// BufferPlacementPass +//===----------------------------------------------------------------------===// + +/// The actual buffer placement pass that moves alloc and dealloc nodes into +/// the right positions. It uses the algorithm described at the top of the file. +struct BufferPlacementPass + : mlir::PassWrapper { + void runOnFunction() override { + // Get required analysis information first. + auto &analysis = getAnalysis(); + + // Compute an initial placement of all nodes. + llvm::SmallVector, 16> + placements; + getFunction().walk([&](MemoryEffectOpInterface op) { + // Try to find a single allocation result. + SmallVector effects; + op.getEffects(effects); + + SmallVector allocateResultEffects; + llvm::copy_if(effects, std::back_inserter(allocateResultEffects), + [=](MemoryEffects::EffectInstance &it) { + Value value = it.getValue(); + return isa(it.getEffect()) && + value && value.isa(); + }); + // If there is only one result, we will be able to move the allocation and + // (possibly existing) deallocation ops. + if (allocateResultEffects.size() == 1) { + // Insert allocation result. + auto allocResult = allocateResultEffects[0].getValue().cast(); + placements.emplace_back( + allocResult, analysis.computeAllocAndDeallocPositions(allocResult)); + } + }); + + // Move alloc (and dealloc, if any) nodes into the right places and insert + // dealloc nodes if necessary. + for (auto &entry : placements) { + // Find already associated dealloc nodes.
+ OpResult alloc = entry.first; + auto deallocs = analysis.findAssociatedDeallocs(alloc); + if (deallocs.size() > 1) { + emitError(alloc.getLoc(), + "unsupported number of associated dealloc operations"); + return; + } + + // Move alloc node to the right place. + BufferPlacementPositions &positions = entry.second; + Operation *allocOperation = alloc.getOwner(); + allocOperation->moveBefore(positions.getAllocPosition()); + + // If there is an existing dealloc, move it to the right place. + Operation *nextOp = positions.getDeallocPosition()->getNextNode(); + assert(nextOp && "Invalid Dealloc operation position"); + if (deallocs.size()) { + (*deallocs.begin())->moveBefore(nextOp); + } else { + // If there is no dealloc node, insert one in the right place. + OpBuilder builder(nextOp); + builder.create(allocOperation->getLoc(), alloc); + } + } + }; +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// BufferAssignmentPlacer +//===----------------------------------------------------------------------===// + +/// Creates a new assignment placer. +BufferAssignmentPlacer::BufferAssignmentPlacer(Operation *op) : operation(op) {} + +/// Computes the actual position to place allocs for the given value. +OpBuilder::InsertPoint +BufferAssignmentPlacer::computeAllocPosition(OpResult result) { + Operation *owner = result.getOwner(); + return OpBuilder::InsertPoint(owner->getBlock(), Block::iterator(owner)); +} + +//===----------------------------------------------------------------------===// +// FunctionAndBlockSignatureConverter +//===----------------------------------------------------------------------===// + +// Performs the actual signature rewriting step. +LogicalResult FunctionAndBlockSignatureConverter::matchAndRewrite( + FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + if (!converter) { + funcOp.emitError("The type converter has not been defined for " + "FunctionAndBlockSignatureConverter"); + return failure(); + } + // Converting shaped type arguments to memref type. + auto funcType = funcOp.getType(); + TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); + for (auto argType : llvm::enumerate(funcType.getInputs())) + conversion.addInputs(argType.index(), + converter->convertType(argType.value())); + // Adding function results to the arguments of the converted function as + // memref type. The converted function will be a void function. + for (Type resType : funcType.getResults()) + conversion.addInputs(converter->convertType((resType))); + rewriter.updateRootInPlace(funcOp, [&] { + funcOp.setType( + rewriter.getFunctionType(conversion.getConvertedTypes(), llvm::None)); + rewriter.applySignatureConversion(&funcOp.getBody(), conversion); + }); + return success(); +} + +//===----------------------------------------------------------------------===// +// BufferAssignmentTypeConverter +//===----------------------------------------------------------------------===// + +/// Registers conversions into BufferAssignmentTypeConverter +BufferAssignmentTypeConverter::BufferAssignmentTypeConverter() { + // Keep all types unchanged. + addConversion([](Type type) { return type; }); + // A type conversion that converts ranked-tensor type to memref type.
+ addConversion([](RankedTensorType type) { + return (Type)MemRefType::get(type.getShape(), type.getElementType()); + }); +} + +//===----------------------------------------------------------------------===// +// BufferPlacementPass construction +//===----------------------------------------------------------------------===// + +std::unique_ptr mlir::createBufferPlacementPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt index f77690b5563f5..97e71a578070b 100644 --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(Utils) add_mlir_library(MLIRTransforms + BufferPlacement.cpp Canonicalizer.cpp CSE.cpp DialectConversion.cpp @@ -25,10 +26,8 @@ add_mlir_library(MLIRTransforms DEPENDS MLIRStandardOpsIncGen MLIRTransformsPassIncGen - ) -target_link_libraries(MLIRTransforms - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIRAnalysis MLIRLoopLikeInterface diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp index 8309099e92a70..f7cf7f1c8e95b 100644 --- a/mlir/lib/Transforms/CSE.cpp +++ b/mlir/lib/Transforms/CSE.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" -#include "mlir/Analysis/Dominance.h" +#include "mlir/IR/Dominance.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" #include "mlir/Transforms/Utils.h" @@ -26,18 +26,9 @@ using namespace mlir; namespace { -// TODO(riverriddle) Handle commutative operations. struct SimpleOperationInfo : public llvm::DenseMapInfo { static unsigned getHashValue(const Operation *opC) { - auto *op = const_cast(opC); - // Hash the operations based upon their: - // - Operation Name - // - Attributes - // - Result Types - // - Operands - return llvm::hash_combine( - op->getName(), op->getAttrList().getDictionary(), op->getResultTypes(), - llvm::hash_combine_range(op->operand_begin(), op->operand_end())); + return OperationEquivalence::computeHash(const_cast(opC)); } static bool isEqual(const Operation *lhsC, const Operation *rhsC) { auto *lhs = const_cast(lhsC); @@ -47,24 +38,8 @@ struct SimpleOperationInfo : public llvm::DenseMapInfo { if (lhs == getTombstoneKey() || lhs == getEmptyKey() || rhs == getTombstoneKey() || rhs == getEmptyKey()) return false; - - // Compare the operation name. - if (lhs->getName() != rhs->getName()) - return false; - // Check operand and result type counts. - if (lhs->getNumOperands() != rhs->getNumOperands() || - lhs->getNumResults() != rhs->getNumResults()) - return false; - // Compare attributes. - if (lhs->getAttrList() != rhs->getAttrList()) - return false; - // Compare operands. - if (!std::equal(lhs->operand_begin(), lhs->operand_end(), - rhs->operand_begin())) - return false; - // Compare result types. 
- return std::equal(lhs->result_type_begin(), lhs->result_type_end(), - rhs->result_type_begin()); + return OperationEquivalence::isEquivalentTo(const_cast(lhsC), + const_cast(rhsC)); } }; } // end anonymous namespace diff --git a/mlir/lib/Transforms/DialectConversion.cpp b/mlir/lib/Transforms/DialectConversion.cpp index 63db2c80c800f..00afd1c8ad92c 100644 --- a/mlir/lib/Transforms/DialectConversion.cpp +++ b/mlir/lib/Transforms/DialectConversion.cpp @@ -468,7 +468,7 @@ class OperationTransactionState { public: OperationTransactionState() = default; OperationTransactionState(Operation *op) - : op(op), loc(op->getLoc()), attrs(op->getAttrList()), + : op(op), loc(op->getLoc()), attrs(op->getMutableAttrDict()), operands(op->operand_begin(), op->operand_end()), successors(op->successor_begin(), op->successor_end()) {} @@ -488,7 +488,7 @@ class OperationTransactionState { private: Operation *op; LocationAttr loc; - NamedAttributeList attrs; + MutableDictionaryAttr attrs; SmallVector operands; SmallVector successors; }; @@ -954,12 +954,8 @@ Value ConversionPatternRewriter::getRemappedValue(Value key) { } /// PatternRewriter hook for creating a new block with the given arguments. -Block *ConversionPatternRewriter::createBlock(Region *parent, - Region::iterator insertPtr, - TypeRange argTypes) { - Block *block = PatternRewriter::createBlock(parent, insertPtr, argTypes); +void ConversionPatternRewriter::notifyBlockCreated(Block *block) { impl->notifyCreatedBlock(block); - return block; } /// PatternRewriter hook for splitting a block into two parts. @@ -1001,13 +997,12 @@ void ConversionPatternRewriter::cloneRegionBefore( } /// PatternRewriter hook for creating a new operation. -Operation *ConversionPatternRewriter::insert(Operation *op) { +void ConversionPatternRewriter::notifyOperationInserted(Operation *op) { LLVM_DEBUG({ impl->logger.startLine() << "** Insert : '" << op->getName() << "'(" << op << ")\n"; }); impl->createdOps.push_back(op); - return OpBuilder::insert(op); } /// PatternRewriter hook for updating the root operation in-place. @@ -1541,9 +1536,8 @@ struct OperationConverter { explicit OperationConverter(ConversionTarget &target, const OwningRewritePatternList &patterns, OpConversionMode mode, - DenseSet *legalizableOps = nullptr) - : opLegalizer(target, patterns), mode(mode), - legalizableOps(legalizableOps) {} + DenseSet *trackedOps = nullptr) + : opLegalizer(target, patterns), mode(mode), trackedOps(trackedOps) {} /// Converts the given operations to the conversion target. LogicalResult convertOperations(ArrayRef ops, @@ -1563,9 +1557,11 @@ struct OperationConverter { /// The conversion mode to use when legalizing operations. OpConversionMode mode; - /// A set of pre-existing operations that were found to be legalizable to the - /// target. This field is only used when mode == OpConversionMode::Analysis. - DenseSet *legalizableOps; + /// A set of pre-existing operations. When mode == OpConversionMode::Analysis, + /// this is populated with ops found to be legalizable to the target. + /// When mode == OpConversionMode::Partial, this is populated with ops found + /// *not* to be legalizable to the target. + DenseSet *trackedOps; }; } // end anonymous namespace @@ -1594,17 +1590,22 @@ LogicalResult OperationConverter::convert(ConversionPatternRewriter &rewriter, return op->emitError() << "failed to legalize operation '" << op->getName() << "'"; /// Partial conversions allow conversions to fail iff the operation was not - /// explicitly marked as illegal. 
- if (mode == OpConversionMode::Partial && opLegalizer.isIllegal(op)) - return op->emitError() - << "failed to legalize operation '" << op->getName() - << "' that was explicitly marked illegal"; + /// explicitly marked as illegal. If the user provided a nonlegalizableOps + /// set, non-legalizable ops are included. + if (mode == OpConversionMode::Partial) { + if (opLegalizer.isIllegal(op)) + return op->emitError() + << "failed to legalize operation '" << op->getName() + << "' that was explicitly marked illegal"; + if (trackedOps) + trackedOps->insert(op); + } } else { /// Analysis conversions don't fail if any operations fail to legalize, /// they are only interested in the operations that were successfully /// legalized. if (mode == OpConversionMode::Analysis) - legalizableOps->insert(op); + trackedOps->insert(op); // If legalization succeeded, convert the types of any of the blocks within // this operation. @@ -1932,21 +1933,30 @@ auto ConversionTarget::getOpInfo(OperationName op) const // Op Conversion Entry Points //===----------------------------------------------------------------------===// -/// Apply a partial conversion on the given operations, and all nested +/// Apply a partial conversion on the given operations and all nested /// operations. This method converts as many operations to the target as -/// possible, ignoring operations that failed to legalize. +/// possible, ignoring operations that failed to legalize. This method only +/// returns failure if there are ops explicitly marked as illegal. If `converter` is +/// provided, the signatures of blocks and regions are also converted. +/// If an `unconvertedOps` set is provided, all operations that are found not +/// to be legalizable to the given `target` are placed within that set. (Note +/// that if there is an op explicitly marked as illegal, the conversion +/// terminates and the `unconvertedOps` set will not necessarily be complete.) LogicalResult mlir::applyPartialConversion( ArrayRef ops, ConversionTarget &target, - const OwningRewritePatternList &patterns, TypeConverter *converter) { - OperationConverter opConverter(target, patterns, OpConversionMode::Partial); + const OwningRewritePatternList &patterns, TypeConverter *converter, + DenseSet *unconvertedOps) { + OperationConverter opConverter(target, patterns, OpConversionMode::Partial, + unconvertedOps); return opConverter.convertOperations(ops, converter); } LogicalResult mlir::applyPartialConversion(Operation *op, ConversionTarget &target, const OwningRewritePatternList &patterns, - TypeConverter *converter) { + TypeConverter *converter, + DenseSet *unconvertedOps) { return applyPartialConversion(llvm::makeArrayRef(op), target, patterns, - converter); + converter, unconvertedOps); } /// Apply a complete conversion on the given operations, and all nested diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index 10ad848a5bb9a..f8f48d86b5622 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -31,50 +31,6 @@ using namespace mlir; // Symbol Use Tracking //===----------------------------------------------------------------------===// -/// Returns true if this operation can be discarded if it is a symbol and has no -/// uses. 'allUsesVisible' corresponds to if the parent symbol table is hidden -/// from above. -static bool canDiscardSymbolOnUseEmpty(Operation *op, bool allUsesVisible) { - if (!SymbolTable::isSymbol(op)) - return false; - - // TODO: This is essentially the same logic from SymbolDCE.
Remove this when - // we have a 'Symbol' interface. - // Private symbols are always initially considered dead. - SymbolTable::Visibility visibility = SymbolTable::getSymbolVisibility(op); - if (visibility == mlir::SymbolTable::Visibility::Private) - return true; - // We only include nested visibility here if all uses are visible. - if (allUsesVisible && visibility == SymbolTable::Visibility::Nested) - return true; - // Otherwise, public symbols are never removable. - return false; -} - -/// Walk all of the symbol table operations nested with 'op' along with a -/// boolean signifying if the symbols within can be treated as if all uses are -/// visible. The provided callback is invoked with the symbol table operation, -/// and a boolean signaling if all of the uses within the symbol table are -/// visible. -static void walkSymbolTables(Operation *op, bool allSymUsesVisible, - function_ref callback) { - if (op->hasTrait()) { - allSymUsesVisible = allSymUsesVisible || !SymbolTable::isSymbol(op) || - SymbolTable::getSymbolVisibility(op) == - SymbolTable::Visibility::Private; - callback(op, allSymUsesVisible); - } else { - // Otherwise if 'op' is not a symbol table, any nested symbols are - // guaranteed to be hidden. - allSymUsesVisible = true; - } - - for (Region ®ion : op->getRegions()) - for (Block &block : region) - for (Operation &nested : block) - walkSymbolTables(&nested, allSymUsesVisible, callback); -} - /// Walk all of the used symbol callgraph nodes referenced with the given op. static void walkReferencedSymbolNodes( Operation *op, CallGraph &cg, @@ -166,23 +122,25 @@ CGUseList::CGUseList(Operation *op, CallGraph &cg) { // Walk each of the symbol tables looking for discardable callgraph nodes. auto walkFn = [&](Operation *symbolTableOp, bool allUsesVisible) { - for (Block &block : symbolTableOp->getRegion(0)) { - for (Operation &op : block) { - // If this is a callgraph operation, check to see if it is discardable. - if (auto callable = dyn_cast(&op)) { - if (auto *node = cg.lookupNode(callable.getCallableRegion())) { - if (canDiscardSymbolOnUseEmpty(&op, allUsesVisible)) - discardableSymNodeUses.try_emplace(node, 0); - continue; + for (Operation &op : symbolTableOp->getRegion(0).getOps()) { + // If this is a callgraph operation, check to see if it is discardable. + if (auto callable = dyn_cast(&op)) { + if (auto *node = cg.lookupNode(callable.getCallableRegion())) { + SymbolOpInterface symbol = dyn_cast(&op); + if (symbol && (allUsesVisible || symbol.isPrivate()) && + symbol.canDiscardOnUseEmpty()) { + discardableSymNodeUses.try_emplace(node, 0); } + continue; } - // Otherwise, check for any referenced nodes. These will be always-live. - walkReferencedSymbolNodes(&op, cg, alwaysLiveNodes, - [](CallGraphNode *, Operation *) {}); } + // Otherwise, check for any referenced nodes. These will be always-live. + walkReferencedSymbolNodes(&op, cg, alwaysLiveNodes, + [](CallGraphNode *, Operation *) {}); } }; - walkSymbolTables(op, /*allSymUsesVisible=*/!op->getBlock(), walkFn); + SymbolTable::walkSymbolTables(op, /*allSymUsesVisible=*/!op->getBlock(), + walkFn); // Drop the use information for any discardable nodes that are always live. for (auto &it : alwaysLiveNodes) @@ -224,7 +182,7 @@ void CGUseList::eraseNode(CallGraphNode *node) { bool CGUseList::isDead(CallGraphNode *node) const { // If the parent operation isn't a symbol, simply check normal SSA deadness. 
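  // (Editor's note: "normal SSA deadness" is spelled out just below: the op
  // must have no memory effects and no remaining uses.)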
Operation *nodeOp = node->getCallableRegion()->getParentOp(); - if (!SymbolTable::isSymbol(nodeOp)) + if (!isa(nodeOp)) return MemoryEffectOpInterface::hasNoEffect(nodeOp) && nodeOp->use_empty(); // Otherwise, check the number of symbol uses. @@ -235,7 +193,7 @@ bool CGUseList::isDead(CallGraphNode *node) const { bool CGUseList::hasOneUseAndDiscardable(CallGraphNode *node) const { // If this isn't a symbol node, check for side-effects and SSA use count. Operation *nodeOp = node->getCallableRegion()->getParentOp(); - if (!SymbolTable::isSymbol(nodeOp)) + if (!isa(nodeOp)) return MemoryEffectOpInterface::hasNoEffect(nodeOp) && nodeOp->hasOneUse(); // Otherwise, check the number of symbol uses. @@ -536,22 +494,28 @@ static void canonicalizeSCC(CallGraph &cg, CGUseList &useList, // NOTE: This is simple now, because we don't enable canonicalizing nodes // within children. When we remove this restriction, this logic will need to // be reworked. - ParallelDiagnosticHandler canonicalizationHandler(context); - llvm::parallel::for_each_n( - llvm::parallel::par, /*Begin=*/size_t(0), - /*End=*/nodesToCanonicalize.size(), [&](size_t index) { - // Set the order for this thread so that diagnostics will be properly - // ordered. - canonicalizationHandler.setOrderIDForThread(index); - - // Apply the canonicalization patterns to this region. - auto *node = nodesToCanonicalize[index]; - applyPatternsAndFoldGreedily(*node->getCallableRegion(), canonPatterns); - - // Make sure to reset the order ID for the diagnostic handler, as this - // thread may be used in a different context. - canonicalizationHandler.eraseOrderIDForThread(); - }); + if (context->isMultithreadingEnabled()) { + ParallelDiagnosticHandler canonicalizationHandler(context); + llvm::parallel::for_each_n( + llvm::parallel::par, /*Begin=*/size_t(0), + /*End=*/nodesToCanonicalize.size(), [&](size_t index) { + // Set the order for this thread so that diagnostics will be properly + // ordered. + canonicalizationHandler.setOrderIDForThread(index); + + // Apply the canonicalization patterns to this region. + auto *node = nodesToCanonicalize[index]; + applyPatternsAndFoldGreedily(*node->getCallableRegion(), + canonPatterns); + + // Make sure to reset the order ID for the diagnostic handler, as this + // thread may be used in a different context. + canonicalizationHandler.eraseOrderIDForThread(); + }); + } else { + for (CallGraphNode *node : nodesToCanonicalize) + applyPatternsAndFoldGreedily(*node->getCallableRegion(), canonPatterns); + } // Recompute the uses held by each of the nodes. 
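  // (Editor's note: canonicalization may have folded or erased call sites, so
  // the cached symbol-use counts must be refreshed before further inlining
  // decisions.)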
for (CallGraphNode *node : nodesToCanonicalize) diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp index ae71d939bc338..a93ffd6746533 100644 --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -15,10 +15,10 @@ #include "PassDetail.h" #include "mlir/Analysis/AffineAnalysis.h" -#include "mlir/Analysis/Dominance.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Dominance.h" #include "mlir/Transforms/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include diff --git a/mlir/lib/Transforms/SCCP.cpp b/mlir/lib/Transforms/SCCP.cpp index 1d0a279cc592b..2b21bd42b2cc4 100644 --- a/mlir/lib/Transforms/SCCP.cpp +++ b/mlir/lib/Transforms/SCCP.cpp @@ -116,12 +116,56 @@ class LatticeValue { Dialect *constantDialect; }; +/// This class contains various state used when computing the lattice of a +/// callable operation. +class CallableLatticeState { +public: + /// Build a lattice state with a given callable region, and a specified number + /// of results to be initialized to the default lattice value (Unknown). + CallableLatticeState(Region *callableRegion, unsigned numResults) + : callableArguments(callableRegion->front().getArguments()), + resultLatticeValues(numResults) {} + + /// Returns the arguments to the callable region. + Block::BlockArgListType getCallableArguments() const { + return callableArguments; + } + + /// Returns the lattice value for the results of the callable region. + MutableArrayRef getResultLatticeValues() { + return resultLatticeValues; + } + + /// Add a call to this callable. This is only used if the callable defines a + /// symbol. + void addSymbolCall(Operation *op) { symbolCalls.push_back(op); } + + /// Return the calls that reference this callable. This is only used + /// if the callable defines a symbol. + ArrayRef getSymbolCalls() const { return symbolCalls; } + +private: + /// The arguments of the callable region. + Block::BlockArgListType callableArguments; + + /// The lattice state for each of the results of this region. The return + /// values of the callable aren't SSA values, so we need to track them + /// separately. + SmallVector resultLatticeValues; + + /// The calls referencing this callable if this callable defines a symbol. + /// This removes the need to recompute symbol references during propagation. + /// Value based references are trivial to resolve, so they can be done + /// in-place. + SmallVector symbolCalls; +}; + /// This class represents the solver for the SCCP analysis. This class acts as /// the propagation engine for computing which values form constants. class SCCPSolver { public: - /// Initialize the solver with a given set of regions. - SCCPSolver(MutableArrayRef regions); + /// Initialize the solver with the given top-level operation. + SCCPSolver(Operation *op); /// Run the solver until it converges. void solve(); @@ -132,6 +176,11 @@ class SCCPSolver { void rewrite(MLIRContext *context, MutableArrayRef regions); private: + /// Initialize the set of symbol defining callables that can have their + /// arguments and results tracked. 'op' is the top-level operation that SCCP + /// is operating on. + void initializeSymbolCallables(Operation *op); + /// Replace the given value with a constant if the corresponding lattice /// represents a constant. Returns success if the value was replaced, failure /// otherwise. 
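Editor's aside: the lattice this solver propagates is the classic three-level SCCP lattice: every value starts Unknown, may be raised to a known Constant, and collapses to Overdefined on conflicting facts. A minimal, self-contained C++ sketch of that lattice and its meet operation, with a plain int payload for brevity (illustrative only; not the MLIR LatticeValue API):

struct Lattice {
  enum Kind { Unknown, Constant, Overdefined } kind = Unknown;
  int value = 0; // Only meaningful when kind == Constant.

  // Merge 'rhs' into this value; returns true if this lattice value changed.
  bool meet(const Lattice &rhs) {
    if (kind == Overdefined || rhs.kind == Unknown)
      return false; // Already at bottom, or nothing new to learn.
    if (kind == Unknown) {
      *this = rhs; // Adopt the more precise state.
      return true;
    }
    if (rhs.kind == Overdefined || rhs.value != value) {
      kind = Overdefined; // Conflicting constants collapse to Overdefined.
      return true;
    }
    return false; // Same constant; nothing changed.
  }
};

The solver reaches a fixpoint because meet() only ever moves a value down this finite-height lattice.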
@@ -149,6 +198,13 @@ class SCCPSolver { /// Visit the given operation and compute any necessary lattice state. void visitOperation(Operation *op); + /// Visit the given call operation and compute any necessary lattice state. + void visitCallOperation(CallOpInterface op); + + /// Visit the given callable operation and compute any necessary lattice + /// state. + void visitCallableOperation(Operation *op); + /// Visit the given operation, which defines regions, and compute any /// necessary lattice state. This also resolves the lattice state of both the /// operation results and any nested regions. @@ -168,6 +224,11 @@ class SCCPSolver { void visitTerminatorOperation(Operation *op, ArrayRef constantOperands); + /// Visit the given terminator operation that exits a callable region. These + /// are terminators with no CFG successors. + void visitCallableTerminatorOperation(Operation *callable, + Operation *terminator); + /// Visit the given block and compute any necessary lattice state. void visitBlock(Block *block); @@ -235,11 +296,20 @@ class SCCPSolver { /// A worklist of operations that need to be processed. SmallVector opWorklist; + + /// The callable operations that have their argument/result state tracked. + DenseMap callableLatticeState; + + /// A map between a call operation and the resolved symbol callable. This + /// avoids re-resolving symbol references during propagation. Value-based + /// callables are trivial to resolve, so they can be done in-place. + DenseMap callToSymbolCallable; }; } // end anonymous namespace -SCCPSolver::SCCPSolver(MutableArrayRef regions) { - for (Region &region : regions) { +SCCPSolver::SCCPSolver(Operation *op) { + /// Initialize the solver with the regions within this operation. + for (Region &region : op->getRegions()) { if (region.empty()) continue; Block *entryBlock = &region.front(); @@ -251,6 +321,7 @@ SCCPSolver::SCCPSolver(MutableArrayRef regions) { // as overdefined. markAllOverdefined(entryBlock->getArguments()); } + initializeSymbolCallables(op); } void SCCPSolver::solve() { @@ -310,6 +381,73 @@ void SCCPSolver::rewrite(MLIRContext *context, } } +void SCCPSolver::initializeSymbolCallables(Operation *op) { + // Initialize the set of symbol callables that can have their state tracked. + // This tracks which symbol callable operations we can propagate within and + // out of. + auto walkFn = [&](Operation *symTable, bool allUsesVisible) { + Region &symbolTableRegion = symTable->getRegion(0); + Block *symbolTableBlock = &symbolTableRegion.front(); + for (auto callable : symbolTableBlock->getOps()) { + // We won't be able to track external callables. + Region *callableRegion = callable.getCallableRegion(); + if (!callableRegion) + continue; + // We only care about symbol-defining callables here. + auto symbol = dyn_cast(callable.getOperation()); + if (!symbol) + continue; + callableLatticeState.try_emplace(callable, callableRegion, + callable.getCallableResults().size()); + + // If not all of the uses of this symbol are visible, we can't track the + // state of the arguments. + if (symbol.isPublic() || (!allUsesVisible && symbol.isNested())) + markAllOverdefined(callableRegion->front().getArguments()); + } + if (callableLatticeState.empty()) + return; + + // After computing the valid callables, walk any symbol uses to check + // for non-call references. We won't be able to track the lattice state + // for arguments to these callables, as we can't guarantee that we can see + // all of their calls.
+ Optional uses = + SymbolTable::getSymbolUses(&symbolTableRegion); + if (!uses) { + // If we couldn't gather the symbol uses, conservatively assume that + // we can't track information for any nested symbols. + op->walk([&](CallableOpInterface op) { callableLatticeState.erase(op); }); + return; + } + + for (const SymbolTable::SymbolUse &use : *uses) { + // If the use is a call, track it to avoid the need to recompute the + // reference later. + if (auto callOp = dyn_cast(use.getUser())) { + Operation *symCallable = callOp.resolveCallable(); + auto callableLatticeIt = callableLatticeState.find(symCallable); + if (callableLatticeIt != callableLatticeState.end()) { + callToSymbolCallable.try_emplace(callOp, symCallable); + + // We only need to record the call in the lattice if it produces any + // values. + if (callOp.getOperation()->getNumResults()) + callableLatticeIt->second.addSymbolCall(callOp); + } + continue; + } + // This use isn't a call, so we don't know all of the callers. + auto *symbol = SymbolTable::lookupSymbolIn(op, use.getSymbolRef()); + auto it = callableLatticeState.find(symbol); + if (it != callableLatticeState.end()) + markAllOverdefined(it->second.getCallableArguments()); + } + }; + SymbolTable::walkSymbolTables(op, /*allSymUsesVisible=*/!op->getBlock(), + walkFn); +} + LogicalResult SCCPSolver::replaceWithConstant(OpBuilder &builder, OperationFolder &folder, Value value) { @@ -347,6 +485,16 @@ void SCCPSolver::visitOperation(Operation *op) { if (op->isKnownTerminator()) visitTerminatorOperation(op, operandConstants); + // Process call operations. The call visitor processes result values, so we + // can exit afterwards. + if (CallOpInterface call = dyn_cast(op)) + return visitCallOperation(call); + + // Process callable operations. These are specially handled region operations + // that track dataflow via calls. + if (isa(op)) + return visitCallableOperation(op); + // Process region holding operations. The region visitor processes result // values, so we can exit afterwards. if (op->getNumRegions()) @@ -366,7 +514,7 @@ void SCCPSolver::visitOperation(Operation *op) { // in-place. The constant passed in may not correspond to the real runtime // value, so in-place updates are not allowed. SmallVector originalOperands(op->getOperands()); - NamedAttributeList originalAttrs = op->getAttrList(); + MutableDictionaryAttr originalAttrs = op->getMutableAttrDict(); // Simulate the result of folding this operation to a constant. If folding // fails or was an in-place fold, mark the results as overdefined. @@ -399,6 +547,62 @@ void SCCPSolver::visitOperation(Operation *op) { } } +void SCCPSolver::visitCallableOperation(Operation *op) { + // Mark the regions as executable. + bool isTrackingLatticeState = callableLatticeState.count(op); + for (Region &region : op->getRegions()) { + if (region.empty()) + continue; + Block *entryBlock = &region.front(); + markBlockExecutable(entryBlock); + + // If we aren't tracking lattice state for this callable, mark all of the + // region arguments as overdefined. + if (!isTrackingLatticeState) + markAllOverdefined(entryBlock->getArguments()); + } + + // TODO: Add support for non-symbol callables when necessary. If the callable + // has non-call uses we would mark overdefined, otherwise allow for + // propagating the return values out. + markAllOverdefined(op, op->getResults()); +} + +void SCCPSolver::visitCallOperation(CallOpInterface op) { + ResultRange callResults = op.getOperation()->getResults(); + + // Resolve the callable operation for this call.
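+  // (Editor's note: an SSA-valued callee is resolved below via its defining
+  // op; a symbol callee is looked up in the callToSymbolCallable cache built
+  // by initializeSymbolCallables.)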
+ Operation *callableOp = nullptr; + if (Value callableValue = op.getCallableForCallee().dyn_cast()) + callableOp = callableValue.getDefiningOp(); + else + callableOp = callToSymbolCallable.lookup(op); + + // The callable of this call can't be resolved, mark any results overdefined. + if (!callableOp) + return markAllOverdefined(op, callResults); + + // If this callable is tracking state, merge the argument operands with the + // arguments of the callable. + auto callableLatticeIt = callableLatticeState.find(callableOp); + if (callableLatticeIt == callableLatticeState.end()) + return markAllOverdefined(op, callResults); + + OperandRange callOperands = op.getArgOperands(); + auto callableArgs = callableLatticeIt->second.getCallableArguments(); + for (auto it : llvm::zip(callOperands, callableArgs)) { + BlockArgument callableArg = std::get<1>(it); + if (latticeValues[callableArg].meet(latticeValues[std::get<0>(it)])) + visitUsers(callableArg); + } + + // Merge in the lattice state for the callable results as well. + auto callableResults = callableLatticeIt->second.getResultLatticeValues(); + for (auto it : llvm::zip(callResults, callableResults)) + meet(/*owner=*/op, /*to=*/latticeValues[std::get<0>(it)], + /*from=*/std::get<1>(it)); +} + void SCCPSolver::visitRegionOperation(Operation *op, ArrayRef constantOperands) { // Check to see if we can reason about the internal control flow of this @@ -509,9 +713,14 @@ void SCCPSolver::visitTerminatorOperation( Operation *op, ArrayRef constantOperands) { // If this operation has no successors, we treat it as an exiting terminator. if (op->getNumSuccessors() == 0) { - // Check to see if the parent tracks region control flow. Region *parentRegion = op->getParentRegion(); Operation *parentOp = parentRegion->getParentOp(); + + // Check to see if this is a terminator for a callable region. + if (isa(parentOp)) + return visitCallableTerminatorOperation(parentOp, op); + + // Otherwise, check to see if the parent tracks region control flow. auto regionInterface = dyn_cast(parentOp); if (!regionInterface || !isBlockExecutable(parentOp->getBlock())) return; @@ -552,6 +761,42 @@ void SCCPSolver::visitTerminatorOperation( markEdgeExecutable(block, succ); } +void SCCPSolver::visitCallableTerminatorOperation(Operation *callable, + Operation *terminator) { + // If there are no exiting values, we have nothing to track. + if (terminator->getNumOperands() == 0) + return; + + // If this callable isn't tracking any lattice state there is nothing to do. + auto latticeIt = callableLatticeState.find(callable); + if (latticeIt == callableLatticeState.end()) + return; + assert(callable->getNumResults() == 0 && "expected symbol callable"); + + // If this terminator is not "return-like", conservatively mark all of the + // call-site results as overdefined. + auto callableResultLattices = latticeIt->second.getResultLatticeValues(); + if (!terminator->hasTrait()) { + for (auto &it : callableResultLattices) + it.markOverdefined(); + for (Operation *call : latticeIt->second.getSymbolCalls()) + markAllOverdefined(call, call->getResults()); + return; + } + + // Merge the terminator operands into the results. + bool anyChanged = false; + for (auto it : llvm::zip(terminator->getOperands(), callableResultLattices)) + anyChanged |= std::get<1>(it).meet(latticeValues[std::get<0>(it)]); + if (!anyChanged) + return; + + // If any of the result lattices changed, update the callers. 
+ for (Operation *call : latticeIt->second.getSymbolCalls()) + for (auto it : llvm::zip(call->getResults(), callableResultLattices)) + meet(call, latticeValues[std::get<0>(it)], std::get<1>(it)); +} + void SCCPSolver::visitBlock(Block *block) { // If the block is not the entry block, we need to compute the lattice state // for the block arguments. Entry block argument lattices are computed @@ -663,7 +908,7 @@ void SCCP::runOnOperation() { Operation *op = getOperation(); // Solve for SCCP constraints within nested regions. - SCCPSolver solver(op->getRegions()); + SCCPSolver solver(op); solver.solve(); // Clean up any operations using the solver analysis. diff --git a/mlir/lib/Transforms/SymbolDCE.cpp b/mlir/lib/Transforms/SymbolDCE.cpp index 581857a6a92e3..56997b6d2af7d 100644 --- a/mlir/lib/Transforms/SymbolDCE.cpp +++ b/mlir/lib/Transforms/SymbolDCE.cpp @@ -43,10 +43,9 @@ void SymbolDCE::runOnOperation() { // A flag that signals if the top-level symbol table is hidden, i.e. not // accessible from parent scopes. bool symbolTableIsHidden = true; - if (symbolTableOp->getParentOp() && SymbolTable::isSymbol(symbolTableOp)) { - symbolTableIsHidden = SymbolTable::getSymbolVisibility(symbolTableOp) == - SymbolTable::Visibility::Private; - } + SymbolOpInterface symbol = dyn_cast(symbolTableOp); + if (symbolTableOp->getParentOp() && symbol) + symbolTableIsHidden = symbol.isPrivate(); // Compute the set of live symbols within the symbol table. DenseSet liveSymbols; @@ -61,7 +60,7 @@ void SymbolDCE::runOnOperation() { for (auto &block : nestedSymbolTable->getRegion(0)) { for (Operation &op : llvm::make_early_inc_range(block.without_terminator())) { - if (SymbolTable::isSymbol(&op) && !liveSymbols.count(&op)) + if (isa(&op) && !liveSymbols.count(&op)) op.erase(); } } @@ -80,30 +79,16 @@ LogicalResult SymbolDCE::computeLiveness(Operation *symbolTableOp, // Walk the symbols within the current symbol table, marking the symbols that // are known to be live. for (auto &block : symbolTableOp->getRegion(0)) { + // Add all non-symbols or symbols that can't be discarded. for (Operation &op : block.without_terminator()) { - // Always add non symbol operations to the worklist. - if (!SymbolTable::isSymbol(&op)) { + SymbolOpInterface symbol = dyn_cast(&op); + if (!symbol) { worklist.push_back(&op); continue; } - - // Check the visibility to see if this symbol may be referenced - // externally. - SymbolTable::Visibility visibility = - SymbolTable::getSymbolVisibility(&op); - - // Private symbols are always initially considered dead. - if (visibility == mlir::SymbolTable::Visibility::Private) - continue; - // We only include nested visibility here if the symbol table isn't - // hidden. - if (symbolTableIsHidden && visibility == SymbolTable::Visibility::Nested) - continue; - - // TODO(riverriddle) Add hooks here to allow symbols to provide additional - // information, e.g. linkage can be used to drop some symbols that may - // otherwise be considered "live". - if (liveSymbols.insert(&op).second) + bool isDiscardable = (symbolTableIsHidden || symbol.isPrivate()) && + symbol.canDiscardOnUseEmpty(); + if (!isDiscardable && liveSymbols.insert(&op).second) worklist.push_back(&op); } } @@ -117,10 +102,9 @@ LogicalResult SymbolDCE::computeLiveness(Operation *symbolTableOp, if (op->hasTrait()) { // The internal symbol table is hidden if the parent is, if it's not a // symbol, or if it is a private symbol.
- bool symbolIsHidden = symbolTableIsHidden || !SymbolTable::isSymbol(op) || - SymbolTable::getSymbolVisibility(op) == - SymbolTable::Visibility::Private; - if (failed(computeLiveness(op, symbolIsHidden, liveSymbols))) + SymbolOpInterface symbol = dyn_cast(op); + bool symIsHidden = symbolTableIsHidden || !symbol || symbol.isPrivate(); + if (failed(computeLiveness(op, symIsHidden, liveSymbols))) return failure(); } diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt index 1e0442179bf41..a06523ed4aaa8 100644 --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -12,10 +12,8 @@ add_mlir_library(MLIRTransformUtils DEPENDS MLIRStandardOpsIncGen - ) -target_link_libraries(MLIRTransformUtils - PUBLIC + LINK_LIBS PUBLIC MLIRAffineOps MLIRAnalysis MLIRLoopAnalysis diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp index 2ebf1d6a47d74..f4022a4e5bde4 100644 --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -77,10 +77,7 @@ class GreedyPatternRewriteDriver : public PatternRewriter { protected: // Implement the hook for inserting operations, and make sure that newly // inserted ops are added to the worklist for processing. - Operation *insert(Operation *op) override { - addToWorklist(op); - return OpBuilder::insert(op); - } + void notifyOperationInserted(Operation *op) override { addToWorklist(op); } // If an operation is about to be removed, make sure it is not in our // worklist anymore because we'd get dangling references to it. @@ -266,9 +263,6 @@ class OpPatternRewriteDriver : public PatternRewriter { bool simplifyLocally(Operation *op, int maxIterations, bool &erased); - /// No additional action needed other than inserting the op. - Operation *insert(Operation *op) override { return OpBuilder::insert(op); } - // These are hooks implemented for PatternRewriter. protected: /// If an operation is about to be removed, mark it so that we can let clients diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index dec0c4f7c4ebe..35581eb2a3925 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -24,6 +24,7 @@ #include "mlir/IR/Function.h" #include "mlir/IR/IntegerSet.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Support/MathExtras.h" #include "mlir/Transforms/RegionUtils.h" #include "mlir/Transforms/Utils.h" #include "llvm/ADT/DenseMap.h" @@ -118,6 +119,34 @@ static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, lb.erase(); } +// Build the IR that performs ceil division of a positive value by a constant: +// ceildiv(a, B) = divis(a + (B-1), B) +// where divis is rounding-to-zero division. +static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, + int64_t divisor) { + assert(divisor > 0 && "expected positive divisor"); + assert(dividend.getType().isIndex() && "expected index-typed value"); + + Value divisorMinusOneCst = builder.create(loc, divisor - 1); + Value divisorCst = builder.create(loc, divisor); + Value sum = builder.create(loc, dividend, divisorMinusOneCst); + return builder.create(loc, sum, divisorCst); +} + +// Build the IR that performs ceil division of a positive value by another +// positive value: +// ceildiv(a, b) = divis(a + (b - 1), b) +// where divis is rounding-to-zero division. 
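+// For example, ceildiv(10, 4) = divis(13, 4) = 3 and
+// ceildiv(8, 4) = divis(11, 4) = 2 (editor's example).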
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, + Value divisor) { + assert(dividend.getType().isIndex() && "expected index-typed value"); + + Value cstOne = builder.create(loc, 1); + Value divisorMinusOne = builder.create(loc, divisor, cstOne); + Value sum = builder.create(loc, dividend, divisorMinusOne); + return builder.create(loc, sum, divisor); +} + /// Promotes the loop body of a forOp to its containing block if the forOp /// was known to have a single iteration. // TODO(bondhugula): extend this for arbitrary affine bounds. @@ -161,6 +190,35 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) { return success(); } +/// Promotes the loop body of a forOp to its containing block if it can be +/// determined that the loop has a single iteration. +LogicalResult mlir::promoteIfSingleIteration(loop::ForOp forOp) { + auto lbCstOp = + dyn_cast_or_null(forOp.lowerBound().getDefiningOp()); + auto ubCstOp = + dyn_cast_or_null(forOp.upperBound().getDefiningOp()); + auto stepCstOp = + dyn_cast_or_null(forOp.step().getDefiningOp()); + if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.getValue() < 0 || + ubCstOp.getValue() < 0 || stepCstOp.getValue() < 0) + return failure(); + int64_t tripCount = mlir::ceilDiv(ubCstOp.getValue() - lbCstOp.getValue(), + stepCstOp.getValue()); + if (tripCount != 1) + return failure(); + auto iv = forOp.getInductionVar(); + iv.replaceAllUsesWith(lbCstOp); + + // Move the loop body operations, except for its terminator, to the loop's + // containing block. + auto *parentBlock = forOp.getOperation()->getBlock(); + forOp.getBody()->back().erase(); + parentBlock->getOperations().splice(Block::iterator(forOp), + forOp.getBody()->getOperations()); + forOp.erase(); + return success(); +} + /// Promotes all single iteration 'for' ops in `f`, i.e., moves /// their body into the containing Block. void mlir::promoteSingleIterationLoops(FuncOp f) { @@ -192,7 +250,7 @@ static AffineForOp generateShiftedLoop( BlockAndValueMapping operandMap; - OpBuilder bodyBuilder = loopChunk.getBodyBuilder(); + auto bodyBuilder = OpBuilder::atBlockTerminator(loopChunk.getBody()); for (auto it = opGroupQueue.begin() + offset, e = opGroupQueue.end(); it != e; ++it) { uint64_t shift = it->first; @@ -416,6 +474,37 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp, return loopUnrollByFactor(forOp, unrollFactor); } +// Generates unrolled copies of the body of 'loopBodyBlock' (of an AffineForOp +// or loop::ForOp) with associated induction variable 'forOpIV', by +// 'unrollFactor', calling 'ivRemapFn' to remap 'forOpIV' for each unrolled body. +static void generateUnrolledLoop( + Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, + function_ref ivRemapFn) { + // Builder to insert unrolled bodies just before the terminator of the body of + // 'forOp'. + auto builder = OpBuilder::atBlockTerminator(loopBodyBlock); + + // Keep a pointer to the last non-terminator operation in the original block + // so that we know what to clone (since we are doing this in-place). + Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2); + + // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies). + for (unsigned i = 1; i < unrollFactor; i++) { + BlockAndValueMapping operandMap; + + // If the induction variable is used, create a remapping to the value for + // this unrolled instance. + if (!forOpIV.use_empty()) { + Value ivUnroll = ivRemapFn(i, forOpIV, builder); + operandMap.map(forOpIV, ivUnroll); + } + + // Clone the original body of 'forOp'.
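+    // (Editor's note: std::next(srcBlockEnd) makes the half-open clone range
+    // below include the last original non-terminator operation captured
+    // above.)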
+    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
+      builder.clone(*it, operandMap);
+  }
+}
+
 /// Unrolls this loop by the specified factor. Returns success if the loop
 /// is successfully unrolled.
 LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
@@ -467,38 +556,114 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   // Scale the step of loop being unrolled by unroll factor.
   int64_t step = forOp.getStep();
   forOp.setStep(step * unrollFactor);
+  generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+                       [&](unsigned i, Value iv, OpBuilder b) {
+                         // iv' = iv + i * step
+                         auto d0 = b.getAffineDimExpr(0);
+                         auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
+                         return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap,
+                                                        iv);
+                       });

-  // Builder to insert unrolled bodies just before the terminator of the body of
-  // 'forOp'.
-  OpBuilder builder = forOp.getBodyBuilder();
+  // Promote the loop body up if this has turned into a single iteration loop.
+  promoteIfSingleIteration(forOp);
+  return success();
+}

-  // Keep a pointer to the last non-terminator operation in the original block
-  // so that we know what to clone (since we are doing this in-place).
-  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
+/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
+LogicalResult mlir::loopUnrollByFactor(loop::ForOp forOp,
+                                       uint64_t unrollFactor) {
+  assert(unrollFactor > 0 && "expected positive unroll factor");
+  if (unrollFactor == 1)
+    return promoteIfSingleIteration(forOp);

-  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
-  auto forOpIV = forOp.getInductionVar();
-  for (unsigned i = 1; i < unrollFactor; i++) {
-    BlockAndValueMapping operandMap;
+  // Return success if the loop body is empty, i.e., contains only the
+  // terminator.
+  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
+    return success();

-    // If the induction variable is used, create a remapping to the value for
-    // this unrolled instance.
-    if (!forOpIV.use_empty()) {
-      // iv' = iv + 1/2/3...unrollFactor-1;
-      auto d0 = builder.getAffineDimExpr(0);
-      auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
-      auto ivUnroll =
-          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
-      operandMap.map(forOpIV, ivUnroll);
-    }
+  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
+  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
+  OpBuilder boundsBuilder(forOp);
+  auto loc = forOp.getLoc();
+  auto step = forOp.step();
+  Value upperBoundUnrolled;
+  Value stepUnrolled;
+  bool generateEpilogueLoop = true;
+
+  auto lbCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.lowerBound().getDefiningOp());
+  auto ubCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.upperBound().getDefiningOp());
+  auto stepCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.step().getDefiningOp());
+  if (lbCstOp && ubCstOp && stepCstOp) {
+    // Constant loop bounds computation.
+    int64_t lbCst = lbCstOp.getValue();
+    int64_t ubCst = ubCstOp.getValue();
+    int64_t stepCst = stepCstOp.getValue();
+    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
+           "expected non-negative loop bounds and step");
+    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
+    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
+    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
+    assert(upperBoundUnrolledCst <= ubCst);
+    int64_t stepUnrolledCst = stepCst * unrollFactor;
+
+    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
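+    // Editor's worked example (not in the original patch): with lbCst = 0,
+    // ubCst = 10, stepCst = 2 and unrollFactor = 3: tripCount = 5,
+    // tripCountEvenMultiple = 3, upperBoundUnrolledCst = 6 and
+    // stepUnrolledCst = 6. The unrolled loop then covers iterations 0, 2 and 4
+    // in a single pass, and the epilogue loop below covers the remainder at
+    // 6 and 8.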
+    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
+    if (generateEpilogueLoop)
+      upperBoundUnrolled =
+          boundsBuilder.create<ConstantIndexOp>(loc, upperBoundUnrolledCst);
+    else
+      upperBoundUnrolled = ubCstOp;
+
+    // Create constant for 'stepUnrolled'.
+    stepUnrolled =
+        stepCst == stepUnrolledCst
+            ? step
+            : boundsBuilder.create<ConstantIndexOp>(loc, stepUnrolledCst);
+  } else {
+    // Dynamic loop bounds computation.
+    // TODO(andydavis) Add dynamic asserts for negative lb/ub/step, or
+    // consider using ceilDiv from AffineApplyExpander.
+    auto lowerBound = forOp.lowerBound();
+    auto upperBound = forOp.upperBound();
+    Value diff = boundsBuilder.create<SubIOp>(loc, upperBound, lowerBound);
+    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
+    Value unrollFactorCst =
+        boundsBuilder.create<ConstantIndexOp>(loc, unrollFactor);
+    Value tripCountRem =
+        boundsBuilder.create<SignedRemIOp>(loc, tripCount, unrollFactorCst);
+    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
+    Value tripCountEvenMultiple =
+        boundsBuilder.create<SubIOp>(loc, tripCount, tripCountRem);
+    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
+    upperBoundUnrolled = boundsBuilder.create<AddIOp>(
+        loc, lowerBound,
+        boundsBuilder.create<MulIOp>(loc, tripCountEvenMultiple, step));
+    // Scale 'step' by 'unrollFactor'.
+    stepUnrolled = boundsBuilder.create<MulIOp>(loc, step, unrollFactorCst);
+  }

-  // Clone the original body of 'forOp'.
-  for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd);
-       it++) {
-    builder.clone(*it, operandMap);
-  }
+  // Create epilogue cleanup loop starting at 'upperBoundUnrolled'.
+  if (generateEpilogueLoop) {
+    OpBuilder epilogueBuilder(forOp.getOperation()->getBlock(),
+                              std::next(Block::iterator(forOp)));
+    auto epilogueForOp = cast<loop::ForOp>(epilogueBuilder.clone(*forOp));
+    epilogueForOp.setLowerBound(upperBoundUnrolled);
+    promoteIfSingleIteration(epilogueForOp);
+  }

+  // Create unrolled loop.
+  forOp.setUpperBound(upperBoundUnrolled);
+  forOp.setStep(stepUnrolled);
+  generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+                       [&](unsigned i, Value iv, OpBuilder b) {
+                         // iv' = iv + step * i;
+                         auto stride = b.create<MulIOp>(
+                             loc, step, b.create<ConstantIndexOp>(loc, i));
+                         return b.create<AddIOp>(loc, iv, stride);
+                       });
   // Promote the loop body up if this has turned into a single iteration loop.
   promoteIfSingleIteration(forOp);
   return success();
@@ -906,7 +1071,7 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
   SmallVector<AffineForOp, 8> innerLoops;
   for (auto t : targets) {
     // Insert newForOp before the terminator of `t`.
-    OpBuilder b = t.getBodyBuilder();
+    auto b = OpBuilder::atBlockTerminator(t.getBody());
     auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
                                           ubOperands, ubMap, originalStep);
     auto begin = t.getBody()->begin();
@@ -938,7 +1103,7 @@ static Loops stripmineSink(loop::ForOp forOp, Value factor,
     auto nOps = t.getBody()->getOperations().size();

     // Insert newForOp before the terminator of `t`.
-    OpBuilder b(t.getBodyBuilder());
+    auto b = OpBuilder::atBlockTerminator(t.getBody());
     Value stepped = b.create<AddIOp>(t.getLoc(), iv, forOp.step());
     Value less = b.create<CmpIOp>(t.getLoc(), CmpIPredicate::slt,
                                   forOp.upperBound(), stepped);
@@ -1032,34 +1197,6 @@ Loops mlir::tilePerfectlyNested(loop::ForOp rootForOp, ArrayRef<Value> sizes) {
   return ::tile(forOps, sizes, forOps.back());
 }

-// Build the IR that performs ceil division of a positive value by a constant:
-//    ceildiv(a, B) = divis(a + (B-1), B)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             int64_t divisor) {
-  assert(divisor > 0 && "expected positive divisor");
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value divisorMinusOneCst = builder.create<ConstantIndexOp>(loc, divisor - 1);
-  Value divisorCst = builder.create<ConstantIndexOp>(loc, divisor);
-  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOneCst);
-  return builder.create<SignedDivIOp>(loc, sum, divisorCst);
-}
-
-// Build the IR that performs ceil division of a positive value by another
-// positive value:
-//    ceildiv(a, b) = divis(a + (b - 1), b)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             Value divisor) {
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value cstOne = builder.create<ConstantIndexOp>(loc, 1);
-  Value divisorMinusOne = builder.create<SubIOp>(loc, divisor, cstOne);
-  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOne);
-  return builder.create<SignedDivIOp>(loc, sum, divisor);
-}
-
 // Hoist the ops within `outer` that appear before `inner`.
 // Such ops include the ops that have been introduced by parametric tiling.
 // Ops that come from triangular loops (i.e. that belong to the program slice
@@ -1510,7 +1647,7 @@ generatePointWiseCopy(Location loc, Value memref, Value fastMemRef,
     if (d == 0)
       copyNestRoot = forOp;
-    b = forOp.getBodyBuilder();
+    b = OpBuilder::atBlockTerminator(forOp.getBody());

     auto fastBufOffsetMap =
         AffineMap::get(lbOperands.size(), 0, fastBufOffsets[d]);
@@ -2309,7 +2446,7 @@ createFullTiles(MutableArrayRef<AffineForOp> inputNest,
     AffineForOp fullTileLoop = createCanonicalizedAffineForOp(
         b, loop.getLoc(), lbVmap.getOperands(), lbVmap.getAffineMap(),
         ubVmap.getOperands(), ubVmap.getAffineMap());
-    b = fullTileLoop.getBodyBuilder();
+    b = OpBuilder::atBlockTerminator(fullTileLoop.getBody());
     fullTileLoops.push_back(fullTileLoop);
   }

@@ -2318,7 +2455,7 @@ createFullTiles(MutableArrayRef<AffineForOp> inputNest,
   for (auto loopEn : llvm::enumerate(inputNest))
     operandMap.map(loopEn.value().getInductionVar(),
                    fullTileLoops[loopEn.index()].getInductionVar());
-  b = fullTileLoops.back().getBodyBuilder();
+  b = OpBuilder::atBlockTerminator(fullTileLoops.back().getBody());
   for (auto &op : inputNest.back().getBody()->without_terminator())
     b.clone(op, operandMap);
   return success();
diff --git a/mlir/lib/Transforms/Utils/RegionUtils.cpp b/mlir/lib/Transforms/Utils/RegionUtils.cpp
index 162091cd53de7..76b393183708d 100644
--- a/mlir/lib/Transforms/Utils/RegionUtils.cpp
+++ b/mlir/lib/Transforms/Utils/RegionUtils.cpp
@@ -209,7 +209,7 @@ static void propagateTerminatorLiveness(Operation *op, LiveMap &liveMap) {
   // Check to see if we can reason about the successor operands and mutate
   // them.
   BranchOpInterface branchInterface = dyn_cast<BranchOpInterface>(op);
-  if (!branchInterface || !branchInterface.canEraseSuccessorOperand()) {
+  if (!branchInterface) {
     for (Block *successor : op->getSuccessors())
       for (BlockArgument arg : successor->getArguments())
         liveMap.setProvedLive(arg);
@@ -219,7 +219,7 @@ static void propagateTerminatorLiveness(Operation *op, LiveMap &liveMap) {
   // If we can't reason about the operands to a successor, conservatively mark
   // all arguments as live.
   for (unsigned i = 0, e = op->getNumSuccessors(); i != e; ++i) {
-    if (!branchInterface.getSuccessorOperands(i))
+    if (!branchInterface.getMutableSuccessorOperands(i))
       for (BlockArgument arg : op->getSuccessor(i)->getArguments())
         liveMap.setProvedLive(arg);
   }
@@ -278,7 +278,8 @@ static void eraseTerminatorSuccessorOperands(Operation *terminator,
     // since it will promote later operands of the terminator being erased
     // first, reducing the quadratic-ness.
     unsigned succ = succE - succI - 1;
-    Optional<OperandRange> succOperands = branchOp.getSuccessorOperands(succ);
+    Optional<MutableOperandRange> succOperands =
+        branchOp.getMutableSuccessorOperands(succ);
     if (!succOperands)
       continue;
     Block *successor = terminator->getSuccessor(succ);
@@ -288,7 +289,7 @@ static void eraseTerminatorSuccessorOperands(Operation *terminator,
       // shifting later args when earlier args are erased.
       unsigned arg = argE - argI - 1;
       if (!liveMap.wasProvenLive(successor->getArgument(arg)))
-        branchOp.eraseSuccessorOperand(succ, arg);
+        succOperands->erase(arg);
     }
   }
 }
@@ -366,6 +367,324 @@ static LogicalResult runRegionDCE(MutableArrayRef<Region> regions) {
   return deleteDeadness(regions, liveMap);
 }

+//===----------------------------------------------------------------------===//
+// Block Merging
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// BlockEquivalenceData
+
+namespace {
+/// This class contains the information for comparing the equivalencies of two
+/// blocks. Blocks are considered equivalent if they contain the same
+/// operations in the same order. The only allowed divergence is for operands
+/// that come from sources outside of the parent block, i.e. the uses of values
+/// produced within the block must be equivalent.
+/// e.g.,
+/// Equivalent:
+///  ^bb1(%arg0: i32)
+///    return %arg0, %foo : i32, i32
+///  ^bb2(%arg1: i32)
+///    return %arg1, %bar : i32, i32
+/// Not Equivalent:
+///  ^bb1(%arg0: i32)
+///    return %foo, %arg0 : i32, i32
+///  ^bb2(%arg1: i32)
+///    return %arg1, %bar : i32, i32
+struct BlockEquivalenceData {
+  BlockEquivalenceData(Block *block);
+
+  /// Return the order index for the given value that is within the block of
+  /// this data.
+  unsigned getOrderOf(Value value) const;
+
+  /// The block this data refers to.
+  Block *block;
+  /// A hash value for this block.
+  llvm::hash_code hash;
+  /// A map of result producing operations to their relative orders within this
+  /// block. The order of an operation is the number of defined values that are
+  /// produced within the block before this operation.
+  DenseMap<Operation *, unsigned> opOrderIndex;
+};
+} // end anonymous namespace
+
+BlockEquivalenceData::BlockEquivalenceData(Block *block)
+    : block(block), hash(0) {
+  unsigned orderIt = block->getNumArguments();
+  for (Operation &op : *block) {
+    if (unsigned numResults = op.getNumResults()) {
+      opOrderIndex.try_emplace(&op, orderIt);
+      orderIt += numResults;
+    }
+    auto opHash = OperationEquivalence::computeHash(
+        &op, OperationEquivalence::Flags::IgnoreOperands);
+    hash = llvm::hash_combine(hash, opHash);
+  }
+}
+
+unsigned BlockEquivalenceData::getOrderOf(Value value) const {
+  assert(value.getParentBlock() == block && "expected value of this block");
+
+  // Arguments use the argument number as the order index.
+  if (BlockArgument arg = value.dyn_cast<BlockArgument>())
+    return arg.getArgNumber();
+
+  // Otherwise, the result order is offset from the parent op's order.
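+  // Editor's worked example (not in the original patch): in a block with two
+  // arguments whose first operation defines three results, the arguments get
+  // orders 0 and 1, that operation gets order 2, and its i-th result gets
+  // order 2 + i.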
+  OpResult result = value.cast<OpResult>();
+  auto opOrderIt = opOrderIndex.find(result.getDefiningOp());
+  assert(opOrderIt != opOrderIndex.end() && "expected op to have an order");
+  return opOrderIt->second + result.getResultNumber();
+}
+
+//===----------------------------------------------------------------------===//
+// BlockMergeCluster
+
+namespace {
+/// This class represents a cluster of blocks to be merged together.
+class BlockMergeCluster {
+public:
+  BlockMergeCluster(BlockEquivalenceData &&leaderData)
+      : leaderData(std::move(leaderData)) {}
+
+  /// Attempt to add the given block to this cluster. Returns success if the
+  /// block was merged, failure otherwise.
+  LogicalResult addToCluster(BlockEquivalenceData &blockData);
+
+  /// Try to merge all of the blocks within this cluster into the leader block.
+  LogicalResult merge();
+
+private:
+  /// The equivalence data for the leader of the cluster.
+  BlockEquivalenceData leaderData;
+
+  /// The set of blocks that can be merged into the leader.
+  llvm::SmallSetVector<Block *, 1> blocksToMerge;
+
+  /// A set of operand+index pairs that correspond to operands that need to be
+  /// replaced by arguments when the cluster gets merged.
+  std::set<std::pair<int, int>> operandsToMerge;
+
+  /// A map of operations with external uses to a replacement within the leader
+  /// block.
+  DenseMap<Operation *, Operation *> opsToReplace;
+};
+} // end anonymous namespace
+
+LogicalResult BlockMergeCluster::addToCluster(BlockEquivalenceData &blockData) {
+  if (leaderData.hash != blockData.hash)
+    return failure();
+  Block *leaderBlock = leaderData.block, *mergeBlock = blockData.block;
+  if (leaderBlock->getArgumentTypes() != mergeBlock->getArgumentTypes())
+    return failure();
+
+  // A set of operands that mismatch between the leader and the new block.
+  SmallVector<std::pair<int, int>, 8> mismatchedOperands;
+  SmallVector<std::pair<Operation *, Operation *>, 2> newOpsToReplace;
+  auto lhsIt = leaderBlock->begin(), lhsE = leaderBlock->end();
+  auto rhsIt = blockData.block->begin(), rhsE = blockData.block->end();
+  for (int opI = 0; lhsIt != lhsE && rhsIt != rhsE; ++lhsIt, ++rhsIt, ++opI) {
+    // Check that the operations are equivalent.
+    if (!OperationEquivalence::isEquivalentTo(
+            &*lhsIt, &*rhsIt, OperationEquivalence::Flags::IgnoreOperands))
+      return failure();
+
+    // Compare the operands of the two operations. If the operand is within
+    // the block, it must refer to the same operation.
+    auto lhsOperands = lhsIt->getOperands(), rhsOperands = rhsIt->getOperands();
+    for (int operand : llvm::seq<int>(0, lhsIt->getNumOperands())) {
+      Value lhsOperand = lhsOperands[operand];
+      Value rhsOperand = rhsOperands[operand];
+      if (lhsOperand == rhsOperand)
+        continue;
+
+      // Check that these uses are both external, or both internal.
+      bool lhsIsInBlock = lhsOperand.getParentBlock() == leaderBlock;
+      bool rhsIsInBlock = rhsOperand.getParentBlock() == mergeBlock;
+      if (lhsIsInBlock != rhsIsInBlock)
+        return failure();
+      // Let the operands differ if they are defined in a different block.
+      // These will become new arguments if the blocks get merged.
+      if (!lhsIsInBlock) {
+        mismatchedOperands.emplace_back(opI, operand);
+        continue;
+      }
+
+      // Otherwise, these operands must have the same logical order within the
+      // parent block.
+      if (leaderData.getOrderOf(lhsOperand) !=
+          blockData.getOrderOf(rhsOperand))
+        return failure();
+    }
+
+    // If the rhs has external uses, it will need to be replaced.
+    if (rhsIt->isUsedOutsideOfBlock(mergeBlock))
+      newOpsToReplace.emplace_back(&*rhsIt, &*lhsIt);
+  }
+  // Make sure that the block sizes are equivalent.
+  if (lhsIt != lhsE || rhsIt != rhsE)
+    return failure();
+
+  // If we get here, the blocks are equivalent and can be merged.
+  operandsToMerge.insert(mismatchedOperands.begin(), mismatchedOperands.end());
+  opsToReplace.insert(newOpsToReplace.begin(), newOpsToReplace.end());
+  blocksToMerge.insert(blockData.block);
+  return success();
+}
+
+/// Returns true if the predecessor terminators of the given block can have
+/// their operands updated.
+static bool ableToUpdatePredOperands(Block *block) {
+  for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
+    auto branch = dyn_cast<BranchOpInterface>((*it)->getTerminator());
+    if (!branch || !branch.getMutableSuccessorOperands(it.getSuccessorIndex()))
+      return false;
+  }
+  return true;
+}
+
+LogicalResult BlockMergeCluster::merge() {
+  // Don't consider clusters that don't have blocks to merge.
+  if (blocksToMerge.empty())
+    return failure();
+
+  Block *leaderBlock = leaderData.block;
+  if (!operandsToMerge.empty()) {
+    // If the cluster has operands to merge, verify that the predecessor
+    // terminators of each of the blocks can have their successor operands
+    // updated.
+    // TODO: We could try and sub-partition this cluster if only some blocks
+    // cause the mismatch.
+    if (!ableToUpdatePredOperands(leaderBlock) ||
+        !llvm::all_of(blocksToMerge, ableToUpdatePredOperands))
+      return failure();
+
+    // Replace any necessary operations.
+    for (std::pair<Operation *, Operation *> &it : opsToReplace)
+      it.first->replaceAllUsesWith(it.second);
+
+    // Collect the iterators for each of the blocks to merge. We will walk all
+    // of the iterators at once to avoid operand index invalidation.
+    SmallVector<Block::iterator, 2> blockIterators;
+    blockIterators.reserve(blocksToMerge.size() + 1);
+    blockIterators.push_back(leaderBlock->begin());
+    for (Block *mergeBlock : blocksToMerge)
+      blockIterators.push_back(mergeBlock->begin());
+
+    // Update each of the predecessor terminators with the new arguments.
+    SmallVector<SmallVector<Value, 8>, 2> newArguments(
+        1 + blocksToMerge.size(),
+        SmallVector<Value, 8>(operandsToMerge.size()));
+    unsigned curOpIndex = 0;
+    for (auto it : llvm::enumerate(operandsToMerge)) {
+      unsigned nextOpOffset = it.value().first - curOpIndex;
+      curOpIndex = it.value().first;
+
+      // Process the operand for each of the block iterators.
+      for (unsigned i = 0, e = blockIterators.size(); i != e; ++i) {
+        Block::iterator &blockIter = blockIterators[i];
+        std::advance(blockIter, nextOpOffset);
+        auto &operand = blockIter->getOpOperand(it.value().second);
+        newArguments[i][it.index()] = operand.get();
+
+        // Update the operand and insert an argument if this is the leader.
+        if (i == 0)
+          operand.set(leaderBlock->addArgument(operand.get().getType()));
+      }
+    }
+    // Update the predecessors for each of the blocks.
+    auto updatePredecessors = [&](Block *block, unsigned clusterIndex) {
+      for (auto predIt = block->pred_begin(), predE = block->pred_end();
+           predIt != predE; ++predIt) {
+        auto branch = cast<BranchOpInterface>((*predIt)->getTerminator());
+        unsigned succIndex = predIt.getSuccessorIndex();
+        branch.getMutableSuccessorOperands(succIndex)->append(
+            newArguments[clusterIndex]);
+      }
+    };
+    updatePredecessors(leaderBlock, /*clusterIndex=*/0);
+    for (unsigned i = 0, e = blocksToMerge.size(); i != e; ++i)
+      updatePredecessors(blocksToMerge[i], /*clusterIndex=*/i + 1);
+  }
+
+  // Replace all uses of the merged blocks with the leader and erase them.
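+  // (Editor's note: Block::replaceAllUsesWith retargets every predecessor
+  // terminator that lists a merged block as a successor so that it branches
+  // to the leader instead; erasing the block afterwards is then safe because
+  // no uses remain.)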
+  for (Block *block : blocksToMerge) {
+    block->replaceAllUsesWith(leaderBlock);
+    block->erase();
+  }
+  return success();
+}
+
+/// Identify identical blocks within the given region and merge them, inserting
+/// new block arguments as necessary. Returns success if any blocks were
+/// merged, failure otherwise.
+static LogicalResult mergeIdenticalBlocks(Region &region) {
+  if (region.empty() || llvm::hasSingleElement(region))
+    return failure();
+
+  // Identify sets of blocks, other than the entry block, that branch to the
+  // same successors. We will use these groups to create clusters of equivalent
+  // blocks.
+  DenseMap<SuccessorRange, SmallVector<Block *, 1>> matchingSuccessors;
+  for (Block &block : llvm::drop_begin(region, 1))
+    matchingSuccessors[block.getSuccessors()].push_back(&block);
+
+  bool mergedAnyBlocks = false;
+  for (ArrayRef<Block *> blocks :
+       llvm::make_second_range(matchingSuccessors)) {
+    if (blocks.size() == 1)
+      continue;
+
+    SmallVector<BlockMergeCluster, 1> clusters;
+    for (Block *block : blocks) {
+      BlockEquivalenceData data(block);
+
+      // Don't allow merging if this block has any regions.
+      // TODO: Add support for regions if necessary.
+      bool hasNonEmptyRegion = llvm::any_of(*block, [](Operation &op) {
+        return llvm::any_of(op.getRegions(),
+                            [](Region &region) { return !region.empty(); });
+      });
+      if (hasNonEmptyRegion)
+        continue;
+
+      // Try to add this block to an existing cluster.
+      bool addedToCluster = false;
+      for (auto &cluster : clusters)
+        if ((addedToCluster = succeeded(cluster.addToCluster(data))))
+          break;
+      if (!addedToCluster)
+        clusters.emplace_back(std::move(data));
+    }
+    for (auto &cluster : clusters)
+      mergedAnyBlocks |= succeeded(cluster.merge());
+  }
+
+  return success(mergedAnyBlocks);
+}
+
+/// Identify identical blocks within the given regions and merge them,
+/// inserting new block arguments as necessary.
+static LogicalResult mergeIdenticalBlocks(MutableArrayRef<Region> regions) {
+  llvm::SmallSetVector<Region *, 8> worklist;
+  for (auto &region : regions)
+    worklist.insert(&region);
+  bool anyChanged = false;
+  while (!worklist.empty()) {
+    Region *region = worklist.pop_back_val();
+    if (succeeded(mergeIdenticalBlocks(*region))) {
+      worklist.insert(region);
+      anyChanged = true;
+    }
+
+    // Add any nested regions to the worklist.
+    for (Block &block : *region)
+      for (auto &op : block)
+        for (auto &nestedRegion : op.getRegions())
+          worklist.insert(&nestedRegion);
+  }
+
+  return success(anyChanged);
+}
+
 //===----------------------------------------------------------------------===//
 // Region Simplification
 //===----------------------------------------------------------------------===//
@@ -375,7 +694,9 @@ static LogicalResult runRegionDCE(MutableArrayRef<Region> regions) {
 /// elimination, as well as some other DCE. This function returns success if any
 /// of the regions were simplified, failure otherwise.
 LogicalResult mlir::simplifyRegions(MutableArrayRef<Region> regions) {
-  LogicalResult eliminatedBlocks = eraseUnreachableBlocks(regions);
-  LogicalResult eliminatedOpsOrArgs = runRegionDCE(regions);
-  return success(succeeded(eliminatedBlocks) || succeeded(eliminatedOpsOrArgs));
+  bool eliminatedBlocks = succeeded(eraseUnreachableBlocks(regions));
+  bool eliminatedOpsOrArgs = succeeded(runRegionDCE(regions));
+  bool mergedIdenticalBlocks = succeeded(mergeIdenticalBlocks(regions));
+  return success(eliminatedBlocks || eliminatedOpsOrArgs ||
+                 mergedIdenticalBlocks);
 }
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
index 5cc83456a9fe2..91ee3b4e3ce62 100644
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ b/mlir/lib/Transforms/Utils/Utils.cpp
@@ -15,10 +15,10 @@
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
-#include "mlir/Analysis/Dominance.h"
 #include "mlir/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/Dominance.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/Module.h"
 #include "mlir/Support/MathExtras.h"
diff --git a/mlir/lib/Translation/CMakeLists.txt b/mlir/lib/Translation/CMakeLists.txt
index 2cd1a7c9ee3ee..579de292fdb22 100644
--- a/mlir/lib/Translation/CMakeLists.txt
+++ b/mlir/lib/Translation/CMakeLists.txt
@@ -3,10 +3,8 @@ add_mlir_library(MLIRTranslation
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Translation
-  )
-target_link_libraries(MLIRTranslation
-  PUBLIC
-  LLVMSupport
+
+  LINK_LIBS PUBLIC
   MLIRIR
   MLIRParser
   )
diff --git a/mlir/lib/Translation/Translation.cpp b/mlir/lib/Translation/Translation.cpp
index 487e39ac939cb..f48c5b3e1ec52 100644
--- a/mlir/lib/Translation/Translation.cpp
+++ b/mlir/lib/Translation/Translation.cpp
@@ -11,8 +11,8 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/Translation.h"
-#include "mlir/Analysis/Verifier.h"
 #include "mlir/IR/Module.h"
+#include "mlir/IR/Verifier.h"
 #include "mlir/Parser.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/Support/SourceMgr.h"
diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
new file mode 100644
index 0000000000000..f0c9b414c9c84
--- /dev/null
+++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
@@ -0,0 +1,231 @@
+// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-nvvm --split-input-file %s | FileCheck --check-prefix=NVVM %s
+// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl --split-input-file %s | FileCheck --check-prefix=ROCDL %s
+
+gpu.module @kernel {
+  // NVVM-LABEL: llvm.func @private
+  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
+    // Allocate private memory inside the function.
+    // NVVM: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
+    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+
+    // ROCDL: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
+    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*">
+
+    // Populate the memref descriptor.
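+    // Editor's note (illustration, not part of the original patch): the
+    // insertvalue sequence checked below fills the standard memref descriptor
+    // { allocated ptr, aligned ptr, offset, sizes[rank], strides[rank] };
+    // for memref<4xf32, 5> that is rank 1 with offset 0, size 4 and stride 1.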
+ // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> + // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] + + // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(5)*, float addrspace(5)*, i64, [1 x i64], [1 x i64] }"> + // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] + + // "Store" lowering should work just as any other memref, only check that + // we emit some core instructions. + // NVVM: llvm.extractvalue %[[descr6:.*]] + // NVVM: llvm.getelementptr + // NVVM: llvm.store + + // ROCDL: llvm.extractvalue %[[descr6:.*]] + // ROCDL: llvm.getelementptr + // ROCDL: llvm.store + %c0 = constant 0 : index + store %arg0, %arg1[%c0] : memref<4xf32, 5> + + "terminator"() : () -> () + } +} + +// ----- + +gpu.module @kernel { + // Workgroup buffers are allocated as globals. + // NVVM: llvm.mlir.global internal @[[buffer:.*]]() + // NVVM-SAME: addr_space = 3 + // NVVM-SAME: !llvm<"[4 x float]"> + + // ROCDL: llvm.mlir.global internal @[[buffer:.*]]() + // ROCDL-SAME: addr_space = 3 + // ROCDL-SAME: !llvm<"[4 x float]"> + + // NVVM-LABEL: llvm.func @workgroup + // NVVM-SAME: { + + // ROCDL-LABEL: llvm.func @workgroup + // ROCDL-SAME: { + gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, 3>) { + // Get the address of the first element in the global array. + // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[4 x float] addrspace(3)*"> + // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] + // NVVM-SAME: !llvm<"float addrspace(3)*"> + + // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[4 x float] addrspace(3)*"> + // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] + // ROCDL-SAME: !llvm<"float addrspace(3)*"> + + // Populate the memref descriptor. 
+ // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }"> + // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] + + // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }"> + // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] + + // "Store" lowering should work just as any other memref, only check that + // we emit some core instructions. + // NVVM: llvm.extractvalue %[[descr6:.*]] + // NVVM: llvm.getelementptr + // NVVM: llvm.store + + // ROCDL: llvm.extractvalue %[[descr6:.*]] + // ROCDL: llvm.getelementptr + // ROCDL: llvm.store + %c0 = constant 0 : index + store %arg0, %arg1[%c0] : memref<4xf32, 3> + + "terminator"() : () -> () + } +} + +// ----- + +gpu.module @kernel { + // Check that the total size was computed correctly. + // NVVM: llvm.mlir.global internal @[[buffer:.*]]() + // NVVM-SAME: addr_space = 3 + // NVVM-SAME: !llvm<"[48 x float]"> + + // ROCDL: llvm.mlir.global internal @[[buffer:.*]]() + // ROCDL-SAME: addr_space = 3 + // ROCDL-SAME: !llvm<"[48 x float]"> + + // NVVM-LABEL: llvm.func @workgroup3d + // ROCDL-LABEL: llvm.func @workgroup3d + gpu.func @workgroup3d(%arg0: f32) workgroup(%arg1: memref<4x2x6xf32, 3>) { + // Get the address of the first element in the global array. + // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[48 x float] addrspace(3)*"> + // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] + // NVVM-SAME: !llvm<"float addrspace(3)*"> + + // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[48 x float] addrspace(3)*"> + // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] + // ROCDL-SAME: !llvm<"float addrspace(3)*"> + + // Populate the memref descriptor. 
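+    // Editor's note (not part of the original patch): for
+    // memref<4x2x6xf32, 3> the row-major strides are [12, 6, 1], i.e.
+    // stride[i] is the product of sizes[i+1..], matching the index constants
+    // checked at descriptor positions [4, 0], [4, 1] and [4, 2] below.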
+ // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }"> + // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // NVVM: %[[c12:.*]] = llvm.mlir.constant(12 : index) : !llvm.i64 + // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c12]], %[[descr5]][4, 0] + // NVVM: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 + // NVVM: %[[descr7:.*]] = llvm.insertvalue %[[c2]], %[[descr6]][3, 1] + // NVVM: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 + // NVVM: %[[descr8:.*]] = llvm.insertvalue %[[c6]], %[[descr7]][4, 1] + // NVVM: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 + // NVVM: %[[descr9:.*]] = llvm.insertvalue %[[c6]], %[[descr8]][3, 2] + // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // NVVM: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2] + + // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }"> + // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] + // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] + // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] + // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 + // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] + // ROCDL: %[[c12:.*]] = llvm.mlir.constant(12 : index) : !llvm.i64 + // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c12]], %[[descr5]][4, 0] + // ROCDL: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 + // ROCDL: %[[descr7:.*]] = llvm.insertvalue %[[c2]], %[[descr6]][3, 1] + // ROCDL: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 + // ROCDL: %[[descr8:.*]] = llvm.insertvalue %[[c6]], %[[descr7]][4, 1] + // ROCDL: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 + // ROCDL: %[[descr9:.*]] = llvm.insertvalue %[[c6]], %[[descr8]][3, 2] + // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 + // ROCDL: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2] + + %c0 = constant 0 : index + store %arg0, %arg1[%c0,%c0,%c0] : memref<4x2x6xf32, 3> + "terminator"() : () -> () + } +} + +// ----- + +gpu.module @kernel { + // Check that several buffers are defined. + // NVVM: llvm.mlir.global internal @[[buffer1:.*]]() + // NVVM-SAME: !llvm<"[1 x float]"> + // NVVM: llvm.mlir.global internal @[[buffer2:.*]]() + // NVVM-SAME: !llvm<"[2 x float]"> + + // ROCDL: llvm.mlir.global internal @[[buffer1:.*]]() + // ROCDL-SAME: !llvm<"[1 x float]"> + // ROCDL: llvm.mlir.global internal @[[buffer2:.*]]() + // ROCDL-SAME: !llvm<"[2 x float]"> + + // NVVM-LABEL: llvm.func @multiple + // ROCDL-LABEL: llvm.func @multiple + gpu.func @multiple(%arg0: f32) + workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>) + private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) { + + // Workgroup buffers. + // NVVM: llvm.mlir.addressof @[[buffer1]] + // NVVM: llvm.mlir.addressof @[[buffer2]] + + // ROCDL: llvm.mlir.addressof @[[buffer1]] + // ROCDL: llvm.mlir.addressof @[[buffer2]] + + // Private buffers. 
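+    // Editor's note (not part of the original patch): unlike the workgroup
+    // attributions above, which lowered to module-level globals in
+    // addrspace(3), private attributions become per-thread llvm.alloca in the
+    // function body, in addrspace(5) for ROCDL.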
+ // NVVM: %[[c3:.*]] = llvm.mlir.constant(3 : i64) + // NVVM: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm<"float*"> + // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : i64) + // NVVM: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm<"float*"> + + // ROCDL: %[[c3:.*]] = llvm.mlir.constant(3 : i64) + // ROCDL: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*"> + // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : i64) + // ROCDL: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*"> + + %c0 = constant 0 : index + store %arg0, %arg1[%c0] : memref<1xf32, 3> + store %arg0, %arg2[%c0] : memref<2xf32, 3> + store %arg0, %arg3[%c0] : memref<3xf32, 5> + store %arg0, %arg4[%c0] : memref<4xf32, 5> + "terminator"() : () -> () + } +} diff --git a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir deleted file mode 100644 index 68b615725ad43..0000000000000 --- a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir +++ /dev/null @@ -1,145 +0,0 @@ -// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-nvvm --split-input-file %s | FileCheck %s - -gpu.module @kernel { - // CHECK-LABEL: llvm.func @private - gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) { - // Allocate private memory inside the function. - // CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64 - // CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*"> - - // Populate the memref descriptor. - // CHECK: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - // CHECK: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] - // CHECK: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 - // CHECK: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] - // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 - // CHECK: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] - // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 - // CHECK: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] - - // "Store" lowering should work just as any other memref, only check that - // we emit some core instructions. - // CHECK: llvm.extractvalue %[[descr6:.*]] - // CHECK: llvm.getelementptr - // CHECK: llvm.store - %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<4xf32, 5> - - "terminator"() : () -> () - } -} - -// ----- - -gpu.module @kernel { - // Workgroup buffers are allocated as globals. - // CHECK: llvm.mlir.global internal @[[buffer:.*]]() - // CHECK-SAME: addr_space = 3 - // CHECK-SAME: !llvm<"[4 x float]"> - - // CHECK-LABEL: llvm.func @workgroup - // CHECK-SAME: { - gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, 3>) { - // Get the address of the first element in the global array. - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[4 x float] addrspace(3)*"> - // CHECK: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] - // CHECK-SAME: !llvm<"float addrspace(3)*"> - - // Populate the memref descriptor. 
- // CHECK: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }"> - // CHECK: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] - // CHECK: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 - // CHECK: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] - // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 - // CHECK: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] - // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 - // CHECK: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0] - - // "Store" lowering should work just as any other memref, only check that - // we emit some core instructions. - // CHECK: llvm.extractvalue %[[descr6:.*]] - // CHECK: llvm.getelementptr - // CHECK: llvm.store - %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<4xf32, 3> - - "terminator"() : () -> () - } -} - -// ----- - -gpu.module @kernel { - // Check that the total size was computed correctly. - // CHECK: llvm.mlir.global internal @[[buffer:.*]]() - // CHECK-SAME: addr_space = 3 - // CHECK-SAME: !llvm<"[48 x float]"> - - // CHECK-LABEL: llvm.func @workgroup3d - gpu.func @workgroup3d(%arg0: f32) workgroup(%arg1: memref<4x2x6xf32, 3>) { - // Get the address of the first element in the global array. - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[addr:.*]] = llvm.mlir.addressof @[[buffer]] : !llvm<"[48 x float] addrspace(3)*"> - // CHECK: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]] - // CHECK-SAME: !llvm<"float addrspace(3)*"> - - // Populate the memref descriptor. - // CHECK: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }"> - // CHECK: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0] - // CHECK: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1] - // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 - // CHECK: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2] - // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 - // CHECK: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0] - // CHECK: %[[c12:.*]] = llvm.mlir.constant(12 : index) : !llvm.i64 - // CHECK: %[[descr6:.*]] = llvm.insertvalue %[[c12]], %[[descr5]][4, 0] - // CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 - // CHECK: %[[descr7:.*]] = llvm.insertvalue %[[c2]], %[[descr6]][3, 1] - // CHECK: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 - // CHECK: %[[descr8:.*]] = llvm.insertvalue %[[c6]], %[[descr7]][4, 1] - // CHECK: %[[c6:.*]] = llvm.mlir.constant(6 : index) : !llvm.i64 - // CHECK: %[[descr9:.*]] = llvm.insertvalue %[[c6]], %[[descr8]][3, 2] - // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 - // CHECK: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2] - - %c0 = constant 0 : index - store %arg0, %arg1[%c0,%c0,%c0] : memref<4x2x6xf32, 3> - "terminator"() : () -> () - } -} - -// ----- - -gpu.module @kernel { - // Check that several buffers are defined. 
- // CHECK: llvm.mlir.global internal @[[buffer1:.*]]() - // CHECK-SAME: !llvm<"[1 x float]"> - // CHECK: llvm.mlir.global internal @[[buffer2:.*]]() - // CHECK-SAME: !llvm<"[2 x float]"> - - // CHECK-LABEL: llvm.func @multiple - gpu.func @multiple(%arg0: f32) - workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>) - private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) { - - // Workgroup buffers. - // CHECK: llvm.mlir.addressof @[[buffer1]] - // CHECK: llvm.mlir.addressof @[[buffer2]] - - // Private buffers. - // CHECK: %[[c3:.*]] = llvm.mlir.constant(3 : i64) - // CHECK: llvm.alloca %[[c3]] x !llvm.float - // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : i64) - // CHECK: llvm.alloca %[[c4]] x !llvm.float - - %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<1xf32, 3> - store %arg0, %arg2[%c0] : memref<2xf32, 3> - store %arg0, %arg3[%c0] : memref<3xf32, 5> - store %arg0, %arg4[%c0] : memref<4xf32, 5> - "terminator"() : () -> () - } -} diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 7400d4f0bb1e7..4404cebec8531 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,9 +1,10 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s --dump-input-on-failure -gpu.module @kernel_module { +gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() func @gpu_index_ops() - attributes { gpu.kernel } { + -> (index, index, index, index, index, index, + index, index, index, index, index, index) { // CHECK: rocdl.workitem.id.x : !llvm.i32 %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) // CHECK: rocdl.workitem.id.y : !llvm.i32 @@ -32,68 +33,82 @@ gpu.module @kernel_module { // CHECK: rocdl.grid.dim.z : !llvm.i32 %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ, + %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ + : index, index, index, index, index, index, + index, index, index, index, index, index + } +} + +// ----- + +gpu.module @test_module { + // CHECK-LABEL: func @gpu_sync() + func @gpu_sync() { + // CHECK: rocdl.barrier + gpu.barrier std.return } } // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_fabs_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_fabs_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_fabs - func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %result32 = std.absf %arg_f32 : f32 // CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.absf %arg_f64 : f64 // CHECK: llvm.call @__ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return %result32, %result64 : f32, f64 } } // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_ceil_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_ceil_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_ceil - func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %result32 = std.ceilf %arg_f32 : f32 // CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.ceilf %arg_f64 : f64 // CHECK: llvm.call @__ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return 
%result32, %result64 : f32, f64 } } // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_cos_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_cos_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_cos - func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %result32 = std.cos %arg_f32 : f32 // CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.cos %arg_f64 : f64 // CHECK: llvm.call @__ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return %result32, %result64 : f32, f64 } } // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_exp_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_exp_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp - func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %exp_f32 = std.exp %arg_f32 : f32 // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float - %result_f32 = std.exp %exp_f32 : f32 + %result32 = std.exp %exp_f32 : f32 // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.exp %arg_f64 : f64 // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return %result32, %result64 : f32, f64 } } @@ -101,20 +116,20 @@ gpu.module @kernel_module { // ----- // Test that we handled properly operation with SymbolTable other than module op -gpu.module @kernel_module { +gpu.module @test_module { "test.symbol_scope"() ({ // CHECK: test.symbol_scope // CHECK: llvm.func @__ocml_exp_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_exp_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_exp - func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %exp_f32 = std.exp %arg_f32 : f32 // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float - %result_f32 = std.exp %exp_f32 : f32 + %result32 = std.exp %exp_f32 : f32 // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.exp %arg_f64 : f64 // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return %result32, %result64 : f32, f64 } "test.finish" () : () -> () }) : () -> () @@ -122,60 +137,60 @@ gpu.module @kernel_module { // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_log_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_log_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_log - func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %result32 = std.log %arg_f32 : f32 // CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (!llvm.float) -> !llvm.float %result64 = std.log %arg_f64 : f64 // CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (!llvm.double) -> !llvm.double - std.return + std.return %result32, %result64 : f32, f64 } } // ----- -gpu.module @kernel_module { +gpu.module @test_module { // CHECK: llvm.func @__ocml_log10_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_log10_f64(!llvm.double) -> !llvm.double // CHECK-LABEL: func @gpu_log10 - func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) { + func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { %result32 = std.log10 %arg_f32 : f32 // CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : 
(!llvm.float) -> !llvm.float
     %result64 = std.log10 %arg_f64 : f64
     // CHECK: llvm.call @__ocml_log10_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
-    std.return
+    std.return %result32, %result64 : f32, f64
   }
 }

// -----

-gpu.module @kernel_module {
+gpu.module @test_module {
   // CHECK: llvm.func @__ocml_log2_f32(!llvm.float) -> !llvm.float
   // CHECK: llvm.func @__ocml_log2_f64(!llvm.double) -> !llvm.double
   // CHECK-LABEL: func @gpu_log2
-  func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) {
+  func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.log2 %arg_f32 : f32
     // CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
     %result64 = std.log2 %arg_f64 : f64
     // CHECK: llvm.call @__ocml_log2_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
-    std.return
+    std.return %result32, %result64 : f32, f64
   }
 }

// -----

-gpu.module @kernel_module {
+gpu.module @test_module {
   // CHECK: llvm.func @__ocml_tanh_f32(!llvm.float) -> !llvm.float
   // CHECK: llvm.func @__ocml_tanh_f64(!llvm.double) -> !llvm.double
   // CHECK-LABEL: func @gpu_tanh
-  func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) {
+  func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
     %result32 = std.tanh %arg_f32 : f32
     // CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
     %result64 = std.tanh %arg_f64 : f64
     // CHECK: llvm.call @__ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
-    std.return
+    std.return %result32, %result64 : f32, f64
   }
 }
diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
index 330165b567798..b7cb13e51ca2c 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
@@ -65,6 +65,24 @@ func @simple_loop() {
   return
 }

+// CHECK-LABEL: llvm.func @complex_numbers()
+// CHECK-NEXT:    %[[REAL0:.*]] = llvm.mlir.constant(1.200000e+00 : f32) : !llvm.float
+// CHECK-NEXT:    %[[IMAG0:.*]] = llvm.mlir.constant(3.400000e+00 : f32) : !llvm.float
+// CHECK-NEXT:    %[[CPLX0:.*]] = llvm.mlir.undef : !llvm<"{ float, float }">
+// CHECK-NEXT:    %[[CPLX1:.*]] = llvm.insertvalue %[[REAL0]], %[[CPLX0]][0] : !llvm<"{ float, float }">
+// CHECK-NEXT:    %[[CPLX2:.*]] = llvm.insertvalue %[[IMAG0]], %[[CPLX1]][1] : !llvm<"{ float, float }">
+// CHECK-NEXT:    %[[REAL1:.*]] = llvm.extractvalue %[[CPLX2:.*]][0] : !llvm<"{ float, float }">
+// CHECK-NEXT:    %[[IMAG1:.*]] = llvm.extractvalue %[[CPLX2:.*]][1] : !llvm<"{ float, float }">
+// CHECK-NEXT:    llvm.return
+func @complex_numbers() {
+  %real0 = constant 1.2 : f32
+  %imag0 = constant 3.4 : f32
+  %cplx2 = create_complex %real0, %imag0 : complex<f32>
+  %real1 = re %cplx2 : complex<f32>
+  %imag1 = im %cplx2 : complex<f32>
+  return
+}
+
 // CHECK-LABEL: func @simple_caller() {
 // CHECK-NEXT:  llvm.call @simple_loop() : () -> ()
 // CHECK-NEXT:  llvm.return
@@ -367,6 +385,12 @@ func @more_imperfectly_nested_loops() {
 func @get_i64() -> (i64)
 // CHECK-LABEL: func @get_f32() -> !llvm.float
 func @get_f32() -> (f32)
+// CHECK-LABEL: func @get_c16() -> !llvm<"{ half, half }">
+func @get_c16() -> (complex<f16>)
+// CHECK-LABEL: func @get_c32() -> !llvm<"{ float, float }">
+func @get_c32() -> (complex<f32>)
+// CHECK-LABEL: func @get_c64() -> !llvm<"{ double, double }">
+func @get_c64() -> (complex<f64>)
 // CHECK-LABEL: func @get_memref() -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }">
 // CHECK32-LABEL: func @get_memref() -> !llvm<"{ float*, float*, i32, [4 x i32], [4 x i32] }">
 func @get_memref() -> (memref<42x?x10x?xf32>)
@@
-811,8 +835,10 @@ func @subview(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> @@ -828,8 +854,10 @@ func @subview(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> @@ -849,6 +877,66 @@ func @subview(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg return } +// CHECK-LABEL: func @subview_non_zero_addrspace( +// CHECK-COUNT-2: !llvm<"float addrspace(3)*">, +// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64, +// CHECK: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i64, +// CHECK: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i64, +// CHECK: %[[ARG2:.*]]: !llvm.i64) +// CHECK32-LABEL: func @subview_non_zero_addrspace( +// CHECK32-COUNT-2: !llvm<"float addrspace(3)*">, +// CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i32, +// CHECK32: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i32, +// CHECK32: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i32, +// CHECK32: %[[ARG2:.*]]: !llvm.i32) +func @subview_non_zero_addrspace(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>, 3>, %arg0 : 
index, %arg1 : index, %arg2 : index) { + // The last "insertvalue" that populates the memref descriptor from the function arguments. + // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] + // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] + + // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*"> + // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*"> + // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 + // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64 + // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 + // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64 + // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 + // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 + // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }"> + // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*"> + // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*"> + // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float addrspace(3)*, float 
addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32
+  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i32
+  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
+  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32
+
+  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
+    memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>, 3> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0)>, 3>
+  return
+}
+
 // CHECK-LABEL: func @subview_const_size(
 // CHECK32-LABEL: func @subview_const_size(
 func @subview_const_size(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg0 : index, %arg1 : index, %arg2 : index) {
@@ -857,8 +945,10 @@ func @subview_const_size(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 +
   // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
   // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
-  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
-  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
+  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
+  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
   // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
   // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
   // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
@@ -876,8 +966,10 @@ func @subview_const_size(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 +
   // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64
   // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
   // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, 
float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) @@ -907,8 +999,10 @@ func @subview_const_stride(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> @@ -924,8 +1018,10 @@ func @subview_const_stride(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> @@ -953,8 +1049,10 @@ func 
@subview_const_stride_and_offset(%0 : memref<64x4xf32, affine_map<(d0, d1) // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) @@ -968,8 +1066,10 @@ func @subview_const_stride_and_offset(%0 : memref<64x4xf32, affine_map<(d0, d1) // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> - // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> + // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*"> + // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }"> // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) @@ -1010,25 +1110,6 @@ func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : // ----- -// CHECK-LABEL: func @cmpxchg -func @cmpxchg(%F : memref<10xf32>, %fval : f32, %i : index) -> f32 { - %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32 - // CHECK: %[[init:.*]] = llvm.load %{{.*}} : !llvm<"float*"> - // CHECK-NEXT: llvm.br ^bb1(%[[init]] : !llvm.float) - // CHECK-NEXT: ^bb1(%[[loaded:.*]]: !llvm.float): - // CHECK-NEXT: %[[cmp:.*]] = llvm.fcmp "ogt" %[[loaded]], %{{.*}} : !llvm.float - // CHECK-NEXT: %[[max:.*]] = llvm.select %[[cmp]], %[[loaded]], %{{.*}} : !llvm.i1, !llvm.float - // CHECK-NEXT: %[[pair:.*]] = llvm.cmpxchg %{{.*}}, %[[loaded]], %[[max]] acq_rel monotonic : !llvm.float - // CHECK-NEXT: %[[new:.*]] = llvm.extractvalue %[[pair]][0] : !llvm<"{ float, i1 }"> - // CHECK-NEXT: %[[ok:.*]] = llvm.extractvalue %[[pair]][1] : !llvm<"{ float, i1 }"> - // CHECK-NEXT: llvm.cond_br %[[ok]], 
^bb2, ^bb1(%[[new]] : !llvm.float) - // CHECK-NEXT: ^bb2: - return %x : f32 - // CHECK-NEXT: llvm.return %[[new]] -} - -// ----- - // CHECK-LABEL: func @generic_atomic_rmw func @generic_atomic_rmw(%I : memref<10xf32>, %i : index) -> f32 { %x = generic_atomic_rmw %I[%i] : memref<10xf32> { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index 6abdde44e3e57..3dcc3cdb2115d 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -52,6 +52,8 @@ func @float32_unary_scalar(%arg0: f32) { %7 = sqrt %arg0 : f32 // CHECK: spv.GLSL.Tanh %{{.*}}: f32 %8 = tanh %arg0 : f32 + // CHECK: spv.GLSL.Sin %{{.*}}: f32 + %9 = sin %arg0 : f32 return } @@ -285,6 +287,15 @@ func @cmpi(%arg0 : i32, %arg1 : i32) { return } +// CHECK-LABEL: @boolcmpi +func @boolcmpi(%arg0 : i1, %arg1 : i1) { + // CHECK: spv.LogicalEqual + %0 = cmpi "eq", %arg0, %arg1 : i1 + // CHECK: spv.LogicalNotEqual + %1 = cmpi "ne", %arg0, %arg1 : i1 + return +} + } // end module // ----- @@ -492,6 +503,34 @@ func @sitofp2(%arg0 : i64) -> f64 { return %0 : f64 } +// CHECK-LABEL: @zexti1 +func @zexti1(%arg0: i16) -> i64 { + // CHECK: spv.UConvert %{{.*}} : i16 to i64 + %0 = std.zexti %arg0 : i16 to i64 + return %0 : i64 +} + +// CHECK-LABEL: @zexti2 +func @zexti2(%arg0 : i32) -> i64 { + // CHECK: spv.UConvert %{{.*}} : i32 to i64 + %0 = std.zexti %arg0 : i32 to i64 + return %0 : i64 +} + +// CHECK-LABEL: @trunci1 +func @trunci1(%arg0 : i64) -> i16 { + // CHECK: spv.SConvert %{{.*}} : i64 to i16 + %0 = std.trunci %arg0 : i64 to i16 + return %0 : i16 +} + +// CHECK-LABEL: @trunci2 +func @trunci2(%arg0: i32) -> i16 { + // CHECK: spv.SConvert %{{.*}} : i32 to i16 + %0 = std.trunci %arg0 : i32 to i16 + return %0 : i16 +} + } // end module // ----- @@ -610,3 +649,210 @@ func @load_store_zero_rank_int(%arg0: memref, %arg1: memref) { } } // end module + +// ----- + +// Check that access chain indices are properly adjusted if non-32-bit types are +// emulated via 32-bit types. +// TODO: Test i1 and i64 types. 
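The two modules below pin down this emulation arithmetic with FileCheck. As a rough C++ restatement of what the checked load sequence computes (an illustrative sketch only; the helper name `emulatedLoad` is made up and is not code from this patch), the element index splits into a 32-bit word index and a bit offset inside that word:

#include <cstdint>

// Mirrors the spv.SDiv / spv.SMod / spv.IMul / shift / mask sequence in the
// CHECK lines below, for elements of width `bits` (8 or 16) packed into
// 32-bit words.
uint32_t emulatedLoad(const uint32_t *words, uint32_t idx, uint32_t bits) {
  uint32_t perWord = 32 / bits;            // 4 for i8, 2 for i16
  uint32_t word = words[idx / perWord];    // spv.SDiv, spv.AccessChain, spv.Load
  uint32_t shift = (idx % perWord) * bits; // spv.SMod, then spv.IMul by 8 or 16
  uint32_t mask = (1u << bits) - 1;        // 255 for i8, 65535 for i16
  return (word >> shift) & mask;           // shift right, then spv.BitwiseAnd
}

The tests check an arithmetic right shift; after the final mask the result is the same as the logical shift used above.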
+module attributes {
+  spv.target_env = #spv.target_env<
+    #spv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>,
+    {max_compute_workgroup_invocations = 128 : i32,
+     max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>
+} {
+
+// CHECK-LABEL: @load_i8
+func @load_i8(%arg0: memref<i8>) {
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[FOUR1:.+]] = spv.constant 4 : i32
+  // CHECK: %[[QUOTIENT:.+]] = spv.SDiv %[[ZERO]], %[[FOUR1]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %{{.+}}[%[[ZERO]], %[[QUOTIENT]]]
+  // CHECK: %[[LOAD:.+]] = spv.Load "StorageBuffer" %[[PTR]]
+  // CHECK: %[[FOUR2:.+]] = spv.constant 4 : i32
+  // CHECK: %[[EIGHT:.+]] = spv.constant 8 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[ZERO]], %[[FOUR2]] : i32
+  // CHECK: %[[BITS:.+]] = spv.IMul %[[IDX]], %[[EIGHT]] : i32
+  // CHECK: %[[VALUE:.+]] = spv.ShiftRightArithmetic %[[LOAD]], %[[BITS]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.constant 255 : i32
+  // CHECK: spv.BitwiseAnd %[[VALUE]], %[[MASK]] : i32
+  %0 = load %arg0[] : memref<i8>
+  return
+}
+
+// CHECK-LABEL: @load_i16
+// CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32)
+func @load_i16(%arg0: memref<10xi16>, %index : index) {
+  // CHECK: %[[ONE:.+]] = spv.constant 1 : i32
+  // CHECK: %[[FLAT_IDX:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[TWO1:.+]] = spv.constant 2 : i32
+  // CHECK: %[[QUOTIENT:.+]] = spv.SDiv %[[FLAT_IDX]], %[[TWO1]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %{{.+}}[%[[ZERO]], %[[QUOTIENT]]]
+  // CHECK: %[[LOAD:.+]] = spv.Load "StorageBuffer" %[[PTR]]
+  // CHECK: %[[TWO2:.+]] = spv.constant 2 : i32
+  // CHECK: %[[SIXTEEN:.+]] = spv.constant 16 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[FLAT_IDX]], %[[TWO2]] : i32
+  // CHECK: %[[BITS:.+]] = spv.IMul %[[IDX]], %[[SIXTEEN]] : i32
+  // CHECK: %[[VALUE:.+]] = spv.ShiftRightArithmetic %[[LOAD]], %[[BITS]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.constant 65535 : i32
+  // CHECK: spv.BitwiseAnd %[[VALUE]], %[[MASK]] : i32
+  %0 = load %arg0[%index] : memref<10xi16>
+  return
+}
+
+// CHECK-LABEL: @load_i32
+func @load_i32(%arg0: memref<i32>) {
+  // CHECK-NOT: spv.SDiv
+  // CHECK: spv.Load
+  // CHECK-NOT: spv.ShiftRightArithmetic
+  %0 = load %arg0[] : memref<i32>
+  return
+}
+
+// CHECK-LABEL: @load_f32
+func @load_f32(%arg0: memref<f32>) {
+  // CHECK-NOT: spv.SDiv
+  // CHECK: spv.Load
+  // CHECK-NOT: spv.ShiftRightArithmetic
+  %0 = load %arg0[] : memref<f32>
+  return
+}
+
+// CHECK-LABEL: @store_i8
+// CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32)
+func @store_i8(%arg0: memref<i8>, %value: i8) {
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[FOUR:.+]] = spv.constant 4 : i32
+  // CHECK: %[[EIGHT:.+]] = spv.constant 8 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[ZERO]], %[[FOUR]] : i32
+  // CHECK: %[[OFFSET:.+]] = spv.IMul %[[IDX]], %[[EIGHT]] : i32
+  // CHECK: %[[MASK1:.+]] = spv.constant 255 : i32
+  // CHECK: %[[TMP1:.+]] = spv.ShiftLeftLogical %[[MASK1]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.Not %[[TMP1]] : i32
+  // CHECK: %[[CLAMPED_VAL:.+]] = spv.BitwiseAnd %[[ARG1]], %[[MASK1]] : i32
+  // CHECK: %[[STORE_VAL:.+]] = spv.ShiftLeftLogical %[[CLAMPED_VAL]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[FOUR2:.+]] = spv.constant 4 : i32
+  // CHECK: %[[ACCESS_IDX:.+]] = spv.SDiv %[[ZERO]], %[[FOUR2]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]]
+  // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]]
+  // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]]
+  store %value, %arg0[] : memref<i8>
+  return
+}
+
+// CHECK-LABEL: @store_i16
+// CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
+func @store_i16(%arg0: memref<10xi16>, %index: index, %value: i16) {
+  // CHECK: %[[ONE:.+]] = spv.constant 1 : i32
+  // CHECK: %[[FLAT_IDX:.+]] = spv.IMul %[[ONE]], %[[ARG1]] : i32
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[TWO:.+]] = spv.constant 2 : i32
+  // CHECK: %[[SIXTEEN:.+]] = spv.constant 16 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[FLAT_IDX]], %[[TWO]] : i32
+  // CHECK: %[[OFFSET:.+]] = spv.IMul %[[IDX]], %[[SIXTEEN]] : i32
+  // CHECK: %[[MASK1:.+]] = spv.constant 65535 : i32
+  // CHECK: %[[TMP1:.+]] = spv.ShiftLeftLogical %[[MASK1]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.Not %[[TMP1]] : i32
+  // CHECK: %[[CLAMPED_VAL:.+]] = spv.BitwiseAnd %[[ARG2]], %[[MASK1]] : i32
+  // CHECK: %[[STORE_VAL:.+]] = spv.ShiftLeftLogical %[[CLAMPED_VAL]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[TWO2:.+]] = spv.constant 2 : i32
+  // CHECK: %[[ACCESS_IDX:.+]] = spv.SDiv %[[FLAT_IDX]], %[[TWO2]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]]
+  // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]]
+  // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]]
+  store %value, %arg0[%index] : memref<10xi16>
+  return
+}
+
+// CHECK-LABEL: @store_i32
+func @store_i32(%arg0: memref<i32>, %value: i32) {
+  // CHECK: spv.Store
+  // CHECK-NOT: spv.AtomicAnd
+  // CHECK-NOT: spv.AtomicOr
+  store %value, %arg0[] : memref<i32>
+  return
+}
+
+// CHECK-LABEL: @store_f32
+func @store_f32(%arg0: memref<f32>, %value: f32) {
+  // CHECK: spv.Store
+  // CHECK-NOT: spv.AtomicAnd
+  // CHECK-NOT: spv.AtomicOr
+  store %value, %arg0[] : memref<f32>
+  return
+}
+
+} // end module
+
+// -----
+
+// Check that access chain indices are properly adjusted if non-16/32-bit types
+// are emulated via 32-bit types.
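Stores in both modules use the same packing, but they must not clobber the other elements sharing a 32-bit word, hence the atomic clear-then-set pair the store tests check. A hedged C++ analogue of that two-step sequence (`emulatedStore` is an illustrative name, not from this patch):

#include <atomic>
#include <cstdint>

// Mirrors the spv.AtomicAnd / spv.AtomicOr pair checked above: clear the
// `bits`-wide lane holding element `idx` inside its 32-bit word, then or in
// the shifted value. Like the generated IR, this is two separate atomics.
void emulatedStore(std::atomic<uint32_t> &word, uint32_t idx, uint32_t bits,
                   uint32_t value) {
  uint32_t shift = (idx % (32 / bits)) * bits;     // spv.SMod, spv.IMul
  uint32_t laneMask = ((1u << bits) - 1) << shift; // shifted element mask
  word.fetch_and(~laneMask);                       // spv.AtomicAnd with spv.Not mask
  word.fetch_or((value << shift) & laneMask);      // spv.AtomicOr of the clamped value
}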
+module attributes {
+  spv.target_env = #spv.target_env<
+    #spv.vce<v1.0, [Shader, Int16, StorageBuffer16BitAccess], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_16bit_storage]>,
+    {max_compute_workgroup_invocations = 128 : i32,
+     max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>
+} {
+
+// CHECK-LABEL: @load_i8
+func @load_i8(%arg0: memref<i8>) {
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[FOUR1:.+]] = spv.constant 4 : i32
+  // CHECK: %[[QUOTIENT:.+]] = spv.SDiv %[[ZERO]], %[[FOUR1]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %{{.+}}[%[[ZERO]], %[[QUOTIENT]]]
+  // CHECK: %[[LOAD:.+]] = spv.Load "StorageBuffer" %[[PTR]]
+  // CHECK: %[[FOUR2:.+]] = spv.constant 4 : i32
+  // CHECK: %[[EIGHT:.+]] = spv.constant 8 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[ZERO]], %[[FOUR2]] : i32
+  // CHECK: %[[BITS:.+]] = spv.IMul %[[IDX]], %[[EIGHT]] : i32
+  // CHECK: %[[VALUE:.+]] = spv.ShiftRightArithmetic %[[LOAD]], %[[BITS]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.constant 255 : i32
+  // CHECK: spv.BitwiseAnd %[[VALUE]], %[[MASK]] : i32
+  %0 = load %arg0[] : memref<i8>
+  return
+}
+
+// CHECK-LABEL: @load_i16
+func @load_i16(%arg0: memref<i16>) {
+  // CHECK-NOT: spv.SDiv
+  // CHECK: spv.Load
+  // CHECK-NOT: spv.ShiftRightArithmetic
+  %0 = load %arg0[] : memref<i16>
+  return
+}
+
+// CHECK-LABEL: @store_i8
+// CHECK: (%[[ARG0:.+]]: {{.*}}, %[[ARG1:.+]]: i32)
+func @store_i8(%arg0: memref<i8>, %value: i8) {
+  // CHECK: %[[ZERO:.+]] = spv.constant 0 : i32
+  // CHECK: %[[FOUR:.+]] = spv.constant 4 : i32
+  // CHECK: %[[EIGHT:.+]] = spv.constant 8 : i32
+  // CHECK: %[[IDX:.+]] = spv.SMod %[[ZERO]], %[[FOUR]] : i32
+  // CHECK: %[[OFFSET:.+]] = spv.IMul %[[IDX]], %[[EIGHT]] : i32
+  // CHECK: %[[MASK1:.+]] = spv.constant 255 : i32
+  // CHECK: %[[TMP1:.+]] = spv.ShiftLeftLogical %[[MASK1]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[MASK:.+]] = spv.Not %[[TMP1]] : i32
+  // CHECK: %[[CLAMPED_VAL:.+]] = spv.BitwiseAnd %[[ARG1]], %[[MASK1]] : i32
+  // CHECK: %[[STORE_VAL:.+]] = spv.ShiftLeftLogical %[[CLAMPED_VAL]], %[[OFFSET]] : i32, i32
+  // CHECK: %[[FOUR2:.+]] = spv.constant 4 : i32
+  // CHECK: %[[ACCESS_IDX:.+]] = spv.SDiv %[[ZERO]], %[[FOUR2]] : i32
+  // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]]
+  // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]]
+  // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]]
+  store %value, %arg0[] : memref<i8>
+  return
+}
+
+// CHECK-LABEL: @store_i16
+func @store_i16(%arg0: memref<10xi16>, %index: index, %value: i16) {
+  // CHECK: spv.Store
+  // CHECK-NOT: spv.AtomicAnd
+  // CHECK-NOT: spv.AtomicOr
+  store %value, %arg0[%index] : memref<10xi16>
+  return
+}
+
+} // end module
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 96d4343b1a4b0..4d42b8d9b5704 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -418,6 +418,30 @@ func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
 // CHECK: llvm.mlir.constant(0 : index
 // CHECK: llvm.insertvalue {{.*}}[2] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
+func @vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref<vector<8x8x8xf32>, 3> {
+  %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref<vector<8x8x8xf32>, 3>
+  return %0 : memref<vector<8x8x8xf32>, 3>
+}
+// CHECK-LABEL: llvm.func @vector_type_cast_non_zero_addrspace
+// CHECK: llvm.mlir.undef : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : 
!llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
+// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm<"float addrspace(3)*"> to !llvm<"[8 x [8 x <8 x float>]] addrspace(3)*">
+// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
+// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm<"float addrspace(3)*"> to !llvm<"[8 x [8 x <8 x float>]] addrspace(3)*">
+// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+// CHECK: llvm.mlir.constant(0 : index
+// CHECK: llvm.insertvalue {{.*}}[2] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+
+func @vector_print_scalar_i1(%arg0: i1) {
+  vector.print %arg0 : i1
+  return
+}
+// CHECK-LABEL: llvm.func @vector_print_scalar_i1(
+// CHECK-SAME: %[[A:.*]]: !llvm.i1)
+// CHECK: llvm.call @print_i1(%[[A]]) : (!llvm.i1) -> ()
+// CHECK: llvm.call @print_newline() : () -> ()
+
 func @vector_print_scalar_i32(%arg0: i32) {
   vector.print %arg0 : i32
   return
@@ -828,3 +852,68 @@ func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
 // CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]]
 // CHECK-SAME: {alignment = 1 : i32} :
 // CHECK-SAME: !llvm<"<17 x float>">, !llvm<"<17 x i1>"> into !llvm<"<17 x float>*">
+
+func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index) -> vector<17xf32> {
+  %f7 = constant 7.0: f32
+  %f = vector.transfer_read %A[%base0, %base1], %f7
+    {permutation_map = affine_map<(d0, d1) -> (d1)>} :
+    memref<?x?xf32>, vector<17xf32>
+  return %f: vector<17xf32>
+}
+// CHECK-LABEL: func @transfer_read_2d_to_1d
+// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: !llvm.i64, %[[BASE_1:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>">
+//
+// Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ].
+// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>">
+// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+// Here we check that %BASE_1 is properly used.
+// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[BASE_1]], %[[offsetVec]][%[[c0]] :
+// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>">
+// CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
+//
+// Let dim be the memref dimension; compute the vector comparison mask:
+// [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ]
+// Here we check that %DIM[1] is properly used.
+// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] :
+// CHECK-SAME: !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>">
+// CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] :
+// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>">
+// CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32,
+// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] :
+// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>">
+
+func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -> vector<17xf32> {
+  %f7 = constant 7.0: f32
+  %f = vector.transfer_read %A[%base], %f7
+    {permutation_map = affine_map<(d0) -> (d0)>} :
+    memref<?xf32, 3>, vector<17xf32>
+  vector.transfer_write %f, %A[%base]
+    {permutation_map = affine_map<(d0) -> (d0)>} :
+    vector<17xf32>, memref<?xf32, 3>
+  return %f: vector<17xf32>
+}
+// CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace
+// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>">
+//
+// 1. Check address space for GEP is correct.
+// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} :
+// CHECK-SAME: (!llvm<"float addrspace(3)*">, !llvm.i64) -> !llvm<"float addrspace(3)*">
+// CHECK: %[[vecPtr:.*]] = llvm.addrspacecast %[[gep]] :
+// CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*">
+//
+// 2. Check address space of the memref is correct.
+// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] :
+// CHECK-SAME: !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }">
+//
+// 3. Check address space for GEP is correct.
+// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : +// CHECK-SAME: (!llvm<"float addrspace(3)*">, !llvm.i64) -> !llvm<"float addrspace(3)*"> +// CHECK: %[[vecPtr_b:.*]] = llvm.addrspacecast %[[gep_b]] : +// CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*"> diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir index c601cf98dcd6d..7b12254b6d35d 100644 --- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir @@ -1,7 +1,4 @@ -// RUN: mlir-opt %s -test-convert-vector-to-loops | FileCheck %s - -// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> +// RUN: mlir-opt %s -test-convert-vector-to-loops -split-input-file | FileCheck %s // CHECK-LABEL: func @materialize_read_1d() { func @materialize_read_1d() { @@ -27,6 +24,8 @@ func @materialize_read_1d() { return } +// ----- + // CHECK-LABEL: func @materialize_read_1d_partially_specialized func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) { %f0 = constant 0.0: f32 @@ -50,6 +49,11 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d return } +// ----- + +// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> + // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { %f0 = constant 0.0: f32 @@ -122,6 +126,11 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { return } +// ----- + +// CHECK: #[[ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK: #[[SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> + // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32> @@ -198,3 +207,67 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { } return } + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: transfer_read_progressive( +// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index +func @transfer_read_progressive(%A : memref, %base: index) -> vector<17x15xf32> { + // CHECK: %[[cst:.*]] = constant 7.000000e+00 : f32 + %f7 = constant 7.0: f32 + + // CHECK-DAG: %[[cond0:.*]] = constant 1 : i1 + // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> + // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> + // CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref + // CHECK: affine.for %[[I:.*]] = 0 to 17 { + // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] + // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index + // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 + // CHECK: loop.if %[[cond1]] { + // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] {permutation_map = #[[MAP1]]} : memref, vector<15xf32> + // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> + // CHECK: } else { + // CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> + // CHECK: } + // CHECK: %[[vmemref:.*]] 
= vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> + // CHECK: %[[cst:.*]] = load %[[vmemref]][] : memref> + %f = vector.transfer_read %A[%base, %base], %f7 + {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : + memref, vector<17x15xf32> + + return %f: vector<17x15xf32> +} + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: transfer_write_progressive( +// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<17x15xf32> +func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<17x15xf32>) { + // CHECK: %[[cond0:.*]] = constant 1 : i1 + // CHECK: %[[alloc:.*]] = alloc() : memref<17xvector<15xf32>> + // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<17xvector<15xf32>> to memref> + // CHECK: store %[[vec]], %[[vmemref]][] : memref> + // CHECK: %[[dim:.*]] = dim %[[A]], 0 : memref + // CHECK: affine.for %[[I:.*]] = 0 to 17 { + // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] + // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index + // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 + // CHECK: loop.if %[[cond1]] { + // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> + // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref + // CHECK: } + vector.transfer_write %vec, %A[%base, %base] + {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : + vector<17x15xf32>, memref + return +} diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir index fc4fcd45231ef..5c7fba52976a6 100644 --- a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -481,19 +481,19 @@ func @canonicalize_bounds(%M : index, %N : index) { // CHECK-LABEL: @compose_into_affine_load_store func @compose_into_affine_load_store(%A : memref<1024xf32>, %u : index) { - %cf1 = constant 1.0 : f32 // CHECK: affine.for %[[IV:.*]] = 0 to 1024 affine.for %i = 0 to 1024 { // Make sure the unused operand (%u below) gets dropped as well. %idx = affine.apply affine_map<(d0, d1) -> (d0 + 1)> (%i, %u) - affine.load %A[%idx] : memref<1024xf32> - affine.store %cf1, %A[%idx] : memref<1024xf32> + %0 = affine.load %A[%idx] : memref<1024xf32> + affine.store %0, %A[%idx] : memref<1024xf32> // CHECK-NEXT: affine.load %{{.*}}[%[[IV]] + 1] - // CHECK-NEXT: affine.store %cst, %{{.*}}[%[[IV]] + 1] + // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV]] + 1] // Map remains the same, but operand changes on composition. 
%copy = affine.apply affine_map<(d0) -> (d0)> (%i) - affine.load %A[%copy] : memref<1024xf32> + %1 = affine.load %A[%copy] : memref<1024xf32> + "prevent.dce"(%1) : (f32) -> () // CHECK-NEXT: affine.load %{{.*}}[%[[IV]]] } return diff --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir index 4ad8b7ed82470..c0855987ac323 100644 --- a/mlir/test/Dialect/Affine/invalid.mlir +++ b/mlir/test/Dialect/Affine/invalid.mlir @@ -124,7 +124,7 @@ func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref %dim = dim %0, 0 : memref - // expected-error@+1 {{operand cannot be used as a dimension id}} + // expected-error@+1 {{operand cannot be used as a symbol}} affine.if #set0(%dim)[%n0] {} } return diff --git a/mlir/test/Dialect/Affine/loop-unswitch.mlir b/mlir/test/Dialect/Affine/loop-unswitch.mlir index 801eb059511cd..59eb5ff70ff06 100644 --- a/mlir/test/Dialect/Affine/loop-unswitch.mlir +++ b/mlir/test/Dialect/Affine/loop-unswitch.mlir @@ -6,35 +6,35 @@ func @if_else_imperfect(%A : memref<100xi32>, %B : memref<100xi32>, %v : i32) { // CHECK: %[[A:.*]]: memref<100xi32>, %[[B:.*]]: memref affine.for %i = 0 to 100 { - affine.load %A[%i] : memref<100xi32> + affine.store %v, %A[%i] : memref<100xi32> affine.for %j = 0 to 100 { - affine.load %A[%j] : memref<100xi32> + affine.store %v, %A[%j] : memref<100xi32> affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%i) { - affine.load %B[%j] : memref<100xi32> + affine.store %v, %B[%j] : memref<100xi32> } call @external() : () -> () } - affine.load %A[%i] : memref<100xi32> + affine.store %v, %A[%i] : memref<100xi32> } return } func @external() // CHECK: affine.for %[[I:.*]] = 0 to 100 { -// CHECK-NEXT: affine.load %[[A]][%[[I]]] +// CHECK-NEXT: affine.store %{{.*}}, %[[A]][%[[I]]] // CHECK-NEXT: affine.if #[[SET]](%[[I]]) { // CHECK-NEXT: affine.for %[[J:.*]] = 0 to 100 { -// CHECK-NEXT: affine.load %[[A]][%[[J]]] -// CHECK-NEXT: affine.load %[[B]][%[[J]]] +// CHECK-NEXT: affine.store %{{.*}}, %[[A]][%[[J]]] +// CHECK-NEXT: affine.store %{{.*}}, %[[B]][%[[J]]] // CHECK-NEXT: call // CHECK-NEXT: } // CHECK-NEXT: } else { // CHECK-NEXT: affine.for %[[JJ:.*]] = 0 to 100 { -// CHECK-NEXT: affine.load %[[A]][%[[JJ]]] +// CHECK-NEXT: affine.store %{{.*}}, %[[A]][%[[J]]] // CHECK-NEXT: call // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.load %[[A]][%[[I]]] +// CHECK-NEXT: affine.store %{{.*}}, %[[A]][%[[I]]] // CHECK-NEXT: } // CHECK-NEXT: return @@ -51,7 +51,7 @@ func @if_then_perfect(%A : memref<100xi32>, %v : i32) { affine.for %j = 0 to 100 { affine.for %k = 0 to 100 { affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%i) { - affine.load %A[%i] : memref<100xi32> + affine.store %v, %A[%i] : memref<100xi32> } } } @@ -72,10 +72,10 @@ func @if_else_perfect(%A : memref<100xi32>, %v : i32) { affine.for %k = 0 to 100 { call @foo() : () -> () affine.if affine_set<(d0, d1) : (d0 - 2 >= 0, -d1 + 80 >= 0)>(%i, %j) { - affine.load %A[%i] : memref<100xi32> + affine.store %v, %A[%i] : memref<100xi32> call @abc() : () -> () } else { - affine.load %A[%i + 1] : memref<100xi32> + affine.store %v, %A[%i + 1] : memref<100xi32> call @xyz() : () -> () } call @bar() : () -> () @@ -89,14 +89,14 @@ func @if_else_perfect(%A : memref<100xi32>, %v : i32) { // CHECK-NEXT: affine.if // CHECK-NEXT: affine.for // CHECK-NEXT: call @foo -// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] +// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] // CHECK-NEXT: call @abc // CHECK-NEXT: call @bar // CHECK-NEXT: } // 
CHECK-NEXT: else // CHECK-NEXT: affine.for // CHECK-NEXT: call @foo -// CHECK-NEXT: affine.load %{{.*}}[%{{.*}} + 1] +// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}} + 1] // CHECK-NEXT: call @xyz // CHECK-NEXT: call @bar // CHECK-NEXT: } @@ -105,23 +105,23 @@ func @if_else_perfect(%A : memref<100xi32>, %v : i32) { // CHECK-NEXT: } // CHECK-LABEL: func @if_then_imperfect -func @if_then_imperfect(%A : memref<100xi32>, %N : index) { +func @if_then_imperfect(%A : memref<100xi32>, %N : index, %v: i32) { affine.for %i = 0 to 100 { - affine.load %A[0] : memref<100xi32> + affine.store %v, %A[0] : memref<100xi32> affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%N) { - affine.load %A[%i] : memref<100xi32> + affine.store %v, %A[%i] : memref<100xi32> } } return } // CHECK: affine.if // CHECK-NEXT: affine.for -// CHECK-NEXT: affine.load -// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.store +// CHECK-NEXT: affine.store // CHECK-NEXT: } // CHECK-NEXT: } else { // CHECK-NEXT: affine.for -// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.store // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return @@ -182,21 +182,21 @@ func @handle_dead_if(%N : index) { #set0 = affine_set<(d0, d1)[s0, s1] : (d0 * -16 + s0 - 16 >= 0, d1 * -3 + s1 - 3 >= 0)> // CHECK-LABEL: func @perfect_if_else -func @perfect_if_else(%arg0 : memref, %arg1 : memref, %arg4 : index, - %arg5 : index, %arg6 : index, %sym : index) { +func @perfect_if_else(%arg0 : memref, %arg1 : memref, %v : f64, + %arg4 : index, %arg5 : index, %arg6 : index, %sym : index) { affine.for %arg7 = #lb0(%arg5) to min #ub0(%arg5)[%sym] { affine.parallel (%i0, %j0) = (0, 0) to (symbol(%sym), 100) step (10, 10) { affine.for %arg8 = #lb1(%arg4) to min #ub1(%arg4)[%sym] { affine.if #set0(%arg6, %arg7)[%sym, %sym] { affine.for %arg9 = #flb0(%arg6) to #fub0(%arg6) { affine.for %arg10 = #flb1(%arg7) to #fub1(%arg7) { - affine.load %arg0[0, 0] : memref + affine.store %v, %arg0[0, 0] : memref } } } else { affine.for %arg9 = #lb0(%arg6) to min #pub0(%arg6)[%sym] { affine.for %arg10 = #lb1(%arg7) to min #pub1(%arg7)[%sym] { - affine.load %arg0[0, 0] : memref + affine.store %v, %arg0[0, 0] : memref } } } @@ -212,7 +212,7 @@ func @perfect_if_else(%arg0 : memref, %arg1 : memref, %arg4 : // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for -// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.store // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -222,7 +222,7 @@ func @perfect_if_else(%arg0 : memref, %arg1 : memref, %arg4 : // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for -// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.store // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir index cd42980f87c97..d4b3a112dfc6a 100644 --- a/mlir/test/Dialect/Affine/ops.mlir +++ b/mlir/test/Dialect/Affine/ops.mlir @@ -115,6 +115,44 @@ func @valid_symbols(%arg0: index, %arg1: index, %arg2: index) { // ----- +// Test symbol constraints for ops with PolyhedralScope trait. + +// CHECK-LABEL: func @valid_symbol_polyhedral_scope +func @valid_symbol_polyhedral_scope(%n : index, %A : memref) { + test.polyhedral_scope { + %c1 = constant 1 : index + %l = subi %n, %c1 : index + // %l, %n are valid symbols since test.polyhedral_scope defines a new + // polyhedral scope. + affine.for %i = %l to %n { + %m = subi %l, %i : index + test.polyhedral_scope { + // %m and %n are valid symbols. 
+ affine.for %j = %m to %n { + %v = affine.load %A[%n - 1] : memref + affine.store %v, %A[%n - 1] : memref + } + "terminate"() : () -> () + } + } + "terminate"() : () -> () + } + return +} + +// ----- + +// Test the fact that module op always provides a polyhedral scope. + +%idx = "test.foo"() : () -> (index) +"test.func"() ({ +^bb0(%A : memref): + affine.load %A[%idx] : memref + "terminate"() : () -> () +}) : () -> () + +// ----- + // CHECK-LABEL: @parallel // CHECK-SAME: (%[[N:.*]]: index) func @parallel(%N : index) { diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index 5a7178030843c..2a6f10008f9e4 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -28,3 +28,9 @@ func @rocdl_special_regs() -> !llvm.i32 { %11 = rocdl.grid.dim.z : !llvm.i32 llvm.return %0 : !llvm.i32 } + +func @rocdl.barrier() { + // CHECK: rocdl.barrier + rocdl.barrier + llvm.return +} diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir index 70457825ce4fc..dfe130a44efb9 100644 --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -1,13 +1,15 @@ // RUN: mlir-opt %s -convert-linalg-to-affine-loops | FileCheck %s // Test that we can lower all the way to LLVM without crashing, don't check results here. -// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1 +// RUN: mlir-opt %s -convert-linalg-to-affine-loops -convert-linalg-to-llvm -o=/dev/null 2>&1 // CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> // CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> // CHECK-DAG: #[[stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECK-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> + func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { %c0 = constant 0 : index %c1 = constant 1 : index @@ -53,3 +55,69 @@ func @conv_view3(%arg0: memref, %arg1: // CHECK: affine.for %{{.*}} = 0 to %[[Q]] { // CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { // CHECK: %[[SUM:.*]] = affine.apply #[[stride2Dilation1]](%{{.*}}, %{{.*}}) + +func @conv_padding(%arg0: memref, + %arg1: memref, + %arg2: memref) { + linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1], + padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>, + strides = [1, 1]} : + memref, memref, memref + return +} +// CHECK-LABEL: func @conv_padding +// CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { +// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 +// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref +// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref +// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref +// CHECK: %[[K:.*]] = dim %arg0, 3 : memref +// CHECK: %[[B:.*]] = dim %arg1, 0 : memref +// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref +// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref +// CHECK: affine.for %{{.*}} = 0 to %[[B]] { +// CHECK: affine.for %{{.*}} = 0 to %[[X0]] { +// CHECK: affine.for %{{.*}} = 0 to %[[X1]] { +// CHECK: affine.for %{{.*}} = 0 to %[[K]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Q]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] { +// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] { +// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) +// CHECK: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) +// CHECK: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) +// Padded conv involves an affine.max in the memory access which is not +// 
allowed by affine.load. Override to always use an std.load. +// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 +// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 +// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref + +//----------------------------------------------------------------------------// +// Named ops to loops. +//----------------------------------------------------------------------------// +func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { + linalg.batch_matmul %A, %B, %C : (memref, memref, memref) -> () + return +} +// CHECK-LABEL: @named_batch_matmul +// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECK: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECK: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECK: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECK: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] { +// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] { +// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] { +// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] { +// CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECK: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECK: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECK: affine.store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir index 14a12840d1d0b..3f20fd8426759 100644 --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -727,3 +727,48 @@ func @fill_and_conv(%arg0: memref<1x4x5x1xf32>, %arg1: memref<2x3x1x1xf32>, %arg // CHECK: loop.for // CHECK: linalg.fill // CHECK: linalg.conv + +// ----- + +// Test that different allocation-like ops are recognized and properly handled. 
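The test below builds matmul operands from std.alloca as well as std.alloc; the property being checked is that fusion's dependence analysis follows a subview chain back to whichever allocation-like op owns the buffer. An abstract C++ sketch of that walk (`ViewNode` and `findAllocRoot` are placeholders for this note, not MLIR APIs):

// Each subview records the view it was taken from; allocation-like ops
// (alloc, alloca) are the roots of such chains.
struct ViewNode {
  const ViewNode *source = nullptr; // null for an alloc-like root
};

// Follow the chain of views back to the owning allocation. Two subviews
// can only carry a fusion-blocking dependence if they share a root.
const ViewNode *findAllocRoot(const ViewNode *v) {
  while (v->source)
    v = v->source;
  return v;
}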
+func @accept_different_alloc_ops(%dim: index, %s0 : index, %s1: index) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c3 = constant 3 : index + %c4 = constant 4 : index + + %A = alloca(%dim, %dim)[%s0, %s1] : memref + %B = alloca(%dim, %dim)[%s0, %s1] : memref + %C = alloc(%dim, %dim)[%s0, %s1] : memref + + linalg.matmul(%A, %B, %C) : + memref, + memref, + memref + + loop.for %i = %c0 to %dim step %c2 { + loop.for %j = %c0 to %dim step %c3 { + loop.for %k = %c0 to %dim step %c4 { + %0 = std.subview %A[%i, %k][%c2, %c4][%c1, %c1] : + memref to + memref + %1 = std.subview %B[%k, %j][%c4, %c3][%c1, %c1] : + memref to + memref + %2 = std.subview %C[%i, %j][%c2, %c3][%c1, %c1] : + memref to + memref + linalg.matmul(%0, %1, %2) : + memref, + memref, + memref + } + } + } + return +} + +// CHECK-LABEL: func @accept_different_alloc_ops +// CHECK-COUNT-3: loop.for +// CHECK-COUNT-2: linalg.matmul diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir index 7c71dbf893c91..6075b98247319 100644 --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -2,7 +2,7 @@ // RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s // Test that we can lower all the way to LLVM without crashing, don't check results here. -// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1 +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -o=/dev/null 2>&1 // CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> @@ -354,7 +354,6 @@ func @conv_view4(%arg0: memref, % // CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref - func @conv_padding(%arg0: memref, %arg1: memref, %arg2: memref) { @@ -854,8 +853,8 @@ func @scalar_code(%arg0: memref, %arg1 : memref, %arg2 : memref) // CHECKLOOP-SAME: %[[ARG1]]: memref // CHECKLOOP-SAME: %[[ARG2]]: memref // CHECKLOOP-NOT: loop.for -// CHECKLOOP-DAG: load %[[ARG0]][] -// CHECKLOOP-DAG: load %[[ARG1]][] +// CHECKLOOP: load %[[ARG0]][] +// CHECKLOOP: load %[[ARG1]][] // CHECKLOOP: addf // CHECKLOOP: store %{{.*}}, %[[ARG2]][] @@ -864,7 +863,50 @@ func @scalar_code(%arg0: memref, %arg1 : memref, %arg2 : memref) // CHECKPARALLEL-SAME: %[[ARG1]]: memref // CHECKPARALLEL-SAME: %[[ARG2]]: memref // CHECKPARALLEL-NOT: loop.for -// CHECKPARALLEL-DAG: load %[[ARG0]][] -// CHECKPARALLEL-DAG: load %[[ARG1]][] +// CHECKPARALLEL: load %[[ARG0]][] +// CHECKPARALLEL: load %[[ARG1]][] // CHECKPARALLEL: addf // CHECKPARALLEL: store %{{.*}}, %[[ARG2]][] + +//----------------------------------------------------------------------------// +// Named ops to loops. 
+//----------------------------------------------------------------------------// +func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { + linalg.batch_matmul %A, %B, %C : (memref, memref, memref) -> () + return +} +// CHECKLOOP-LABEL: @named_batch_matmul +// CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECKLOOP: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECKLOOP: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECKLOOP: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECKLOOP: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECKLOOP: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKLOOP: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKLOOP: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKLOOP: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKLOOP: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref + +// CHECKPARALLEL-LABEL: @named_batch_matmul +// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECKPARALLEL: loop.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) { +// CHECKPARALLEL: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKPARALLEL: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref diff --git a/mlir/test/Dialect/Linalg/matmul-to-vector.mlir b/mlir/test/Dialect/Linalg/matmul-to-vector.mlir deleted file mode 100644 index 351b2041d8c0a..0000000000000 --- a/mlir/test/Dialect/Linalg/matmul-to-vector.mlir +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: mlir-opt %s -linalg-matmul-to-vector | FileCheck %s - -func @matmul_perm(%A: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - %B: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - %C: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>) { - linalg.matmul(%A, %B, %C) {__internal_linalg_transform__ = "__with_perm__"} : - memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - memref<1584x1584xf32, offset: 0, strides: [1584, 1]>, - memref<1584x1584xf32, offset: 0, strides: [1584, 1]> - return -} - -// CHECK-LABEL:func @matmul_perm -// CHECK: vector.contract -// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] -// CHECK-SAME: : vector<8x16xf32>, vector<16x12xf32> into vector<8x12xf32> diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir new file mode 100644 index 0000000000000..ed82c93622dff --- /dev/null +++ 
b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
@@ -0,0 +1,75 @@
+// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @multiple_results_generic_op
+func @multiple_results_generic_op(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
+  %0, %1 = linalg.generic {args_in = 1 : i64, args_out = 2 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0 {
+  ^bb0(%gen_arg1: f32):
+    %tmp1 = exp %gen_arg1 : f32
+    linalg.yield %tmp1, %tmp1 : f32, f32
+  }: tensor<4xf32> -> (tensor<4xf32>, tensor<4xf32>)
+  return %0, %1 : tensor<4xf32>, tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]], %[[ARG2_RESULT:.*]]: [[TYPE]])
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK-NEXT: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32
+// CHECK-NEXT: %{{.*}} = exp
+// CHECK-NEXT: linalg.yield
+// CHECK-NEXT: [[TYPE]], [[TYPE]], [[TYPE]]
+// CHECK: linalg.copy(%[[FIRST_ALLOC]], %[[ARG1_RESULT]])
+// CHECK: dealloc %[[FIRST_ALLOC]]
+// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG2_RESULT]])
+// CHECK: dealloc %[[SECOND_ALLOC]]
+// CHECK: return
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @chained_operations
+func @chained_operations(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+  %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 {
+  ^bb0(%gen_arg1: f32):
+    %tmp1 = exp %gen_arg1 : f32
+    linalg.yield %tmp1 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 {
+  ^bb0(%gen_arg2: f32):
+    %tmp2 = exp %gen_arg2 : f32
+    linalg.yield %tmp2 : f32
+  }: tensor<4xf32> -> tensor<4xf32>
+  return %1 : tensor<4xf32>
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]], %[[ARG1_RESULT:.*]]: [[TYPE]])
+// CHECK: %[[FIRST_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[NEW_ARG0]], %[[FIRST_ALLOC]]
+// CHECK: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc() : [[TYPE]]
+// CHECK: linalg.generic
+// CHECK-SAME: %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK: ^{{[a-z0-9_]*}}
+// CHECK-SAME: %{{.*}}: f32, %{{.*}}: f32
+// CHECK: [[TYPE]], [[TYPE]]
+// CHECK: dealloc %[[FIRST_ALLOC]]
+// CHECK: linalg.copy(%[[SECOND_ALLOC]], %[[ARG1_RESULT]])
+// CHECK: dealloc %[[SECOND_ALLOC]]
+// CHECK: return
+
+// -----
+
+// CHECK-LABEL: func @no_linalg_op
+func @no_linalg_op(%arg0: f32) -> (f32, f32) {
+  %0 = mulf %arg0, %arg0 : f32
+  return %0, %0 : f32, f32
+}
+// CHECK: (%[[NEW_ARG0:.*]]: [[TYPE:.*]]) -> ([[TYPE]], [[TYPE]])
+// CHECK: %[[RESULT:.*]] = mulf %[[NEW_ARG0]], %[[NEW_ARG0]] : [[TYPE]]
+// CHECK: return %[[RESULT]], %[[RESULT]] : [[TYPE]], [[TYPE]]
diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
index d230aa9936116..f5ef4fff81654 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns | FileCheck %s
+// RUN: mlir-opt %s 
-test-linalg-transform-patterns=test-patterns | FileCheck %s // CHECK-DAG: #[[STRIDED_1D:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. @@ -22,10 +22,8 @@ func @dot(%x: memref, // CHECK-LABEL: func @dot // CHECK-DAG: %[[c0:.*]] = constant 0 : index // CHECK-DAG: %[[c1:.*]] = constant 1 : index -// CHECK-DAG: %[[c8:.*]] = constant 8 : index // CHECK-DAG: %[[c8000:.*]] = constant 8000 : index // CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8]] { // CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c1]] { // CHECK: load // CHECK: load @@ -46,8 +44,7 @@ func @matvec(%A: memref, // CHECK-DAG: %[[c0:.*]] = constant 0 : index // CHECK-DAG: %[[c5:.*]] = constant 5 : index // CHECK-DAG: %[[c6:.*]] = constant 6 : index -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]] -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c6]] +// CHECK: loop.parallel {{.*}} step (%[[c5]], %[[c6]]) // CHECK: linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref func @matmul(%A: memref, @@ -86,88 +83,6 @@ func @matmul(%A: memref, // CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { // CHECK: linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref -#some_generic_trait = { - args_in = 1, - args_out = 1, - indexing_maps = [ - affine_map<(i, j) -> (i, j)>, - affine_map<(i, j) -> (i, j)> - ], - iterator_types = ["parallel", "parallel"] -} -func @fusion_test(%A: memref, - %B: memref, - %C: memref, - %D: memref, - %E: memref) { - // This should not be fused as it would violate dependencies. It will get - // tiled for all levels of the memory hierarchy. - linalg.matmul(%A, %A, %C) : memref, - memref, - memref - - // This should be fused. - linalg.matmul(%A, %B, %C) : memref, - memref, - memref - - // This should not be fused or transformed at all since there are no patterns - // on it. However it will be reordered because there are no dependencies. 
- linalg.generic #some_generic_trait %A, %D { - ^bb(%a: f32, %b: f32) : - linalg.yield %a : f32 - } : memref, - memref - - linalg.matmul(%C, %D, %E) : memref, - memref, - memref - - return -} -// CHECK-LABEL: func @fusion_test -// CHECK-DAG: %[[c0:.*]] = constant 0 : index -// CHECK-DAG: %[[c2:.*]] = constant 2 : index -// CHECK-DAG: %[[c3:.*]] = constant 3 : index -// CHECK-DAG: %[[c4:.*]] = constant 4 : index -// CHECK-DAG: %[[c20:.*]] = constant 20 : index -// CHECK-DAG: %[[c30:.*]] = constant 30 : index -// CHECK-DAG: %[[c40:.*]] = constant 40 : index -// CHECK-DAG: %[[c100:.*]] = constant 100 : index -// CHECK-DAG: %[[c150:.*]] = constant 150 : index -// CHECK-DAG: %[[c200:.*]] = constant 200 : index -// CHECK-DAG: %[[c300:.*]] = constant 300 : index -// CHECK-DAG: %[[c400:.*]] = constant 400 : index -// CHECK-DAG: %[[c2000:.*]] = constant 2000 : index -// CHECK-DAG: %[[c3000:.*]] = constant 3000 : index -// CHECK-DAG: %[[c4000:.*]] = constant 4000 : index -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { -// CHECK: linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref -// -// CHECK: linalg.generic -// -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c100]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c150]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c2]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c3]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c4]] { -// CHECK: linalg.matmul(%{{.*}}, %{{.*}}, %{{.*}}) : memref, memref, memref -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c2]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c3]] { -// CHECK: loop.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c4]] { -// CHECK: linalg.matmul(%{{.*}}, %{{.*}}, %{{.*}}) : memref, memref, memref - #matmul_trait = { args_in = 2, args_out = 1, @@ -280,23 +195,6 @@ func @permute_generic_indexed( // CHECK-SAME: memref, // CHECK-SAME: memref -func @dot_perm(%x: memref, - %y: memref, - %v: memref) { - linalg.dot(%x, %y, %v) {__internal_linalg_transform__ = "__with_perm__"} : - memref, - memref, - memref - return -} -// CHECK-LABEL: func @dot_perm -// CHECK-DAG: %[[c0:.*]] = constant 0 : index -// CHECK-DAG: %[[c8:.*]] = constant 8 : index -// CHECK-DAG: %[[c8000:.*]] = constant 8000 : index -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] { -// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8]] { -// CHECK: linalg.dot({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref - func @matvec_perm(%A: memref, %x: memref, %y: memref) { diff --git a/mlir/test/Dialect/Loops/loop-unroll.mlir b/mlir/test/Dialect/Loops/loop-unroll.mlir new file mode 100644 index 0000000000000..fa3ebc173e510 --- /dev/null +++ b/mlir/test/Dialect/Loops/loop-unroll.mlir @@ -0,0 +1,250 @@ +// RUN: mlir-opt %s 
-test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2 +// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2 + +func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index, + %arg3: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + store %0, %arg3[%i0] : memref + } + return +} +// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll +// UNROLL-BY-2-SAME: %[[LB:.*0]]: index, +// UNROLL-BY-2-SAME: %[[UB:.*1]]: index, +// UNROLL-BY-2-SAME: %[[STEP:.*2]]: index, +// UNROLL-BY-2-SAME: %[[MEM:.*3]]: memref +// +// UNROLL-BY-2-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index +// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-2-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index +// UNROLL-BY-2-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index +// Compute trip count in V3. +// UNROLL-BY-2-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index +// Store unroll factor in C2. +// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-BY-2-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C2]] : index +// UNROLL-BY-2-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index +// UNROLL-BY-2-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index +// Compute upper bound of unrolled loop in V7. +// UNROLL-BY-2-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index +// Compute step of unrolled loop in V8. +// UNROLL-BY-2-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index +// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-2-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index +// UNROLL-BY-2-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: return + +// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll +// UNROLL-BY-3-SAME: %[[LB:.*0]]: index, +// UNROLL-BY-3-SAME: %[[UB:.*1]]: index, +// UNROLL-BY-3-SAME: %[[STEP:.*2]]: index, +// UNROLL-BY-3-SAME: %[[MEM:.*3]]: memref +// +// UNROLL-BY-3-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index +// UNROLL-BY-3-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index +// Compute trip count in V3. +// UNROLL-BY-3-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index +// Store unroll factor in C3. +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C3]] : index +// UNROLL-BY-3-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index +// UNROLL-BY-3-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index +// Compute upper bound of unrolled loop in V7. +// UNROLL-BY-3-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index +// Compute step of unrolled loop in V8. 
+// UNROLL-BY-3-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V12:.*]] = addi %[[IV]], %[[V11]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V12]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: return + +func @dynamic_loop_unroll_outer_by_2( + %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, + %arg5 : index, %arg6: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + loop.for %i1 = %arg3 to %arg4 step %arg5 { + store %0, %arg6[%i1] : memref + } + } + return +} +// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2 +// UNROLL-OUTER-BY-2-SAME: %[[LB0:.*0]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[UB0:.*1]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[STEP0:.*2]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[LB1:.*3]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[UB1:.*4]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[STEP1:.*5]]: index, +// UNROLL-OUTER-BY-2-SAME: %[[MEM:.*6]]: memref +// +// UNROLL-OUTER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} { +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] { +// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: } +// UNROLL-OUTER-BY-2-NEXT: return + +func @dynamic_loop_unroll_inner_by_2( + %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, + %arg5 : index, %arg6: memref) { + %0 = constant 7.0 : f32 + loop.for %i0 = %arg0 to %arg1 step %arg2 { + loop.for %i1 = %arg3 to %arg4 step %arg5 { + store %0, %arg6[%i1] : memref + } + } + return +} +// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2 +// UNROLL-INNER-BY-2-SAME: %[[LB0:.*0]]: index, +// UNROLL-INNER-BY-2-SAME: %[[UB0:.*1]]: index, +// UNROLL-INNER-BY-2-SAME: %[[STEP0:.*2]]: index, +// UNROLL-INNER-BY-2-SAME: %[[LB1:.*3]]: index, +// UNROLL-INNER-BY-2-SAME: %[[UB1:.*4]]: index, +// UNROLL-INNER-BY-2-SAME: %[[STEP1:.*5]]: index, +// UNROLL-INNER-BY-2-SAME: %[[MEM:.*6]]: memref +// +// UNROLL-INNER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { +// UNROLL-INNER-BY-2: loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} { +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index 
+// UNROLL-INNER-BY-2-NEXT: %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index +// UNROLL-INNER-BY-2-NEXT: %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] { +// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: } +// UNROLL-INNER-BY-2-NEXT: return + +// Test that no epilogue clean-up loop is generated because the trip count is +// a multiple of the unroll factor. +func @static_loop_unroll_by_2(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 20 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2 +// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-2-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-2-DAG: %[[C20:.*]] = constant 20 : index +// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] { +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-2-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-2-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-2-NEXT: } +// UNROLL-BY-2-NEXT: return + +// Test that an epilogue clean-up loop is generated (the trip count is not +// a multiple of the unroll factor). +func @static_loop_unroll_by_3(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 20 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} + +// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3 +// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[C20:.*]] = constant 20 : index +// UNROLL-BY-3-DAG: %[[C18:.*]] = constant 18 : index +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: return + +// Test that the single iteration epilogue loop body is promoted to the loop's +// containing block. 
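+// For the function below the arithmetic works out as follows: the trip count is
+// (10 - 0 + (1 - 1)) / 1 = 10 and 10 mod 3 = 1, so the unrolled loop runs from
+// 0 to 9 with step 3 and the epilogue covers exactly one iteration, at index 9.
+// That single-iteration loop is what gets promoted, leaving the plain store on
+// %[[C9]] that the trailing CHECK lines verify.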
+func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 10 : index + %step = constant 1 : index + loop.for %i0 = %lb to %ub step %step { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue +// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref +// +// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index +// UNROLL-BY-3-DAG: %[[C10:.*]] = constant 10 : index +// UNROLL-BY-3-DAG: %[[C9:.*]] = constant 9 : index +// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index +// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] { +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index +// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index +// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index +// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index +// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: } +// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref +// UNROLL-BY-3-NEXT: return diff --git a/mlir/test/Dialect/SPIRV/Serialization/debug.mlir b/mlir/test/Dialect/SPIRV/Serialization/debug.mlir new file mode 100644 index 0000000000000..83fcca7dd0fba --- /dev/null +++ b/mlir/test/Dialect/SPIRV/Serialization/debug.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-translate -test-spirv-roundtrip-debug -mlir-print-debuginfo %s | FileCheck %s + +spv.module Logical GLSL450 requires #spv.vce { + spv.func @arithmetic(%arg0 : vector<4xf32>, %arg1 : vector<4xf32>) "None" { + // CHECK: loc({{".*debug.mlir"}}:6:10) + %0 = spv.FAdd %arg0, %arg1 : vector<4xf32> + // CHECK: loc({{".*debug.mlir"}}:8:10) + %1 = spv.FNegate %arg0 : vector<4xf32> + spv.Return + } + + spv.func @atomic(%ptr: !spv.ptr, %value: i32, %comparator: i32) "None" { + // CHECK: loc({{".*debug.mlir"}}:14:10) + %1 = spv.AtomicAnd "Device" "None" %ptr, %value : !spv.ptr + spv.Return + } + + spv.func @bitwiser(%arg0 : i32, %arg1 : i32) "None" { + // CHECK: loc({{".*debug.mlir"}}:20:10) + %0 = spv.BitwiseAnd %arg0, %arg1 : i32 + spv.Return + } + + spv.func @convert(%arg0 : f32) "None" { + // CHECK: loc({{".*debug.mlir"}}:26:10) + %0 = spv.ConvertFToU %arg0 : f32 to i32 + spv.Return + } + + spv.func @composite(%arg0 : !spv.struct, f32>>, %arg1: !spv.array<4xf32>, %arg2 : f32, %arg3 : f32) "None" { + // CHECK: loc({{".*debug.mlir"}}:32:10) + %0 = spv.CompositeInsert %arg1, %arg0[1 : i32, 0 : i32] : !spv.array<4xf32> into !spv.struct, f32>> + // CHECK: loc({{".*debug.mlir"}}:34:10) + %1 = spv.CompositeConstruct %arg2, %arg3 : vector<2xf32> + spv.Return + } + + spv.func @group_non_uniform(%val: f32) "None" { + // CHECK: loc({{".*debug.mlir"}}:40:10) + %0 = spv.GroupNonUniformFAdd "Workgroup" "Reduce" %val : f32 + spv.Return + } + + spv.func @logical(%arg0: i32, %arg1: i32) "None" { + // CHECK: loc({{".*debug.mlir"}}:46:10) + %0 = spv.IEqual %arg0, %arg1 : i32 + spv.Return + } + + spv.func @memory_accesses(%arg0 : !spv.ptr>, StorageBuffer>, %arg1 : i32, %arg2 : i32) "None" { + // CHECK: loc({{".*debug.mlir"}}:52:10) + %2 = spv.AccessChain %arg0[%arg1, %arg2] : !spv.ptr>, StorageBuffer> + // CHECK: loc({{".*debug.mlir"}}:54:10) + %3 = spv.Load 
"StorageBuffer" %2 : f32 + // CHECK: loc({{.*debug.mlir"}}:56:5) + spv.Store "StorageBuffer" %2, %3 : f32 + // CHECK: loc({{".*debug.mlir"}}:58:5) + spv.Return + } +} diff --git a/mlir/test/Dialect/SPIRV/availability.mlir b/mlir/test/Dialect/SPIRV/availability.mlir index e31c1bdeacca4..322cc533c826f 100644 --- a/mlir/test/Dialect/SPIRV/availability.mlir +++ b/mlir/test/Dialect/SPIRV/availability.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -disable-pass-threading -test-spirv-op-availability %s | FileCheck %s +// RUN: mlir-opt -mlir-disable-threading -test-spirv-op-availability %s | FileCheck %s // CHECK-LABEL: iadd func @iadd(%arg: i32) -> i32 { diff --git a/mlir/test/Dialect/SPIRV/canonicalize.mlir b/mlir/test/Dialect/SPIRV/canonicalize.mlir index f8c3bdebda394..20ed6e96be8db 100644 --- a/mlir/test/Dialect/SPIRV/canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/canonicalize.mlir @@ -559,15 +559,18 @@ func @cannot_canonicalize_selection_op_0(%cond: i1) -> () { // CHECK: spv.selection { spv.selection { + // CHECK: spv.BranchConditional + // CHECK-SAME: ^bb1(%[[DST_VAR_0]], %[[SRC_VALUE_0]] + // CHECK-SAME: ^bb1(%[[DST_VAR_1]], %[[SRC_VALUE_1]] spv.BranchConditional %cond, ^then, ^else ^then: - // CHECK: spv.Store "Function" %[[DST_VAR_0]], %[[SRC_VALUE_0]] ["Aligned", 8] : vector<3xi32> + // CHECK: ^bb1(%[[ARG0:.*]]: !spv.ptr, Function>, %[[ARG1:.*]]: vector<3xi32>): + // CHECK: spv.Store "Function" %[[ARG0]], %[[ARG1]] ["Aligned", 8] : vector<3xi32> spv.Store "Function" %3, %1 ["Aligned", 8]: vector<3xi32> spv.Branch ^merge ^else: - // CHECK: spv.Store "Function" %[[DST_VAR_1]], %[[SRC_VALUE_1]] ["Aligned", 8] : vector<3xi32> spv.Store "Function" %4, %2 ["Aligned", 8] : vector<3xi32> spv.Branch ^merge diff --git a/mlir/test/Dialect/SPIRV/target-env.mlir b/mlir/test/Dialect/SPIRV/target-env.mlir index 9b42314e3f1d2..27c4e8d04092b 100644 --- a/mlir/test/Dialect/SPIRV/target-env.mlir +++ b/mlir/test/Dialect/SPIRV/target-env.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -disable-pass-threading -test-spirv-target-env %s | FileCheck %s +// RUN: mlir-opt -mlir-disable-threading -test-spirv-target-env %s | FileCheck %s // Note: The following tests check that a spv.target_env can properly control // the conversion target and filter unavailable ops during the conversion. 
diff --git a/mlir/test/Dialect/Standard/expand-atomic.mlir b/mlir/test/Dialect/Standard/expand-atomic.mlir new file mode 100644 index 0000000000000..b4e65945f58ae --- /dev/null +++ b/mlir/test/Dialect/Standard/expand-atomic.mlir @@ -0,0 +1,24 @@ +// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s --dump-input-on-failure + +// CHECK-LABEL: func @atomic_rmw_to_generic +// CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) +func @atomic_rmw_to_generic(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { + %x = atomic_rmw "maxf" %f, %F[%i] : (f32, memref<10xf32>) -> f32 + return %x : f32 +} +// CHECK: %0 = std.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { +// CHECK: ^bb0([[CUR_VAL:%.*]]: f32): +// CHECK: [[CMP:%.*]] = cmpf "ogt", [[CUR_VAL]], [[f]] : f32 +// CHECK: [[SELECT:%.*]] = select [[CMP]], [[CUR_VAL]], [[f]] : f32 +// CHECK: atomic_yield [[SELECT]] : f32 +// CHECK: } +// CHECK: return %0 : f32 + +// ----- + +// CHECK-LABEL: func @atomic_rmw_no_conversion +func @atomic_rmw_no_conversion(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { + %x = atomic_rmw "addf" %f, %F[%i] : (f32, memref<10xf32>) -> f32 + return %x : f32 +} +// CHECK-NOT: generic_atomic_rmw diff --git a/mlir/test/EDSC/CMakeLists.txt b/mlir/test/EDSC/CMakeLists.txt index d8e3be8f20796..dda2a25fdb044 100644 --- a/mlir/test/EDSC/CMakeLists.txt +++ b/mlir/test/EDSC/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) add_llvm_executable(mlir-edsc-builder-api-test builder-api-test.cpp ) @@ -16,8 +20,6 @@ target_link_libraries(mlir-edsc-builder-api-test MLIRStandardOps MLIRTransforms MLIRVector - LLVMCore - LLVMSupport -) + ) target_include_directories(mlir-edsc-builder-api-test PRIVATE ..) diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index 76f478f46c8ae..cca6c2092b93e 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -15,7 +15,6 @@ #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/EDSC/Intrinsics.h" #include "mlir/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/Builders.h" #include "mlir/IR/IntegerSet.h" @@ -176,6 +175,33 @@ TEST_FUNC(builder_max_min_for) { f.erase(); } +TEST_FUNC(builder_block_append) { + using namespace edsc::op; + auto f = makeFunction("builder_blocks"); + + OpBuilder builder(f.getBody()); + ScopedContext scope(builder, f.getLoc()); + + BlockHandle b1, functionBlock(&f.front()); + BlockBuilder(&b1, {}, {})([&] { std_constant_index(0); }); + BlockBuilder(b1, Append())([&] { std_constant_index(1); }); + BlockBuilder(b1, Append())([&] { std_ret(); }); + // Get back to entry block and add a branch into b1 + BlockBuilder(functionBlock, Append())([&] { std_br(b1, {}); }); + + // clang-format off + // CHECK-LABEL: @builder_blocks + // CHECK-NEXT: br ^bb1 + // CHECK-NEXT: ^bb1: // pred: ^bb0 + // CHECK-NEXT: constant 0 : index + // CHECK-NEXT: constant 1 : index + // CHECK-NEXT: return + // CHECK-NEXT: } + // clang-format on + f.print(llvm::outs()); + f.erase(); +} + TEST_FUNC(builder_blocks) { using namespace edsc::op; auto f = makeFunction("builder_blocks"); @@ -183,6 +209,8 @@ TEST_FUNC(builder_blocks) { OpBuilder builder(f.getBody()); ScopedContext scope(builder, f.getLoc()); Value c1(std_constant_int(42, 32)), c2(std_constant_int(1234, 32)); + ReturnOp ret = std_ret(); + Value r; Value args12[2]; Value &arg1 = args12[0], &arg2 = args12[1]; @@ -205,6 +233,7 @@ 
TEST_FUNC(builder_blocks) { }); // Get back to entry block and add a branch into b1 BlockBuilder(functionBlock, Append())([&] { std_br(b1, {c1, c2}); }); + ret.erase(); // clang-format off // CHECK-LABEL: @builder_blocks @@ -274,6 +303,8 @@ TEST_FUNC(builder_cond_branch) { Value funcArg(f.getArgument(0)); Value c32(std_constant_int(32, 32)), c64(std_constant_int(64, 64)), c42(std_constant_int(42, 32)); + ReturnOp ret = std_ret(); + Value arg1; Value args23[2]; BlockHandle b1, b2, functionBlock(&f.front()); @@ -283,6 +314,7 @@ TEST_FUNC(builder_cond_branch) { BlockBuilder(functionBlock, Append())([&] { std_cond_br(funcArg, b1, {c32}, b2, {c64, c42}); }); + ret.erase(); // clang-format off // CHECK-LABEL: @builder_cond_branch @@ -1066,8 +1098,8 @@ TEST_FUNC(builder_loop_for_yield) { // CHECK: [[sum:%[0-9]+]] = addf [[arg0]], [[arg1]] : f32 // CHECK: loop.yield [[arg1]], [[sum]] : f32, f32 // CHECK: addf [[res]]#0, [[res]]#1 : f32 - // clang-format on + f.print(llvm::outs()); f.erase(); } diff --git a/mlir/test/Examples/standalone/lit.local.cfg b/mlir/test/Examples/standalone/lit.local.cfg new file mode 100644 index 0000000000000..481b809a0e486 --- /dev/null +++ b/mlir/test/Examples/standalone/lit.local.cfg @@ -0,0 +1,3 @@ +config.substitutions.append(("%cmake", config.host_cmake)) +config.substitutions.append(("%host_cxx", config.host_cxx)) +config.substitutions.append(("%host_cc", config.host_cc)) diff --git a/mlir/test/Examples/standalone/test.toy b/mlir/test/Examples/standalone/test.toy new file mode 100644 index 0000000000000..4f9ba5cc78e11 --- /dev/null +++ b/mlir/test/Examples/standalone/test.toy @@ -0,0 +1,4 @@ +# RUN: %cmake %mlir_src_root/examples/standalone -DCMAKE_CXX_COMPILER=%host_cxx -DCMAKE_C_COMPILER=%host_cc -DMLIR_DIR=%llvm_lib_dir/cmake/mlir ; %cmake --build . 
--target check-standalone-opt | tee %t | FileCheck %s + +# CHECK: Expected Passes: 1 +# UNSUPPORTED: windows, android diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index 2a43f8aef127f..16f244d88aa3d 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -537,3 +537,23 @@ func @wrong_shape_fail() { return } +//===----------------------------------------------------------------------===// +// Test StructAttr +//===----------------------------------------------------------------------===// + +// ----- + +func @missing_fields() { + // expected-error @+1 {{failed to satisfy constraint: DictionaryAttr with field(s): 'some_field', 'some_other_field' (each field having its own constraints)}} + "test.struct_attr"() {the_struct_attr = {}} : () -> () + return +} + +// ----- + +func @erroneous_fields() { + // expected-error @+1 {{failed to satisfy constraint: DictionaryAttr with field(s): 'some_field', 'some_other_field' (each field having its own constraints)}} + "test.struct_attr"() {the_struct_attr = {some_field = 1 : i8, some_other_field = 1}} : () -> () + return +} + diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index d0a27ec68468b..21718864d94b4 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -86,6 +86,24 @@ func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { // CHECK: %13 = muli %4, %4 : i32 %i6 = muli %i2, %i2 : i32 + // CHECK: %[[C0:.*]] = create_complex %[[F2:.*]], %[[F2]] : complex + %c0 = "std.create_complex"(%f2, %f2) : (f32, f32) -> complex + + // CHECK: %[[C1:.*]] = create_complex %[[F2]], %[[F2]] : complex + %c1 = create_complex %f2, %f2 : complex + + // CHECK: %[[REAL0:.*]] = re %[[CPLX0:.*]] : complex + %real0 = "std.re"(%c0) : (complex) -> f32 + + // CHECK: %[[REAL1:.*]] = re %[[CPLX0]] : complex + %real1 = re %c0 : complex + + // CHECK: %[[IMAG0:.*]] = im %[[CPLX0]] : complex + %imag0 = "std.im"(%c0) : (complex) -> f32 + + // CHECK: %[[IMAG1:.*]] = im %[[CPLX0]] : complex + %imag1 = im %c0 : complex + // CHECK: %c42_i32 = constant 42 : i32 %x = "std.constant"(){value = 42 : i32} : () -> i32 @@ -515,6 +533,18 @@ func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { // CHECK: %{{[0-9]+}} = rsqrt %arg1 : f32 %145 = rsqrt %f : f32 + // CHECK: %{{[0-9]+}} = sin %arg1 : f32 + %146 = "std.sin"(%f) : (f32) -> f32 + + // CHECK: %{{[0-9]+}} = sin %arg1 : f32 + %147 = sin %f : f32 + + // CHECK: %{{[0-9]+}} = sin %cst_8 : vector<4xf32> + %148 = sin %vcf32 : vector<4xf32> + + // CHECK: %{{[0-9]+}} = sin %arg0 : tensor<4x4x?xf32> + %149 = sin %t : tensor<4x4x?xf32> + return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 80fdf33429954..2a14c3ae6c419 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -303,6 +303,13 @@ func @invalid_cmp_shape(%idx : () -> ()) { // ----- +func @dma_start_not_enough_operands() { + // expected-error@+1 {{expected at least 4 operands}} + "std.dma_start"() : () -> () +} + +// ----- + func @dma_no_src_memref(%m : f32, %tag : f32, %c0 : index) { // expected-error@+1 {{expected source to be of memref type}} dma_start %m[%c0], %m[%c0], %c0, %tag[%c0] : f32, f32, f32 @@ -310,6 +317,24 @@ func @dma_no_src_memref(%m : f32, %tag : f32, %c0 : index) { // ----- +func @dma_start_not_enough_operands_for_src( + %src: memref<2x2x2xf32>, %idx: index) { + // expected-error@+1 {{expected at least 7 operands}} + "std.dma_start"(%src, %idx, %idx, %idx) : (memref<2x2x2xf32>, index, index, 
index) -> () +} + +// ----- + +func @dma_start_src_index_wrong_type( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref, %flt: f32) { + // expected-error@+1 {{expected source indices to be of index type}} + "std.dma_start"(%src, %idx, %flt, %dst, %idx, %tag, %idx) + : (memref<2x2xf32>, index, f32, memref<2xf32,1>, index, memref, index) -> () +} + +// ----- + func @dma_no_dst_memref(%m : f32, %tag : f32, %c0 : index) { %mref = alloc() : memref<8 x f32> // expected-error@+1 {{expected destination to be of memref type}} @@ -318,6 +343,36 @@ func @dma_no_dst_memref(%m : f32, %tag : f32, %c0 : index) { // ----- +func @dma_start_not_enough_operands_for_dst( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref) { + // expected-error@+1 {{expected at least 7 operands}} + "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index) -> () +} + +// ----- + +func @dma_start_dst_index_wrong_type( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref, %flt: f32) { + // expected-error@+1 {{expected destination indices to be of index type}} + "std.dma_start"(%src, %idx, %idx, %dst, %flt, %tag, %idx) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, f32, memref, index) -> () +} + +// ----- + +func @dma_start_dst_index_wrong_type( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref, %flt: f32) { + // expected-error@+1 {{expected num elements to be of index type}} + "std.dma_start"(%src, %idx, %idx, %dst, %idx, %flt, %tag) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, f32, memref) -> () +} + +// ----- + func @dma_no_tag_memref(%tag : f32, %c0 : index) { %mref = alloc() : memref<8 x f32> // expected-error@+1 {{expected tag to be of memref type}} @@ -326,9 +381,80 @@ func @dma_no_tag_memref(%tag : f32, %c0 : index) { // ----- +func @dma_start_not_enough_operands_for_tag( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref<2xi32,2>) { + // expected-error@+1 {{expected at least 8 operands}} + "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref<2xi32,2>) -> () +} + +// ----- + +func @dma_start_dst_index_wrong_type( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref<2xi32,2>, %flt: f32) { + // expected-error@+1 {{expected tag indices to be of index type}} + "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %flt) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref<2xi32,2>, f32) -> () +} + +// ----- + +func @dma_start_same_space( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32>, + %tag: memref) { + // expected-error@+1 {{DMA should be between different memory spaces}} + dma_start %src[%idx, %idx], %dst[%idx], %idx, %tag[] : memref<2x2xf32>, memref<2xf32>, memref +} + +// ----- + +func @dma_start_too_many_operands( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref) { + // expected-error@+1 {{incorrect number of operands}} + "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %idx, %idx, %idx) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref, index, index, index) -> () +} + + +// ----- + +func @dma_start_wrong_stride_type( + %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, + %tag: memref, %flt: f32) { + // expected-error@+1 {{expected stride and num elements per stride to be of type index}} + "std.dma_start"(%src, %idx, 
%idx, %dst, %idx, %idx, %tag, %idx, %flt) + : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref, index, f32) -> () +} + +// ----- + +func @dma_wait_not_enough_operands() { + // expected-error@+1 {{expected at least 2 operands}} + "std.dma_wait"() : () -> () +} + +// ----- + func @dma_wait_no_tag_memref(%tag : f32, %c0 : index) { // expected-error@+1 {{expected tag to be of memref type}} - dma_wait %tag[%c0], %arg0 : f32 + "std.dma_wait"(%tag, %c0, %c0) : (f32, index, index) -> () +} + +// ----- + +func @dma_wait_wrong_index_type(%tag : memref<2xi32>, %idx: index, %flt: f32) { + // expected-error@+1 {{expected tag indices to be of index type}} + "std.dma_wait"(%tag, %flt, %idx) : (memref<2xi32>, f32, index) -> () +} + +// ----- + +func @dma_wait_wrong_num_elements_type(%tag : memref<2xi32>, %idx: index, %flt: f32) { + // expected-error@+1 {{expected the number of elements to be of index type}} + "std.dma_wait"(%tag, %idx, %flt) : (memref<2xi32>, index, f32) -> () } // ----- @@ -1220,3 +1346,47 @@ func @assume_alignment(%0: memref<4x4xf16>) { // expected-error@-1 {{requires an ancestor op with AutomaticAllocationScope trait}} return }) : () -> () + +// ----- + +func @complex_number_from_non_float_operands(%real: i32, %imag: i32) { + // expected-error@+1 {{'complex' must be complex type with floating-point elements, but got 'complex'}} + std.create_complex %real, %imag : complex + return +} + +// ----- + +// expected-note@+1 {{prior use here}} +func @complex_number_from_different_float_types(%real: f32, %imag: f64) { + // expected-error@+1 {{expects different type than prior uses: 'f32' vs 'f64'}} + std.create_complex %real, %imag : complex + return +} + +// ----- + +// expected-note@+1 {{prior use here}} +func @complex_number_from_incompatible_float_type(%real: f32, %imag: f32) { + // expected-error@+1 {{expects different type than prior uses: 'f64' vs 'f32'}} + std.create_complex %real, %imag : complex + return +} + +// ----- + +// expected-note@+1 {{prior use here}} +func @real_part_from_incompatible_complex_type(%cplx: complex) { + // expected-error@+1 {{expects different type than prior uses: 'complex' vs 'complex'}} + std.re %cplx : complex + return +} + +// ----- + +// expected-note@+1 {{prior use here}} +func @imaginary_part_from_incompatible_complex_type(%cplx: complex) { + // expected-error@+1 {{expects different type than prior uses: 'complex' vs 'complex'}} + std.re %cplx : complex + return +} diff --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir index cf7f7eb3152fb..96b2348344811 100644 --- a/mlir/test/IR/invalid.mlir +++ b/mlir/test/IR/invalid.mlir @@ -1489,3 +1489,10 @@ func @really_large_bound() { } : () -> () return } + +// ----- + +func @duplicate_dictionary_attr_key() { + // expected-error @+1 {{duplicate key in dictionary attribute}} + "foo.op"() {a, a} : () -> () +} diff --git a/mlir/test/IR/test-matchers.mlir b/mlir/test/IR/test-matchers.mlir index 60d5bcf7d81b4..925b01bda110a 100644 --- a/mlir/test/IR/test-matchers.mlir +++ b/mlir/test/IR/test-matchers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -disable-pass-threading=true -test-matchers -o /dev/null 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading=true -test-matchers -o /dev/null 2>&1 | FileCheck %s func @test1(%a: f32, %b: f32, %c: f32) { %0 = addf %a, %b: f32 diff --git a/mlir/test/Pass/crash-recovery.mlir b/mlir/test/Pass/crash-recovery.mlir index 5a2e88b48e889..2de7e505b445c 100644 --- a/mlir/test/Pass/crash-recovery.mlir +++ b/mlir/test/Pass/crash-recovery.mlir @@ 
-1,5 +1,7 @@ // RUN: mlir-opt %s -pass-pipeline='func(test-function-pass, test-pass-crash)' -pass-pipeline-crash-reproducer=%t -verify-diagnostics // RUN: cat %t | FileCheck -check-prefix=REPRO %s +// RUN: mlir-opt %s -pass-pipeline='func(test-function-pass, test-pass-crash)' -pass-pipeline-crash-reproducer=%t -verify-diagnostics -pass-pipeline-local-reproducer +// RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL %s // expected-error@+1 {{A failure has been detected while processing the MLIR module}} module { @@ -13,3 +15,9 @@ module { // REPRO: module // REPRO: func @foo() { // REPRO-NEXT: return + +// REPRO_LOCAL: configuration: -pass-pipeline='func(test-pass-crash)' + +// REPRO_LOCAL: module +// REPRO_LOCAL: func @foo() { +// REPRO_LOCAL-NEXT: return diff --git a/mlir/test/Pass/ir-printing.mlir b/mlir/test/Pass/ir-printing.mlir index 892dc40a034ad..8bb86b36c1813 100644 --- a/mlir/test/Pass/ir-printing.mlir +++ b/mlir/test/Pass/ir-printing.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before=cse -print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s -// RUN: mlir-opt %s -disable-pass-threading=true -pass-pipeline='func(cse,cse)' -print-ir-after-all -print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,canonicalize)' -print-ir-before=cse -print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func(cse,cse)' -print-ir-after-all -print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s func @foo() { %0 = constant 0 : i32 diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir index db39ad6a1633b..6cd8a29e6f485 100644 --- a/mlir/test/Pass/pass-timing.mlir +++ b/mlir/test/Pass/pass-timing.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt %s -disable-pass-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s 
-disable-pass-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s -// RUN: mlir-opt %s -disable-pass-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s -// RUN: mlir-opt %s -disable-pass-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s -// RUN: mlir-opt %s -disable-pass-threading=false -verify-each=false -test-pm-nested-pipeline -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=NESTED_MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func(cse,canonicalize,cse)' -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=false -test-pm-nested-pipeline -pass-timing -pass-timing-display=pipeline 2>&1 | FileCheck -check-prefix=NESTED_MT_PIPELINE %s // LIST: Pass execution timing report // LIST: Total Execution Time: diff --git a/mlir/test/SDBM/CMakeLists.txt b/mlir/test/SDBM/CMakeLists.txt index 9e0023750e68b..633fae707c855 100644 --- a/mlir/test/SDBM/CMakeLists.txt +++ b/mlir/test/SDBM/CMakeLists.txt @@ -1,3 +1,8 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + add_llvm_executable(mlir-sdbm-api-test sdbm-api-test.cpp ) @@ -9,8 +14,6 @@ target_link_libraries(mlir-sdbm-api-test MLIRIR MLIRSDBM MLIRSupport - LLVMCore - LLVMSupport ) target_include_directories(mlir-sdbm-api-test PRIVATE ..) 
diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir index 276203d416785..773c4e3928ee0 100644 --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -33,3 +33,11 @@ llvm.func @kernel_func() attributes {gpu.kernel} { // CHECK-LABEL: amdgpu_kernel void @kernel_func llvm.return } + +llvm.func @rocdl.barrier() { + // CHECK: fence syncscope("workgroup") release + // CHECK-NEXT: call void @llvm.amdgcn.s.barrier() + // CHECK-NEXT: fence syncscope("workgroup") acquire + rocdl.barrier + llvm.return +} diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir new file mode 100644 index 0000000000000..76212538aa3f9 --- /dev/null +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -0,0 +1,143 @@ +// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s -dump-input-on-failure + +// CHECK-LABEL: func @func_signature_conversion +func @func_signature_conversion(%arg0: tensor<4x8xf32>) { + return +} +// CHECK: ({{.*}}: memref<4x8xf32>) { + +// ----- + +// CHECK-LABEL: func @non_void_to_void_return_op_converter +func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> { + return %arg0 : tensor<4x8xf32> +} +// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>, %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) { +// CHECK-NEXT: linalg.copy(%[[ARG0]], %[[RESULT]]) +// CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @func_and_block_signature_conversion +func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{ + cond_br %cond, ^bb1, ^bb2 + ^bb1: + br ^exit(%arg0 : tensor<2xf32>) + ^bb2: + br ^exit(%arg0 : tensor<2xf32>) + ^exit(%arg2: tensor<2xf32>): + return %arg1 : tensor<4x4xf32> +} +// CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]], %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) { +// CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]]) +// CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]]) +// CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]]) +// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT]]) +// CHECK-NEXT: return + +// ----- + +// Test Case: Simple case for checking if BufferAssignmentPlacer creates AllocOps right before GenericOps. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @compute_allocs_position_simple +func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ + %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 { + ^bb0(%gen1_arg0: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: tensor<2xf32> -> tensor<2xf32> + %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 { + ^bb0(%gen2_arg0: f32): + %tmp2 = exp %gen2_arg0 : f32 + linalg.yield %tmp2 : f32 + }: tensor<2xf32> -> tensor<2xf32> + return %1 : tensor<2xf32> +} +// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[FIRST_ALLOC]] +// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[FIRST_ALLOC]], %[[SECOND_ALLOC]] + +// ----- + +// Test Case: if-else case for checking if BufferAssignmentPlacer creates AllocOps right before GenericOps. 
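+// (As in the simple case above, the expected shape is a fresh alloc immediately
+// before every converted generic, in whichever block that generic ends up; as a
+// sketch, with %in and %buf as placeholder names:
+//   %buf = alloc() : memref<2xf32>
+//   linalg.generic {...} %in, %buf { ... }: memref<2xf32>, memref<2xf32>
+// The CHECK-NEXT pairs below pin down exactly this alloc/generic pairing.)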
+ +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @compute_allocs_position +func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{ + %0 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 { + ^bb0(%gen1_arg0: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: tensor<2xf32> -> tensor<2xf32> + %1 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0 { + ^bb0(%gen2_arg0: f32): + %tmp2 = exp %gen2_arg0 : f32 + linalg.yield %tmp2 : f32 + }: tensor<2xf32> -> tensor<2xf32> + cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>), + ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>) + ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>): + %2 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 { + ^bb0(%gen3_arg0: f32): + %tmp3 = exp %gen3_arg0 : f32 + linalg.yield %tmp3 : f32 + }: tensor<2xf32> -> tensor<2xf32> + %3 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %2 { + ^bb0(%gen4_arg0: f32): + %tmp4 = exp %gen4_arg0 : f32 + linalg.yield %tmp4 : f32 + }: tensor<2xf32> -> tensor<2xf32> + br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>) + ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>): + %4 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 { + ^bb0(%gen5_arg0: f32): + %tmp5 = exp %gen5_arg0 : f32 + linalg.yield %tmp5 : f32 + }: tensor<2xf32> -> tensor<2xf32> + %5 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %4 { + ^bb0(%gen6_arg0: f32): + %tmp6 = exp %gen6_arg0 : f32 + linalg.yield %tmp6 : f32 + }: tensor<2xf32> -> tensor<2xf32> + br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>) + ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>): + %6 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0 { + ^bb0(%gen7_arg0: f32): + %tmp7 = exp %gen7_arg0 : f32 + linalg.yield %tmp7 : f32 + }: tensor<2xf32> -> tensor<2xf32> + %7 = linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %6 { + ^bb0(%gen8_arg0: f32): + %tmp8 = exp %gen8_arg0 : f32 + linalg.yield %tmp8 : f32 + }: tensor<2xf32> -> tensor<2xf32> + return %7 : tensor<2xf32> +} +// CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>, +// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[ALLOC0]] +// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ALLOC0]], %[[ALLOC1]] +// CHECK: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]]( +// CHECK-NEXT: ^[[BB0]] +// CHECK-NEXT: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[ALLOC2]] +// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ALLOC2]], %[[ALLOC3]] +// CHECK: br ^[[EXIT:.*]]({{.*}}) +// CHECK-NEXT: ^[[BB1]] +// CHECK-NEXT: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[ALLOC4]] +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ALLOC4]], %[[ALLOC5]] +// CHECK: br ^[[EXIT]] +// CHECK-NEXT: ^[[EXIT]] +// CHECK-NEXT: %[[ALLOC6:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[ALLOC6]] +// 
CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.generic {{.*}} %[[ALLOC6]], %[[ALLOC7]] diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir new file mode 100644 index 0000000000000..8c81dd9da0973 --- /dev/null +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -0,0 +1,412 @@ +// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure + +// This file checks the behaviour of the BufferPlacement pass for moving Alloc and Dealloc +// operations and inserting the missing DeallocOps in their correct positions. + +// Test Case: +// bb0 +// / \ +// bb1 bb2 <- Initial position of AllocOp +// \ / +// bb3 +// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block, +// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @condBranch +func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2 +^bb1: + br ^bb3(%arg1 : memref<2xf32>) +^bb2: + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + br ^bb3(%0 : memref<2xf32>) +^bb3(%1: memref<2xf32>): + "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: cond_br +// CHECK: linalg.copy +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// Test Case: Existing AllocOp with no users. +// BufferPlacement Expected Behaviour: It should insert a DeallocOp right before ReturnOp. + +// CHECK-LABEL: func @emptyUsesValue +func @emptyUsesValue(%arg0: memref<4xf32>) { + %0 = alloc() : memref<4xf32> + return +} +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// Test Case: +// bb0 +// / \ +// | bb1 <- Initial position of AllocOp +// \ / +// bb2 +// BufferPlacement Expected Behaviour: It should move the existing AllocOp to the entry block +// and insert a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @criticalEdge +func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) +^bb1: + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + br ^bb2(%0 : memref<2xf32>) +^bb2(%1: memref<2xf32>): + "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: cond_br +// CHECK: linalg.copy +// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: return + +// ----- + +// Test Case: +// bb0 <- Initial position of AllocOp +// / \ +// | bb1 +// \ / +// bb2 +// BufferPlacement Expected Behaviour: It shouldn't move the alloc position. It only inserts +// a DeallocOp at the exit block after CopyOp since %1 is an alias for %0 and %arg1. 
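+// (Sketched concretely: the alloc already dominates all uses, so only a dealloc
+// is inserted, right after the last use of any alias of %0, which is the
+// linalg.copy reading %1:
+//   "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+//   dealloc %0 : memref<2xf32>
+//   return
+// exactly what the trailing CHECK lines expect.)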
+ +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @invCriticalEdge +func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) +^bb1: + br ^bb2(%0 : memref<2xf32>) +^bb2(%1: memref<2xf32>): + "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: dealloc +// CHECK-NEXT: return + +// ----- + +// Test Case: +// bb0 <- Initial position of the first AllocOp +// / \ +// bb1 bb2 +// \ / +// bb3 <- Initial position of the second AllocOp +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts two missing DeallocOps in the exit block. +// %5 is an alias for %0. Therefore, the DeallocOp for %0 should occur after the last GenericOp. The Dealloc for %7 should +// happen after the CopyOp. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @ifElse +func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) +^bb3(%5: memref<2xf32>, %6: memref<2xf32>): + %7 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %5, %7 { + ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): + %tmp2 = exp %gen2_arg0 : f32 + linalg.yield %tmp2 : f32 + }: memref<2xf32>, memref<2xf32> + "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic +// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic +// CHECK: dealloc %[[FIRST_ALLOC]] +// CHECK-NEXT: linalg.copy +// CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: return + +// ----- + +// Test Case: No users for buffer in if-else CFG +// bb0 <- Initial position of AllocOp +// / \ +// bb1 bb2 +// \ / +// bb3 +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOp. It only inserts a missing DeallocOp +// in the exit block since %5 or %6 are the latest aliases of %0. 
+ +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @ifElseNoUsers +func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) +^bb3(%5: memref<2xf32>, %6: memref<2xf32>): + "linalg.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: dealloc +// CHECK-NEXT: return + +// ----- + +// Test Case: +// bb0 <- Initial position of the first AllocOp +// / \ +// bb1 bb2 +// | / \ +// | bb3 bb4 +// \ \ / +// \ / +// bb5 <- Initial position of the second AllocOp +// BufferPlacement Expected Behaviour: AllocOps shouldn't be moved. +// Two missing DeallocOps should be inserted in the exit block. + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @ifElseNested +func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: memref<2xf32>): + cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>) +^bb3(%5: memref<2xf32>): + br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>) +^bb4(%6: memref<2xf32>): + br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) +^bb5(%7: memref<2xf32>, %8: memref<2xf32>): + %9 = alloc() : memref<2xf32> + linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %7, %9 { + ^bb0(%gen2_arg0: f32, %gen2_arg1: f32): + %tmp2 = exp %gen2_arg0 : f32 + linalg.yield %tmp2 : f32 + }: memref<2xf32>, memref<2xf32> + "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic +// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK-NEXT: linalg.generic +// CHECK: dealloc %[[FIRST_ALLOC]] +// CHECK-NEXT: linalg.copy +// CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: return + +// ----- + +// Test Case: Dead operations in a single block. +// BufferPlacement Expected Behaviour: It shouldn't move the AllocOps. It only inserts the two missing DeallocOps +// after the last GenericOp. 
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @redundantOperations
+func @redundantOperations(%arg0: memref<2xf32>) {
+  %0 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  %1 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %0, %1 {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  return
+}
+
+// CHECK: (%[[ARG0:.*]]: {{.*}})
+// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
+// CHECK-NEXT: linalg.generic {{.*}} %[[ARG0]], %[[FIRST_ALLOC]]
+// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
+// CHECK-NEXT: linalg.generic {{.*}} %[[FIRST_ALLOC]], %[[SECOND_ALLOC]]
+// CHECK: dealloc
+// CHECK-NEXT: dealloc
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+//                                           bb0
+//                                          /   \
+// Initial position of the first AllocOp -> bb1 bb2 <- Initial position of the second AllocOp
+//                                          \   /
+//                                           bb3
+// BufferPlacement Expected Behaviour: Both AllocOps should be moved to the
+// entry block. Both missing DeallocOps should be inserted in the exit block
+// after the CopyOp, since %arg2 is an alias for %0 and %1.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
+func @moving_alloc_and_inserting_missing_dealloc(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){
+  cond_br %cond, ^bb1, ^bb2
+^bb1:
+  %0 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  br ^exit(%0 : memref<2xf32>)
+^bb2:
+  %1 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  br ^exit(%1 : memref<2xf32>)
+^exit(%arg2: memref<2xf32>):
+  "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: %{{.*}} = alloc()
+// CHECK-NEXT: %{{.*}} = alloc()
+// CHECK: linalg.copy
+// CHECK-NEXT: dealloc
+// CHECK-NEXT: dealloc
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: Invalid position of the DeallocOp. There is a user after deallocation.
+//   bb0
+//  /   \
+// bb1  bb2 <- Initial position of AllocOp
+//  \   /
+//   bb3
+// BufferPlacement Expected Behaviour: It should move the AllocOp to the entry block.
+// The existing DeallocOp should be moved to the exit block.
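+// Illustrative sketch (comment only, hypothetical output not checked by
+// FileCheck): %1 is expected to be allocated in the entry block and freed
+// only after its last alias %arg2 is used:
+//   "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+//   dealloc %1 : memref<2xf32>
+//   return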
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @moving_invalid_dealloc_op_complex
+func @moving_invalid_dealloc_op_complex(%cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>){
+  cond_br %cond, ^bb1, ^bb2
+^bb1:
+  br ^exit(%arg0 : memref<2xf32>)
+^bb2:
+  %1 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %1 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  dealloc %1 : memref<2xf32>
+  br ^exit(%1 : memref<2xf32>)
+^exit(%arg2: memref<2xf32>):
+  "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: %{{.*}} = alloc()
+// CHECK: linalg.copy
+// CHECK-NEXT: dealloc
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: Inserting a missing DeallocOp in a single block.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @inserting_missing_dealloc_simple
+func @inserting_missing_dealloc_simple(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){
+  %0 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: linalg.copy
+// CHECK-NEXT: dealloc
+
+// -----
+
+// Test Case: Moving an invalid DeallocOp (there is a user after deallocation) in a single block.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @moving_invalid_dealloc_op
+func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>){
+  %0 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg0, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  dealloc %0 : memref<2xf32>
+  "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK: linalg.copy
+// CHECK-NEXT: dealloc
\ No newline at end of file
diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir
new file mode 100644
index 0000000000000..86cac9dddbb3d
--- /dev/null
+++ b/mlir/test/Transforms/canonicalize-block-merge.mlir
@@ -0,0 +1,204 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s
+
+// Check the simple case of single operation blocks with a return.
+
+// CHECK-LABEL: func @return_blocks(
+func @return_blocks() {
+  // CHECK: "foo.cond_br"()[^bb1, ^bb1]
+  // CHECK: ^bb1:
+  // CHECK-NEXT: return
+  // CHECK-NOT: ^bb2
+
+  "foo.cond_br"() [^bb1, ^bb2] : () -> ()
+
+^bb1:
+  return
+^bb2:
+  return
+}
+
+// Check the case of identical blocks with matching arguments.
+
+// CHECK-LABEL: func @matching_arguments(
+func @matching_arguments() -> i32 {
+  // CHECK: "foo.cond_br"()[^bb1, ^bb1]
+  // CHECK: ^bb1(%{{.*}}: i32):
+  // CHECK-NEXT: return
+  // CHECK-NOT: ^bb2
+
+  "foo.cond_br"() [^bb1, ^bb2] : () -> ()
+
+^bb1(%arg0 : i32):
+  return %arg0 : i32
+^bb2(%arg1 : i32):
+  return %arg1 : i32
+}
+
+// Check that no merging occurs if there is an operand mismatch and we can't
+// update the predecessor.
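+// Illustrative sketch (comment only): the two return blocks below differ in
+// their operands (%arg0 vs. %arg1), and the unregistered terminator
+//   "foo.cond_br"() [^bb1, ^bb2] : () -> ()
+// gives the canonicalizer no known way to forward distinct operands through
+// block arguments, so both blocks must be kept.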
+
+// CHECK-LABEL: func @mismatch_unknown_terminator
+func @mismatch_unknown_terminator(%arg0 : i32, %arg1 : i32) -> i32 {
+  // CHECK: "foo.cond_br"()[^bb1, ^bb2]
+
+  "foo.cond_br"() [^bb1, ^bb2] : () -> ()
+
+^bb1:
+  return %arg0 : i32
+^bb2:
+  return %arg1 : i32
+}
+
+// Check that merging does occur if there is an operand mismatch and we can
+// update the predecessor.
+
+// CHECK-LABEL: func @mismatch_operands
+// CHECK-SAME: %[[COND:.*]]: i1, %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32
+func @mismatch_operands(%cond : i1, %arg0 : i32, %arg1 : i32) -> i32 {
+  // CHECK: %[[RES:.*]] = select %[[COND]], %[[ARG0]], %[[ARG1]]
+  // CHECK: return %[[RES]]
+
+  cond_br %cond, ^bb1, ^bb2
+
+^bb1:
+  return %arg0 : i32
+^bb2:
+  return %arg1 : i32
+}
+
+// Check the same as above, but with pre-existing arguments.
+
+// CHECK-LABEL: func @mismatch_operands_matching_arguments(
+// CHECK-SAME: %[[COND:.*]]: i1, %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32
+func @mismatch_operands_matching_arguments(%cond : i1, %arg0 : i32, %arg1 : i32) -> (i32, i32) {
+  // CHECK: %[[RES0:.*]] = select %[[COND]], %[[ARG1]], %[[ARG0]]
+  // CHECK: %[[RES1:.*]] = select %[[COND]], %[[ARG0]], %[[ARG1]]
+  // CHECK: return %[[RES1]], %[[RES0]]
+
+  cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
+
+^bb1(%arg2 : i32):
+  return %arg0, %arg2 : i32, i32
+^bb2(%arg3 : i32):
+  return %arg1, %arg3 : i32, i32
+}
+
+// Check that merging does not occur if the uses of the arguments differ.
+
+// CHECK-LABEL: func @mismatch_argument_uses(
+func @mismatch_argument_uses(%cond : i1, %arg0 : i32, %arg1 : i32) -> (i32, i32) {
+  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+
+  cond_br %cond, ^bb1(%arg1 : i32), ^bb2(%arg0 : i32)
+
+^bb1(%arg2 : i32):
+  return %arg0, %arg2 : i32, i32
+^bb2(%arg3 : i32):
+  return %arg3, %arg1 : i32, i32
+}
+
+// Check that merging does not occur if the types of the arguments differ.
+
+// CHECK-LABEL: func @mismatch_argument_types(
+func @mismatch_argument_types(%cond : i1, %arg0 : i32, %arg1 : i16) {
+  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+
+  cond_br %cond, ^bb1(%arg0 : i32), ^bb2(%arg1 : i16)
+
+^bb1(%arg2 : i32):
+  "foo.return"(%arg2) : (i32) -> ()
+^bb2(%arg3 : i16):
+  "foo.return"(%arg3) : (i16) -> ()
}
+
+// Check that merging does not occur if the number of arguments differs.
+
+// CHECK-LABEL: func @mismatch_argument_count(
+func @mismatch_argument_count(%cond : i1, %arg0 : i32) {
+  // CHECK: cond_br %{{.*}}, ^bb1(%{{.*}}), ^bb2
+
+  cond_br %cond, ^bb1(%arg0 : i32), ^bb2
+
+^bb1(%arg2 : i32):
+  "foo.return"(%arg2) : (i32) -> ()
+^bb2:
+  "foo.return"() : () -> ()
+}
+
+// Check that merging does not occur if the operations differ.
+
+// CHECK-LABEL: func @mismatch_operations(
+func @mismatch_operations(%cond : i1) {
+  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+
+  cond_br %cond, ^bb1, ^bb2
+
+^bb1:
+  "foo.return"() : () -> ()
+^bb2:
+  return
+}
+
+// Check that merging does not occur if the number of operations differs.
+
+// CHECK-LABEL: func @mismatch_operation_count(
+func @mismatch_operation_count(%cond : i1) {
+  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+
+  cond_br %cond, ^bb1, ^bb2
+
+^bb1:
+  "foo.op"() : () -> ()
+  return
+^bb2:
+  return
+}
+
+// Check that merging does not occur if the blocks contain regions.
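+// Illustrative sketch (comment only): both blocks below wrap their body in
+//   loop.if %cond { "foo.op"() : () -> () }
+// and the merge logic conservatively treats blocks holding regions as
+// unmergeable, even when the regions are textually identical.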
+
+// CHECK-LABEL: func @contains_regions(
+func @contains_regions(%cond : i1) {
+  // CHECK: cond_br %{{.*}}, ^bb1, ^bb2
+
+  cond_br %cond, ^bb1, ^bb2
+
+^bb1:
+  loop.if %cond {
+    "foo.op"() : () -> ()
+  }
+  return
+^bb2:
+  loop.if %cond {
+    "foo.op"() : () -> ()
+  }
+  return
+}
+
+// Check that merging properly handles back edges and the case where a value
+// from one block is used in another.
+
+// CHECK-LABEL: func @mismatch_loop(
+// CHECK-SAME: %[[ARG:.*]]: i1
+func @mismatch_loop(%cond : i1) {
+  // CHECK: cond_br %{{.*}}, ^bb1(%[[ARG]] : i1), ^bb2
+
+  cond_br %cond, ^bb2, ^bb3
+
+^bb1:
+  // CHECK: ^bb1(%[[ARG2:.*]]: i1):
+  // CHECK-NEXT: %[[LOOP_CARRY:.*]] = "foo.op"
+  // CHECK-NEXT: cond_br %[[ARG2]], ^bb1(%[[LOOP_CARRY]] : i1), ^bb2
+
+  %ignored = "foo.op"() : () -> (i1)
+  cond_br %cond2, ^bb1, ^bb3
+
+^bb2:
+  %cond2 = "foo.op"() : () -> (i1)
+  cond_br %cond, ^bb1, ^bb3
+
+^bb3:
+  // CHECK: ^bb2:
+  // CHECK-NEXT: return
+
+  return
+}
diff --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir
index b93af002823a8..6028821934ff0 100644
--- a/mlir/test/Transforms/canonicalize-dce.mlir
+++ b/mlir/test/Transforms/canonicalize-dce.mlir
@@ -62,10 +62,6 @@ func @f(%arg0: f32) {
 // Test case: Delete block arguments for cond_br.
 
 // CHECK: func @f(%arg0: f32, %arg1: i1)
-// CHECK-NEXT: cond_br %arg1, ^bb1, ^bb2
-// CHECK-NEXT: ^bb1:
-// CHECK-NEXT: return
-// CHECK-NEXT: ^bb2:
 // CHECK-NEXT: return
 
 func @f(%arg0: f32, %pred: i1) {
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index 6528d10ad5cf8..1cff314d731a7 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s -dump-input-on-failure
 
 // CHECK-LABEL: func @test_subi_zero
 func @test_subi_zero(%arg0: i32) -> i32 {
@@ -310,26 +310,26 @@ func @xor_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
 }
 
 // CHECK-LABEL: func @memref_cast_folding
-func @memref_cast_folding(%arg0: memref<4 x f32>, %arg1: f32) -> f32 {
-  %1 = memref_cast %arg0 : memref<4xf32> to memref<?xf32>
+func @memref_cast_folding(%arg0: memref<4 x f32>, %arg1: f32) -> (f32, f32) {
+  %0 = memref_cast %arg0 : memref<4xf32> to memref<?xf32>
   // CHECK-NEXT: %c0 = constant 0 : index
   %c0 = constant 0 : index
-  %dim = dim %1, 0 : memref<?xf32>
+  %dim = dim %0, 0 : memref<?xf32>
 
   // CHECK-NEXT: affine.load %arg0[3]
-  affine.load %1[%dim - 1] : memref<?xf32>
+  %1 = affine.load %0[%dim - 1] : memref<?xf32>
 
   // CHECK-NEXT: store %arg1, %arg0[%c0] : memref<4xf32>
-  store %arg1, %1[%c0] : memref<?xf32>
+  store %arg1, %0[%c0] : memref<?xf32>
 
   // CHECK-NEXT: %{{.*}} = load %arg0[%c0] : memref<4xf32>
-  %0 = load %1[%c0] : memref<?xf32>
+  %2 = load %0[%c0] : memref<?xf32>
 
   // CHECK-NEXT: dealloc %arg0 : memref<4xf32>
-  dealloc %1: memref<?xf32>
+  dealloc %0: memref<?xf32>
 
   // CHECK-NEXT: return %{{.*}}
-  return %0 : f32
+  return %1, %2 : f32, f32
 }
 
 // CHECK-LABEL: func @alloc_const_fold
@@ -361,19 +361,15 @@ func @dead_dealloc_fold() {
 
 // CHECK-LABEL: func @dead_dealloc_fold_multi_use
 func @dead_dealloc_fold_multi_use(%cond : i1) {
-  // CHECK-NEXT: cond_br
+  // CHECK-NEXT: return
   %a = alloc() : memref<4xf32>
   cond_br %cond, ^bb1, ^bb2
 
-// CHECK-LABEL: bb1:
^bb1:
-  // CHECK-NEXT: return
   dealloc %a: memref<4xf32>
   return
 
-// CHECK-LABEL: bb2:
^bb2:
-  // CHECK-NEXT: return
   dealloc %a: memref<4xf32>
   return
 }
@@ 
-869,7 +865,8 @@ func @remove_dead_else(%M : memref<100 x i32>) { affine.load %M[%i] : memref<100xi32> affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%i) { affine.for %j = 0 to 100 { - affine.load %M[%j] : memref<100xi32> + %1 = affine.load %M[%j] : memref<100xi32> + "prevent.dce"(%1) : (i32) -> () } } else { // Nothing @@ -881,6 +878,44 @@ func @remove_dead_else(%M : memref<100 x i32>) { // CHECK: affine.if // CHECK-NEXT: affine.for // CHECK-NEXT: affine.load +// CHECK-NEXT: "prevent.dce" // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.load + +// ----- + +// CHECK-LABEL: func @divi_signed_by_one +// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]] +func @divi_signed_by_one(%arg0: i32) -> (i32) { + %c1 = constant 1 : i32 + %res = divi_signed %arg0, %c1 : i32 + // CHECK: return %[[ARG]] + return %res : i32 +} + +// CHECK-LABEL: func @divi_unsigned_by_one +// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]] +func @divi_unsigned_by_one(%arg0: i32) -> (i32) { + %c1 = constant 1 : i32 + %res = divi_unsigned %arg0, %c1 : i32 + // CHECK: return %[[ARG]] + return %res : i32 +} + +// CHECK-LABEL: func @tensor_divi_signed_by_one +// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]] +func @tensor_divi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> { + %c1 = constant dense<1> : tensor<4x5xi32> + %res = divi_signed %arg0, %c1 : tensor<4x5xi32> + // CHECK: return %[[ARG]] + return %res : tensor<4x5xi32> +} + +// CHECK-LABEL: func @tensor_divi_unsigned_by_one +// CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]] +func @tensor_divi_unsigned_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> { + %c1 = constant dense<1> : tensor<4x5xi32> + %res = divi_unsigned %arg0, %c1 : tensor<4x5xi32> + // CHECK: return %[[ARG]] + return %res : tensor<4x5xi32> +} diff --git a/mlir/test/Transforms/loop-fusion-transformation.mlir b/mlir/test/Transforms/loop-fusion-transformation.mlir index 1a01063016272..1b335fa1669a7 100644 --- a/mlir/test/Transforms/loop-fusion-transformation.mlir +++ b/mlir/test/Transforms/loop-fusion-transformation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s // CHECK-LABEL: func @slice_depth1_loop_nest() { func @slice_depth1_loop_nest() { @@ -9,10 +9,12 @@ func @slice_depth1_loop_nest() { } affine.for %i1 = 0 to 5 { %1 = affine.load %0[%i1] : memref<100xf32> + "prevent.dce"(%1) : (f32) -> () } // CHECK: affine.for %[[IV0:.*]] = 0 to 5 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32> // CHECK-NEXT: affine.load %{{.*}}[%[[IV0]]] : memref<100xf32> + // CHECK-NEXT: "prevent.dce"(%1) : (f32) -> () // CHECK-NEXT: } // CHECK-NEXT: return return @@ -74,15 +76,16 @@ func @should_fuse_avoiding_dependence_cycle() { // 3) loop1 -> loop2 on memref '%{{.*}}' affine.for %i0 = 0 to 10 { %v0 = affine.load %a[%i0] : memref<10xf32> - affine.store %cf7, %b[%i0] : memref<10xf32> + affine.store %v0, %b[%i0] : memref<10xf32> } affine.for %i1 = 0 to 10 { affine.store %cf7, %a[%i1] : memref<10xf32> %v1 = affine.load %c[%i1] : memref<10xf32> + "prevent.dce"(%v1) : (f32) -> () } affine.for %i2 = 0 to 10 { %v2 = affine.load %b[%i2] : memref<10xf32> - affine.store %cf7, %c[%i2] : memref<10xf32> + affine.store %v2, %c[%i2] : memref<10xf32> } // Fusing loop first loop into last would create a cycle: // {1} <--> {0, 2} @@ -97,6 +100,7 @@ func @should_fuse_avoiding_dependence_cycle() { // CHECK-NEXT: affine.store 
%{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32> + // CHECK-NEXT: "prevent.dce" // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: } diff --git a/mlir/test/Transforms/memref-normalize.mlir b/mlir/test/Transforms/memref-normalize.mlir index 2ed69da5fb0d0..6254899b1fe8e 100644 --- a/mlir/test/Transforms/memref-normalize.mlir +++ b/mlir/test/Transforms/memref-normalize.mlir @@ -1,11 +1,12 @@ -// RUN: mlir-opt -simplify-affine-structures %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -simplify-affine-structures %s | FileCheck %s // CHECK-LABEL: func @permute() func @permute() { %A = alloc() : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> affine.for %i = 0 to 64 { affine.for %j = 0 to 256 { - affine.load %A[%i, %j] : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> + %1 = affine.load %A[%i, %j] : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> + "prevent.dce"(%1) : (f32) -> () } } dealloc %A : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> @@ -17,6 +18,7 @@ func @permute() { // CHECK-NEXT: affine.for %[[I:arg[0-9]+]] = 0 to 64 { // CHECK-NEXT: affine.for %[[J:arg[0-9]+]] = 0 to 256 { // CHECK-NEXT: affine.load [[MEM]][%[[J]], %[[I]]] : memref<256x64xf32> +// CHECK-NEXT: "prevent.dce" // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: dealloc [[MEM]] @@ -29,7 +31,8 @@ func @shift(%idx : index) { // CHECK-NEXT: affine.load %{{.*}}[symbol(%arg0) + 1] : memref<65xf32> affine.load %A[%idx] : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> affine.for %i = 0 to 64 { - affine.load %A[%i] : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> + %1 = affine.load %A[%i] : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> + "prevent.dce"(%1) : (f32) -> () // CHECK: %{{.*}} = affine.load %{{.*}}[%arg{{.*}} + 1] : memref<65xf32> } return @@ -45,8 +48,9 @@ func @high_dim_permute() { affine.for %j = 0 to 128 { // CHECK: %[[K:arg[0-9]+]] affine.for %k = 0 to 256 { - affine.load %A[%i, %j, %k] : memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> + %1 = affine.load %A[%i, %j, %k] : memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> // CHECK: %{{.*}} = affine.load %{{.*}}[%[[K]], %[[I]], %[[J]]] : memref<256x64x128xf32> + "prevent.dce"(%1) : (f32) -> () } } } @@ -66,7 +70,8 @@ func @data_tiling(%idx : index) { // CHECK: alloc() : memref<8x32x8x16xf32> %A = alloc() : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> // CHECK: affine.load %{{.*}}[symbol(%arg0) floordiv 8, symbol(%arg0) floordiv 16, symbol(%arg0) mod 8, symbol(%arg0) mod 16] - affine.load %A[%idx, %idx] : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> + %1 = affine.load %A[%idx, %idx] : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> + "prevent.dce"(%1) : (f32) -> () return } @@ -79,7 +84,8 @@ func @strided() { // CHECK: affine.for %[[IV1:.*]] = affine.for %j = 0 to 128 { // CHECK: affine.load %{{.*}}[%[[IV0]] * 2, %[[IV1]] * 4] : memref<127x509xf32> - affine.load %A[%i, %j] : memref<64x128xf32, affine_map<(d0, d1) -> (2*d0, 4*d1)>> + %1 = affine.load %A[%i, %j] : memref<64x128xf32, affine_map<(d0, d1) -> (2*d0, 4*d1)>> + "prevent.dce"(%1) : (f32) -> () } } return @@ -94,7 +100,8 @@ func @strided_cumulative() { // CHECK: affine.for 
%[[IV1:.*]] = affine.for %j = 0 to 5 { // CHECK: affine.load %{{.*}}[%[[IV0]] * 3 + %[[IV1]] * 17] : memref<72xf32> - affine.load %A[%i, %j] : memref<2x5xf32, affine_map<(d0, d1) -> (3*d0 + 17*d1)>> + %1 = affine.load %A[%i, %j] : memref<2x5xf32, affine_map<(d0, d1) -> (3*d0 + 17*d1)>> + "prevent.dce"(%1) : (f32) -> () } } return @@ -109,7 +116,8 @@ func @symbolic_operands(%s : index) { affine.for %i = 0 to 10 { affine.for %j = 0 to 10 { // CHECK: affine.load %{{.*}}[%{{.*}} * 10 + %{{.*}}] : memref<100xf32> - affine.load %A[%i, %j] : memref<10x10xf32, affine_map<(d0,d1)[s0] -> (10*d0 + d1)>> + %1 = affine.load %A[%i, %j] : memref<10x10xf32, affine_map<(d0,d1)[s0] -> (10*d0 + d1)>> + "prevent.dce"(%1) : (f32) -> () } } return diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index 6fcb78cf4c567..55c851dce9137 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -37,11 +37,9 @@ func @parallel_many_dims() { // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: loop.parallel ([[NEW_I0:%.*]], [[NEW_I1:%.*]], [[NEW_I2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C2]], [[C1]], [[C1]]) step ([[C1]], [[C1]], [[C1]]) { // CHECK: [[I0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index -// CHECK: [[I3_COUNT:%.*]] = divi_signed [[NEW_I0]], [[C1]] : index -// CHECK: [[I4_COUNT:%.*]] = divi_signed [[NEW_I1]], [[C1]] : index -// CHECK: [[VAL_16:%.*]] = muli [[I4_COUNT]], [[C13]] : index +// CHECK: [[VAL_16:%.*]] = muli [[NEW_I1]], [[C13]] : index // CHECK: [[I4:%.*]] = addi [[VAL_16]], [[C12]] : index -// CHECK: [[VAL_18:%.*]] = muli [[I3_COUNT]], [[C10]] : index +// CHECK: [[VAL_18:%.*]] = muli [[NEW_I0]], [[C10]] : index // CHECK: [[I3:%.*]] = addi [[VAL_18]], [[C9]] : index // CHECK: [[VAL_20:%.*]] = muli [[NEW_I2]], [[C7]] : index // CHECK: [[I2:%.*]] = addi [[VAL_20]], [[C6]] : index diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir new file mode 100644 index 0000000000000..5d47a277df931 --- /dev/null +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -0,0 +1,257 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -dump-input-on-failure + +/// Check that a constant is properly propagated through the arguments and +/// results of a private function. + +// CHECK-LABEL: func @private( +func @private(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: return %[[CST]] : i32 + + return %arg0 : i32 +} + +// CHECK-LABEL: func @simple_private( +func @simple_private() -> i32 { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: return %[[CST]] : i32 + + %1 = constant 1 : i32 + %result = call @private(%1) : (i32) -> i32 + return %result : i32 +} + +// ----- + +/// Check that a constant is properly propagated through the arguments and +/// results of a visible nested function. 
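+/// Illustrative sketch (comment only): with
+///   func @nested(%arg0 : i32) -> i32 attributes { sym_visibility = "nested" }
+/// every use of @nested is visible to SCCP, so the callee argument lattice
+/// collapses to the constant 1 and both the callee's return and the caller's
+/// call result fold to that constant.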
+ +// CHECK: func @nested( +func @nested(%arg0 : i32) -> i32 attributes { sym_visibility = "nested" } { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: return %[[CST]] : i32 + + return %arg0 : i32 +} + +// CHECK-LABEL: func @simple_nested( +func @simple_nested() -> i32 { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: return %[[CST]] : i32 + + %1 = constant 1 : i32 + %result = call @nested(%1) : (i32) -> i32 + return %result : i32 +} + +// ----- + +/// Check that non-visible nested functions do not track arguments. +module { + // NESTED-LABEL: module @nested_module + module @nested_module attributes { sym_visibility = "public" } { + + // NESTED: func @nested( + func @nested(%arg0 : i32) -> (i32, i32) attributes { sym_visibility = "nested" } { + // NESTED: %[[CST:.*]] = constant 1 : i32 + // NESTED: return %[[CST]], %arg0 : i32, i32 + + %1 = constant 1 : i32 + return %1, %arg0 : i32, i32 + } + + // NESTED: func @nested_not_all_uses_visible( + func @nested_not_all_uses_visible() -> (i32, i32) { + // NESTED: %[[CST:.*]] = constant 1 : i32 + // NESTED: %[[CALL:.*]]:2 = call @nested + // NESTED: return %[[CST]], %[[CALL]]#1 : i32, i32 + + %1 = constant 1 : i32 + %result:2 = call @nested(%1) : (i32) -> (i32, i32) + return %result#0, %result#1 : i32, i32 + } + } +} + +// ----- + +/// Check that public functions do not track arguments. + +// CHECK-LABEL: func @public( +func @public(%arg0 : i32) -> (i32, i32) attributes { sym_visibility = "public" } { + %1 = constant 1 : i32 + return %1, %arg0 : i32, i32 +} + +// CHECK-LABEL: func @simple_public( +func @simple_public() -> (i32, i32) { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: %[[CALL:.*]]:2 = call @public + // CHECK: return %[[CST]], %[[CALL]]#1 : i32, i32 + + %1 = constant 1 : i32 + %result:2 = call @public(%1) : (i32) -> (i32, i32) + return %result#0, %result#1 : i32, i32 +} + +// ----- + +/// Check that functions with non-call users don't have arguments tracked. + +func @callable(%arg0 : i32) -> (i32, i32) attributes { sym_visibility = "private" } { + %1 = constant 1 : i32 + return %1, %arg0 : i32, i32 +} + +// CHECK-LABEL: func @non_call_users( +func @non_call_users() -> (i32, i32) { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: %[[CALL:.*]]:2 = call @callable + // CHECK: return %[[CST]], %[[CALL]]#1 : i32, i32 + + %1 = constant 1 : i32 + %result:2 = call @callable(%1) : (i32) -> (i32, i32) + return %result#0, %result#1 : i32, i32 +} + +"live.user"() {uses = [@callable]} : () -> () + +// ----- + +/// Check that return values are overdefined in the presence of an unknown terminator. + +func @callable(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + "unknown.return"(%arg0) : (i32) -> () +} + +// CHECK-LABEL: func @unknown_terminator( +func @unknown_terminator() -> i32 { + // CHECK: %[[CALL:.*]] = call @callable + // CHECK: return %[[CALL]] : i32 + + %1 = constant 1 : i32 + %result = call @callable(%1) : (i32) -> i32 + return %result : i32 +} + +// ----- + +/// Check that return values are overdefined when the constant conflicts. 
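+/// Illustrative sketch (comment only): the callee below is invoked as
+///   call @callable(%1) : (i32) -> i32   // %1 = constant 1
+///   call @callable(%2) : (i32) -> i32   // %2 = constant 2
+/// so the argument lattice meets two different constants and goes to
+/// overdefined; neither call result can be folded.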
+ +func @callable(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + "unknown.return"(%arg0) : (i32) -> () +} + +// CHECK-LABEL: func @conflicting_constant( +func @conflicting_constant() -> (i32, i32) { + // CHECK: %[[CALL1:.*]] = call @callable + // CHECK: %[[CALL2:.*]] = call @callable + // CHECK: return %[[CALL1]], %[[CALL2]] : i32, i32 + + %1 = constant 1 : i32 + %2 = constant 2 : i32 + %result = call @callable(%1) : (i32) -> i32 + %result2 = call @callable(%2) : (i32) -> i32 + return %result, %result2 : i32, i32 +} + +// ----- + +/// Check that return values are overdefined when the constant conflicts with a +/// non-constant. + +func @callable(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + "unknown.return"(%arg0) : (i32) -> () +} + +// CHECK-LABEL: func @conflicting_constant( +func @conflicting_constant(%arg0 : i32) -> (i32, i32) { + // CHECK: %[[CALL1:.*]] = call @callable + // CHECK: %[[CALL2:.*]] = call @callable + // CHECK: return %[[CALL1]], %[[CALL2]] : i32, i32 + + %1 = constant 1 : i32 + %result = call @callable(%1) : (i32) -> i32 + %result2 = call @callable(%arg0) : (i32) -> i32 + return %result, %result2 : i32, i32 +} + +// ----- + +/// Check a more complex interaction with calls and control flow. + +// CHECK-LABEL: func @complex_inner_if( +func @complex_inner_if(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + // CHECK-DAG: %[[TRUE:.*]] = constant 1 : i1 + // CHECK-DAG: %[[CST:.*]] = constant 1 : i32 + // CHECK: cond_br %[[TRUE]], ^bb1 + + %cst_20 = constant 20 : i32 + %cond = cmpi "ult", %arg0, %cst_20 : i32 + cond_br %cond, ^bb1, ^bb2 + +^bb1: + // CHECK: ^bb1: + // CHECK: return %[[CST]] : i32 + + %cst_1 = constant 1 : i32 + return %cst_1 : i32 + +^bb2: + %cst_1_2 = constant 1 : i32 + %arg_inc = addi %arg0, %cst_1_2 : i32 + return %arg_inc : i32 +} + +func @complex_cond() -> i1 + +// CHECK-LABEL: func @complex_callee( +func @complex_callee(%arg0 : i32) -> i32 attributes { sym_visibility = "private" } { + // CHECK: %[[CST:.*]] = constant 1 : i32 + + %loop_cond = call @complex_cond() : () -> i1 + cond_br %loop_cond, ^bb1, ^bb2 + +^bb1: + // CHECK: ^bb1: + // CHECK-NEXT: return %[[CST]] : i32 + return %arg0 : i32 + +^bb2: + // CHECK: ^bb2: + // CHECK: call @complex_inner_if(%[[CST]]) : (i32) -> i32 + // CHECK: call @complex_callee(%[[CST]]) : (i32) -> i32 + // CHECK: return %[[CST]] : i32 + + %updated_arg = call @complex_inner_if(%arg0) : (i32) -> i32 + %res = call @complex_callee(%updated_arg) : (i32) -> i32 + return %res : i32 +} + +// CHECK-LABEL: func @complex_caller( +func @complex_caller(%arg0 : i32) -> i32 { + // CHECK: %[[CST:.*]] = constant 1 : i32 + // CHECK: return %[[CST]] : i32 + + %1 = constant 1 : i32 + %result = call @complex_callee(%1) : (i32) -> i32 + return %result : i32 +} + +// ----- + +/// Check that non-symbol defining callables currently go to overdefined. 
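+/// Illustrative sketch (comment only): the callable below is the result of
+///   "test.functional_region_op"() ({ ... }) : () -> (() -> i32)
+/// rather than a symbol reference, so interprocedural SCCP cannot enumerate
+/// its uses and conservatively marks the call_indirect result overdefined.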
+ +// CHECK-LABEL: func @non_symbol_defining_callable +func @non_symbol_defining_callable() -> i32 { + // CHECK: %[[RES:.*]] = call_indirect + // CHECK: return %[[RES]] : i32 + + %fn = "test.functional_region_op"() ({ + %1 = constant 1 : i32 + "test.return"(%1) : (i32) -> () + }) : () -> (() -> i32) + %res = call_indirect %fn() : () -> (i32) + return %res : i32 +} diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 5c5434446abe8..b1f9ffe88b2b4 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -4,6 +4,7 @@ func @verifyDirectPattern() -> i32 { // CHECK-NEXT: "test.legal_op_a"() {status = "Success"} %result = "test.illegal_op_a"() : () -> (i32) + // expected-remark@+1 {{op 'std.return' is not legalizable}} return %result : i32 } @@ -11,6 +12,7 @@ func @verifyDirectPattern() -> i32 { func @verifyLargerBenefit() -> i32 { // CHECK-NEXT: "test.legal_op_a"() {status = "Success"} %result = "test.illegal_op_c"() : () -> (i32) + // expected-remark@+1 {{op 'std.return' is not legalizable}} return %result : i32 } @@ -26,7 +28,9 @@ func @remap_input_1_to_1(%arg0: i64) { // CHECK-LABEL: func @remap_call_1_to_1(%arg0: f64) func @remap_call_1_to_1(%arg0: i64) { // CHECK-NEXT: call @remap_input_1_to_1(%arg0) : (f64) -> () + // expected-remark@+1 {{op 'std.call' is not legalizable}} call @remap_input_1_to_1(%arg0) : (i64) -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -40,6 +44,7 @@ func @remap_input_1_to_N(%arg0: f32) -> f32 { func @remap_input_1_to_N_remaining_use(%arg0: f32) { // CHECK-NEXT: [[CAST:%.*]] = "test.cast"(%arg0, %arg1) : (f16, f16) -> f32 // CHECK-NEXT: "work"([[CAST]]) : (f32) -> () + // expected-remark@+1 {{op 'work' is not legalizable}} "work"(%arg0) : (f32) -> () } @@ -47,6 +52,7 @@ func @remap_input_1_to_N_remaining_use(%arg0: f32) { func @remap_input_to_self(%arg0: index) { // CHECK-NOT: test.cast // CHECK: "work" + // expected-remark@+1 {{op 'work' is not legalizable}} "work"(%arg0) : (index) -> () } @@ -59,12 +65,14 @@ func @remap_multi(%arg0: i64, %unused: i16, %arg1: i64) -> (i64, i64) { // CHECK-LABEL: func @no_remap_nested func @no_remap_nested() { // CHECK-NEXT: "foo.region" + // expected-remark@+1 {{op 'foo.region' is not legalizable}} "foo.region"() ({ // CHECK-NEXT: ^bb0(%{{.*}}: i64, %{{.*}}: i16, %{{.*}}: i64): ^bb0(%i0: i64, %unused: i16, %i1: i64): // CHECK-NEXT: "test.valid"{{.*}} : (i64, i64) "test.invalid"(%i0, %i1) : (i64, i64) -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -78,6 +86,7 @@ func @remap_moved_region_args() { ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -91,6 +100,7 @@ func @remap_cloned_region_args() { ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () }) {legalizer.should_clone} : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -102,6 +112,7 @@ func @remap_drop_region() { ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -109,6 +120,7 @@ func @remap_drop_region() { func @dropped_input_in_use(%arg: i16, %arg2: i64) { // CHECK-NEXT: "test.cast"{{.*}} : () -> i16 // CHECK-NEXT: "work"{{.*}} : (i16) + // 
expected-remark@+1 {{op 'work' is not legalizable}} "work"(%arg) : (i16) -> () } @@ -117,6 +129,7 @@ func @up_to_date_replacement(%arg: i8) -> i8 { // CHECK-NEXT: return %repl_1 = "test.rewrite"(%arg) : (i8) -> i8 %repl_2 = "test.rewrite"(%repl_1) : (i8) -> i8 + // expected-remark@+1 {{op 'std.return' is not legalizable}} return %repl_2 : i8 } @@ -127,11 +140,13 @@ func @remove_foldable_op(%arg0 : i32) -> (i32) { %0 = "test.op_with_region_fold"(%arg0) ({ "foo.op_with_region_terminator"() : () -> () }) : (i32) -> (i32) + // expected-remark@+1 {{op 'std.return' is not legalizable}} return %0 : i32 } // CHECK-LABEL: @create_block func @create_block() { + // expected-remark@+1 {{op 'test.container' is not legalizable}} "test.container"() ({ // Check that we created a block with arguments. // CHECK-NOT: test.create_block @@ -140,6 +155,7 @@ func @create_block() { "test.create_block"() : () -> () "test.finish"() : () -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -147,6 +163,7 @@ func @create_block() { func @bounded_recursion() { // CHECK: test.recursive_rewrite 0 test.recursive_rewrite 3 + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -188,13 +205,16 @@ func @fail_to_convert_region() { // CHECK-LABEL: @create_illegal_block func @create_illegal_block() { + // expected-remark@+1 {{op 'test.container' is not legalizable}} "test.container"() ({ // Check that we can undo block creation, i.e. that the block was removed. // CHECK: test.create_illegal_block // CHECK-NOT: ^{{.*}}(%{{.*}}: i32, %{{.*}}: i32): + // expected-remark@+1 {{op 'test.create_illegal_block' is not legalizable}} "test.create_illegal_block"() : () -> () "test.finish"() : () -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } @@ -202,6 +222,7 @@ func @create_illegal_block() { // CHECK-LABEL: @undo_block_arg_replace func @undo_block_arg_replace() { + // expected-remark@+1 {{op 'test.undo_block_arg_replace' is not legalizable}} "test.undo_block_arg_replace"() ({ ^bb0(%arg0: i32): // CHECK: ^bb0(%[[ARG:.*]]: i32): @@ -209,5 +230,6 @@ func @undo_block_arg_replace() { "test.return"(%arg0) : (i32) -> () }) : () -> () + // expected-remark@+1 {{op 'std.return' is not legalizable}} return } diff --git a/mlir/test/lib/DeclarativeTransforms/CMakeLists.txt b/mlir/test/lib/DeclarativeTransforms/CMakeLists.txt index 0ac97b9291d36..67d194ff868a5 100644 --- a/mlir/test/lib/DeclarativeTransforms/CMakeLists.txt +++ b/mlir/test/lib/DeclarativeTransforms/CMakeLists.txt @@ -1,15 +1,3 @@ -set(LLVM_TARGET_DEFINITIONS TestLinalgTransformPatterns.td) -mlir_tablegen(TestLinalgTransformPatterns.h.inc -gen-rewriters) -add_public_tablegen_target(MLIRTestLinalgTransformPatternsIncGen) -# Including Linalg in TableGen requires to depends on generated files -add_dependencies(MLIRTestLinalgTransformPatternsIncGen LinalgOdsGen) - set(LLVM_TARGET_DEFINITIONS TestVectorTransformPatterns.td) mlir_tablegen(TestVectorTransformPatterns.h.inc -gen-rewriters) add_public_tablegen_target(MLIRTestVectorTransformPatternsIncGen) - -set(LLVM_TARGET_DEFINITIONS TestLinalgMatmulToVectorPatterns.td) -mlir_tablegen(TestLinalgMatmulToVectorPatterns.h.inc -gen-rewriters) -add_public_tablegen_target(MLIRTestLinalgMatmulToVectorPatternsIncGen) -# Including Linalg in TableGen requires to depends on generated files -add_dependencies(MLIRTestLinalgTransformPatternsIncGen LinalgOdsGen) diff --git a/mlir/test/lib/DeclarativeTransforms/TestLinalgMatmulToVectorPatterns.td 
b/mlir/test/lib/DeclarativeTransforms/TestLinalgMatmulToVectorPatterns.td deleted file mode 100644 index 7fa4a3db61286..0000000000000 --- a/mlir/test/lib/DeclarativeTransforms/TestLinalgMatmulToVectorPatterns.td +++ /dev/null @@ -1,43 +0,0 @@ -//===- TestLinalgMatmulToVectorPatterns.td - Test patterns -*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is the pattern definition file for declarative Linalg transformations -// tests. -// -//===----------------------------------------------------------------------===// - -#ifndef TEST_LINALG_MATMUL_TO_VECTOR_PATTERNS -#define TEST_LINALG_MATMUL_TO_VECTOR_PATTERNS - -include "mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td" -include "mlir/Dialect/Vector/VectorTransformPatterns.td" - -//===----------------------------------------------------------------------===// -// Linalg tiling and permutation patterns. -//===----------------------------------------------------------------------===// -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[768, 264, 768], "L2__with_perm__", [1, 2, 0]>), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[8, 12, 16], "L1__with_perm__", [1, 0, 2]>), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (PromoteSubviewsLinalgOp), - [(Constraint>), - (Constraint>)]>; - -//===----------------------------------------------------------------------===// -// Linalg to vector contraction patterns. -//===----------------------------------------------------------------------===// -def : Pattern<(MatmulOp:$op $_, $_, $_), - [(VectorizeLinalgOp)], - [(Constraint, - PreconditionVectorizeLinalgOp]>>)]>; - -#endif // TEST_LINALG_MATMUL_TO_VECTOR_PATTERNS diff --git a/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td b/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td deleted file mode 100644 index 313e2f8171a8d..0000000000000 --- a/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td +++ /dev/null @@ -1,168 +0,0 @@ -//===- TestLinalgTransformPatterns.td - Test patterns --*- tablegen ----*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is the pattern definition file for declarative Linalg transformations -// tests. -// -//===----------------------------------------------------------------------===// - -#ifndef TEST_LINALG_TRANSFORMS_PATTERNS -#define TEST_LINALG_TRANSFORMS_PATTERNS - -include "mlir/Dialect/Linalg/Transforms/LinalgTransformPatterns.td" - -//===----------------------------------------------------------------------===// -// Test Linalg fusion patterns. -//===----------------------------------------------------------------------===// -def : Pat<(MatmulOp:$op $A, $_, $_), - (TileAndFuseLinalgOp<[100, 150], [0], "L1">), - [ - (Constraint), - (Constraint> $A), - ], - // In the buffer world there is no use-def chains or dags so benefits - // cannot be computed automatically from the length of the matched - // pattern. Instead we specify the benefit ourselves for now. 
- // This is not expected to be a big challenge long-term because - // pattern benefits are akin to feature engineering: features should - // be learned. - (addBenefit 1)>; - -//===----------------------------------------------------------------------===// -// Linalg tiling patterns. -//===----------------------------------------------------------------------===// -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[2000, 3000, 4000], "L3">), - [(Constraint]>>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[200, 300, 400], "L2">), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[20, 30, 40], "L1">), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[2, 3, 4], "REG">), - [(Constraint>)]>; - -def : Pattern<(MatvecOp:$op $_, $_, $_), - [(TileLinalgOp<[5, 6], "L1">)], - [(Constraint)]>; - -def : Pattern<(DotOp:$op $_, $_, $_), - [(TileLinalgOp<[8000], "L1">)], - [(Constraint, - HasLinalgTransformMarker<"L3">, - HasLinalgTransformMarker<"L2">]>>)]>; -def : Pattern<(DotOp:$op $_, $_, $_), - [(TileLinalgOp<[8], "REG">)], - [(Constraint>)]>; - -//===----------------------------------------------------------------------===// -// Linalg tiling and permutation patterns. -//===----------------------------------------------------------------------===// -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[2000, 3000, 4000], "L2__with_perm__", [1,2,0]>), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[200, 300, 400], "L1__with_perm__", [1,0,2]>), - [(Constraint>)]>; -def : Pat<(MatmulOp:$op $_, $_, $_), - (TileLinalgOp<[20, 30, 40], "REG__with_perm__">), - [(Constraint>)]>; - - -def : Pattern<(MatvecOp:$op $_, $_, $_), - [(TileLinalgOp<[5, 6], "L1__with_perm__", [1,0]>)], - [(Constraint>)]>; - -def : Pattern<(DotOp:$op $_, $_, $_), - [(TileLinalgOp<[8000], "L1__with_perm__">)], - [(Constraint>)]>; -def : Pattern<(DotOp:$op $_, $_, $_), - [(TileLinalgOp<[8], "REG__with_perm__">)], - [(Constraint>)]>; - -//===----------------------------------------------------------------------===// -// Linalg to loops patterns. -//===----------------------------------------------------------------------===// -def : Pattern<(DotOp:$op $_, $_, $_), - [(LinalgOpToLoops<"DotOp">)], - [(Constraint>)]>; - -//===----------------------------------------------------------------------===// -// Linalg to vector contraction patterns. -//===----------------------------------------------------------------------===// -def : Pattern<(MatmulOp:$op $_, $_, $_), - [(VectorizeLinalgOp)], - [(Constraint, - PreconditionVectorizeLinalgOp - ]>>)]>; -def : Pattern<(FillOp:$op $_, $_), - [(VectorizeLinalgOp)], - [(Constraint, - PreconditionVectorizeLinalgOp - ]>>)]>; -def : Pattern<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_), - [(VectorizeLinalgOp)], - [(Constraint, - PreconditionVectorizeLinalgOp - ]>>)]>; - - -//===----------------------------------------------------------------------===// -// Linalg generic permutation patterns. 
-//===----------------------------------------------------------------------===// -def : Pat<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_), - (PermuteGenericLinalgOp<[1, 2, 0], "PERMUTE"> $op), - [(Constraint, - PreconditionPermuteGenericLinalgOp<[1, 2, 0]> - ]>>)]>; - -def : Pat<(IndexedGenericOp:$op $_, $_, $_, $_, $_, $_, $_), - (PermuteGenericLinalgOp<[1, 2, 0], "PERMUTE"> $op), - [(Constraint, - PreconditionPermuteGenericLinalgOp<[1, 2, 0]> - ]>>)]>; - -//===----------------------------------------------------------------------===// -// Linalg subview operands promotion. -//===----------------------------------------------------------------------===// -def : Pat<(MatmulOp:$op $_, $_, $_), - (PromoteSubviewsLinalgOp), - [(Constraint, - HasLinalgTransformMarker<"_promote_views_">]>> - )]>; - -def : Pat<(MatmulOp:$op $_, $_, $_), - (PromoteSelectedSubviewsLinalgOp<[0], "first_view_promotion">), - [(Constraint, - HasLinalgTransformMarker<"_promote_first_view_">]>> - )]>; - -def : Pat<(FillOp:$op $_, $_), - (PromoteSelectedSubviewsLinalgOp<[0], "aligned_promotion", 32>), - [(Constraint, - HasLinalgTransformMarker<"_promote_views_aligned_">]>> - )]>; - -#endif // TEST_LINALG_TRANSFORMS_PATTERNS diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt index 56195ba2e8758..68a0b06e0e318 100644 --- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt @@ -1,16 +1,21 @@ -add_llvm_library(MLIRAffineTransformsTestPasses +# Exclude tests from libMLIR.so +add_mlir_library(MLIRAffineTransformsTestPasses TestAffineDataCopy.cpp TestAffineLoopUnswitching.cpp TestLoopPermutation.cpp TestParallelismDetection.cpp TestVectorizationUtils.cpp + EXCLUDE_FROM_LIBMLIR + ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR - ) -target_link_libraries(MLIRAffineTransformsTestPasses PRIVATE + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC MLIRIR MLIRPass MLIRAffineTransforms diff --git a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt index 5035c9cc8d490..15d4673f381f3 100644 --- a/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/test/lib/Dialect/SPIRV/CMakeLists.txt @@ -1,12 +1,14 @@ -add_llvm_library(MLIRSPIRVTestPasses +# Exclude tests from libMLIR.so +add_mlir_library(MLIRSPIRVTestPasses TestAvailability.cpp + EXCLUDE_FROM_LIBMLIR + ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR - ) -target_link_libraries(MLIRSPIRVTestPasses PRIVATE + LINK_LIBS PUBLIC MLIRIR MLIRPass MLIRSPIRV diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt index 5b97e113d6841..542be7b6ac9ca 100644 --- a/mlir/test/lib/Dialect/Test/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt @@ -9,19 +9,22 @@ mlir_tablegen(TestOps.cpp.inc -gen-op-defs) mlir_tablegen(TestOpsDialect.h.inc -gen-dialect-decls) mlir_tablegen(TestOpEnums.h.inc -gen-enum-decls) mlir_tablegen(TestOpEnums.cpp.inc -gen-enum-defs) +mlir_tablegen(TestOpStructs.h.inc -gen-struct-attr-decls) +mlir_tablegen(TestOpStructs.cpp.inc -gen-struct-attr-defs) mlir_tablegen(TestPatterns.inc -gen-rewriters) add_public_tablegen_target(MLIRTestOpsIncGen) -add_llvm_library(MLIRTestDialect +# Exclude tests from libMLIR.so +add_mlir_library(MLIRTestDialect TestDialect.cpp TestPatterns.cpp + EXCLUDE_FROM_LIBMLIR + DEPENDS MLIRTestOpsIncGen -) -target_link_libraries(MLIRTestDialect - PUBLIC - LLVMSupport + + LINK_LIBS 
PUBLIC
  MLIRControlFlowInterfaces
  MLIRDerivedAttributeOpInterface
  MLIRDialect
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index fa99d472676cd..1a40f9989eaea 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -167,13 +167,12 @@ TestDialect::verifyRegionResultAttribute(Operation *op, unsigned regionIndex,
 // TestBranchOp
 //===----------------------------------------------------------------------===//
 
-Optional<OperandRange> TestBranchOp::getSuccessorOperands(unsigned index) {
+Optional<MutableOperandRange>
+TestBranchOp::getMutableSuccessorOperands(unsigned index) {
   assert(index == 0 && "invalid successor index");
-  return getOperands();
+  return targetOperandsMutable();
 }
 
-bool TestBranchOp::canEraseSuccessorOperand() { return true; }
-
 //===----------------------------------------------------------------------===//
 // Test IsolatedRegionOp - parse passthrough region arguments.
 //===----------------------------------------------------------------------===//
@@ -201,6 +200,22 @@ static void print(OpAsmPrinter &p, IsolatedRegionOp op) {
   p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
 }
 
+//===----------------------------------------------------------------------===//
+// Test PolyhedralScopeOp
+//===----------------------------------------------------------------------===//
+
+static ParseResult parsePolyhedralScopeOp(OpAsmParser &parser,
+                                          OperationState &result) {
+  // Parse the body region, and reuse the operand info as the argument info.
+  Region *body = result.addRegion();
+  return parser.parseRegion(*body, /*arguments=*/{}, /*argTypes=*/{});
+}
+
+static void print(OpAsmPrinter &p, PolyhedralScopeOp op) {
+  p << "test.polyhedral_scope ";
+  p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
+}
+
 //===----------------------------------------------------------------------===//
 // Test parser.
//===----------------------------------------------------------------------===//
@@ -480,6 +495,7 @@ void StringAttrPrettyNameOp::getAsmResultNames(
 static mlir::DialectRegistration<TestDialect> testDialect;
 
 #include "TestOpEnums.cpp.inc"
+#include "TestOpStructs.cpp.inc"
 #define GET_OP_CLASSES
 #include "TestOps.cpp.inc"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index b4ca125cb3d61..ea386c0586422 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -30,6 +30,7 @@
 
 namespace mlir {
 
+#include "TestOpStructs.h.inc"
 #include "TestOpsDialect.h.inc"
 
 #define GET_OP_CLASSES
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index d5259639f4b5f..f9140f2e9bdc0 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -9,8 +9,10 @@
 #ifndef TEST_OPS
 #define TEST_OPS
 
+include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
 include "mlir/IR/OpBase.td"
 include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/SideEffects.td"
 include "mlir/Interfaces/CallInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
@@ -195,6 +197,15 @@ def I64EnumAttrOp : TEST_Op<"i64_enum_attr"> {
   let results = (outs I32:$val);
 }
 
+def SomeStructAttr : StructAttr<"SomeStructAttr", Test_Dialect, [
+  StructFieldAttr<"some_field", I64Attr>,
+  StructFieldAttr<"some_other_field", I64Attr>
+]> {}
+
+def StructAttrOp : TEST_Op<"struct_attr"> {
+  let arguments = (ins SomeStructAttr:$the_struct_attr);
+  let results = (outs);
+}
 
 def IntAttrOp : TEST_Op<"int_attrs"> {
   let arguments = (ins
@@ -737,9 +748,9 @@ def OpSymbolBindingB : TEST_Op<"symbol_binding_b", []> {
 
   let builders = [
     OpBuilder<
-      "Builder *builder, OperationState &state, Value operand",
+      "OpBuilder &builder, OperationState &state, Value operand",
       [{
-        state.types.assign({builder->getIntegerType(32)});
+        state.types.assign({builder.getIntegerType(32)});
         state.addOperands({operand});
      }]>
   ];
@@ -849,10 +860,10 @@ def TwoResultOp : TEST_Op<"two_result"> {
 
   let builders = [
     OpBuilder<
-      "Builder *builder, OperationState &state, IntegerAttr kind",
+      "OpBuilder &builder, OperationState &state, IntegerAttr kind",
       [{
-        auto i32 = builder->getIntegerType(32);
-        auto f32 = builder->getF32Type();
+        auto i32 = builder.getIntegerType(32);
+        auto f32 = builder.getF32Type();
         state.types.assign({i32, f32});
         state.addAttribute("kind", kind);
       }]>
@@ -1005,9 +1016,9 @@ def MixedVResultOp3 : TEST_Op<"mixed_variadic_out3",
   // result type. So need to provide a builder not requiring result types.
let builders = [
     OpBuilder<
-      "Builder *builder, OperationState &state, IntegerAttr count",
+      "OpBuilder &builder, OperationState &state, IntegerAttr count",
       [{
-        auto i32Type = builder->getIntegerType(32);
+        auto i32Type = builder.getIntegerType(32);
         state.addTypes(i32Type); // $output1
         SmallVector<Type, 2> types(count.getInt(), i32Type);
         state.addTypes(types); // $output2
@@ -1089,7 +1100,7 @@ def TestRecursiveRewriteOp : TEST_Op<"recursive_rewrite"> {
 //===----------------------------------------------------------------------===//
 
 def TestRegionBuilderOp : TEST_Op<"region_builder">;
-def TestReturnOp : TEST_Op<"return", [Terminator]>,
+def TestReturnOp : TEST_Op<"return", [ReturnLike, Terminator]>,
   Arguments<(ins Variadic<AnyType>)>;
 def TestCastOp : TEST_Op<"cast">,
   Arguments<(ins Variadic<AnyType>)>, Results<(outs AnyType)>;
@@ -1128,6 +1139,17 @@ def IsolatedRegionOp : TEST_Op<"isolated_region", [IsolatedFromAbove]> {
   let printer = [{ return ::print(p, *this); }];
 }
 
+def PolyhedralScopeOp : TEST_Op<"polyhedral_scope", [PolyhedralScope]> {
+  let summary = "polyhedral scope operation";
+  let description = [{
+    Test op that defines a new polyhedral scope.
+  }];
+
+  let regions = (region SizedRegion<1>:$region);
+  let parser = [{ return ::parse$cppClass(parser, result); }];
+  let printer = [{ return ::print(p, *this); }];
+}
+
 def WrappingRegionOp : TEST_Op<"wrapping_region",
     [SingleBlockImplicitTerminator<"TestReturnOp">]> {
   let summary = "wrapping region operation";
@@ -1257,6 +1279,16 @@ def FormatOptionalOperandResultBOp : FormatOptionalOperandResultOpBase<"b", [{
   (`[` $variadic^ `]`)? attr-dict
 }]>;
 
+def FormatTwoVariadicOperandsNoBuildableTypeOp
+    : TEST_Op<"format_two_variadic_operands_no_buildable_type_op",
+              [AttrSizedOperandSegments]> {
+  let arguments = (ins Variadic<AnyType>:$a,
+                       Variadic<AnyType>:$b);
+  let assemblyFormat = [{
+    `(` $a `:` type($a) `)` `->` `(` $b `:` type($b) `)` attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Test SideEffects
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index d21d59ca1e8b9..deb1cf5bb0758 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -77,7 +77,7 @@ static void invokeCreateWithInferredReturnType(Operation *op) {
             inferredReturnTypes))) {
       OperationState state(location, OpTy::getOperationName());
       // TODO(jpienaar): Expand to regions.
-      OpTy::build(&b, state, values, op->getAttrs());
+      OpTy::build(b, state, values, op->getAttrs());
       (void)b.createOperation(state);
     }
   }
@@ -515,8 +515,12 @@ struct TestLegalizePatternDriver
 
     // Handle a partial conversion.
     if (mode == ConversionMode::Partial) {
-      (void)applyPartialConversion(getOperation(), target, patterns,
-                                   &converter);
+      DenseSet<Operation *> unlegalizedOps;
+      (void)applyPartialConversion(getOperation(), target, patterns, &converter,
+                                   &unlegalizedOps);
+      // Emit remarks for each operation that was not legalized.
+      for (auto *op : unlegalizedOps)
+        op->emitRemark() << "op '" << op->getName() << "' is not legalizable";
       return;
     }
 
diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt
index c4e4ebc9b88ee..0a55a82b1c3d7 100644
--- a/mlir/test/lib/IR/CMakeLists.txt
+++ b/mlir/test/lib/IR/CMakeLists.txt
@@ -1,16 +1,15 @@
-add_llvm_library(MLIRTestIR
+# Exclude tests from libMLIR.so
+add_mlir_library(MLIRTestIR
   TestFunc.cpp
   TestMatchers.cpp
   TestSideEffects.cpp
   TestSymbolUses.cpp
 
-  ADDITIONAL_HEADER_DIRS
-  )
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test)
-include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test)
+  EXCLUDE_FROM_LIBMLIR
 
-target_link_libraries(MLIRTestIR
-  PUBLIC
+  LINK_LIBS PUBLIC
   MLIRPass
   MLIRTestDialect
   )
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test)
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test)
diff --git a/mlir/test/lib/IR/TestSymbolUses.cpp b/mlir/test/lib/IR/TestSymbolUses.cpp
index 13188485ec418..0ec7f8258050a 100644
--- a/mlir/test/lib/IR/TestSymbolUses.cpp
+++ b/mlir/test/lib/IR/TestSymbolUses.cpp
@@ -66,7 +66,7 @@ struct SymbolUsesPass
     // Walk nested symbols.
     SmallVector<FuncOp, 4> deadFunctions;
     module.getBodyRegion().walk([&](Operation *nestedOp) {
-      if (SymbolTable::isSymbol(nestedOp))
+      if (isa<SymbolOpInterface>(nestedOp))
         return operateOnSymbol(nestedOp, module, deadFunctions);
       return WalkResult::advance();
     });
diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt
index 7d79e1137f40c..608141e349d10 100644
--- a/mlir/test/lib/Pass/CMakeLists.txt
+++ b/mlir/test/lib/Pass/CMakeLists.txt
@@ -1,11 +1,13 @@
-add_llvm_library(MLIRTestPass
+# Exclude tests from libMLIR.so
+add_mlir_library(MLIRTestPass
   TestPassManager.cpp
 
+  EXCLUDE_FROM_LIBMLIR
+
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Pass
-  )
-target_link_libraries(MLIRTestPass
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRIR
   MLIRPass
   )
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 0417bee750ffe..e7b31b3d0bcfe 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -1,5 +1,7 @@
-add_llvm_library(MLIRTestTransforms
+# Exclude tests from libMLIR.so
+add_mlir_library(MLIRTestTransforms
   TestAllReduceLowering.cpp
+  TestBufferPlacement.cpp
   TestCallGraph.cpp
   TestConstantFold.cpp
   TestConvertGPUKernelToCubin.cpp
@@ -8,11 +10,11 @@ add_llvm_library(MLIRTestTransforms
   TestGpuMemoryPromotion.cpp
   TestGpuParallelLoopMapping.cpp
   TestInlining.cpp
-  TestLinalgMatmulToVector.cpp
   TestLinalgTransforms.cpp
   TestLiveness.cpp
   TestLoopMapping.cpp
   TestLoopParametricTiling.cpp
+  TestLoopUnrolling.cpp
   TestOpaqueLoc.cpp
   TestMemRefBoundCheck.cpp
   TestMemRefDependenceCheck.cpp
@@ -20,23 +22,16 @@ add_llvm_library(MLIRTestTransforms
   TestVectorToLoopsConversion.cpp
   TestVectorTransforms.cpp
 
+  EXCLUDE_FROM_LIBMLIR
+
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
 
   DEPENDS
   MLIRStandardOpsIncGen
-  MLIRTestLinalgMatmulToVectorPatternsIncGen
-  MLIRTestLinalgTransformPatternsIncGen
   MLIRTestVectorTransformPatternsIncGen
-)
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test)
-include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../DeclarativeTransforms)
-include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms)
-target_link_libraries(MLIRTestTransforms
-  PUBLIC
+
+  LINK_LIBS PUBLIC
   MLIRAffineOps
   MLIRAnalysis
   MLIREDSC
@@ -53,3 +48,8 @@ target_link_libraries(MLIRTestTransforms
MLIRVectorToLoops MLIRVector ) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../Dialect/Test) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/../Dialect/Test) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../DeclarativeTransforms) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms) diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp new file mode 100644 index 0000000000000..03c6a2a72d50a --- /dev/null +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -0,0 +1,152 @@ +//===- TestBufferPlacement.cpp - Test for buffer placement 0----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements logic for testing buffer placement including its +// utility converters. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/BufferPlacement.h" + +using namespace mlir; + +namespace { +/// This pass tests the computeAllocPosition helper method and two provided +/// operation converters, FunctionAndBlockSignatureConverter and +/// NonVoidToVoidReturnOpConverter. Furthermore, this pass converts linalg +/// operations on tensors to linalg operations on buffers to prepare them for +/// the BufferPlacement pass that can be applied afterwards. +struct TestBufferPlacementPreparationPass + : mlir::PassWrapper> { + + /// Converts tensor-type generic linalg operations to memref ones using buffer + /// assignment. + class GenericOpConverter + : public BufferAssignmentOpConversionPattern { + public: + using BufferAssignmentOpConversionPattern< + linalg::GenericOp>::BufferAssignmentOpConversionPattern; + + LogicalResult + matchAndRewrite(linalg::GenericOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + auto loc = op.getLoc(); + SmallVector args(operands.begin(), operands.end()); + + // Update all types to memref types. + auto results = op.getOperation()->getResults(); + for (auto result : results) { + auto type = result.getType().cast(); + if (!type) + op.emitOpError() + << "tensor to buffer conversion expects ranked results"; + if (!type.hasStaticShape()) + return rewriter.notifyMatchFailure( + op, "dynamic shapes not currently supported"); + auto memrefType = + MemRefType::get(type.getShape(), type.getElementType()); + + // Compute alloc position and insert a custom allocation node. + OpBuilder::InsertionGuard guard(rewriter); + rewriter.restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result)); + auto alloc = rewriter.create(loc, memrefType); + result.replaceAllUsesWith(alloc); + args.push_back(alloc); + } + + // Generate a new linalg operation that works on buffers. + auto linalgOp = rewriter.create( + loc, llvm::None, args, rewriter.getI64IntegerAttr(operands.size()), + rewriter.getI64IntegerAttr(results.size()), op.indexing_maps(), + op.iterator_types(), op.docAttr(), op.library_callAttr()); + + // Move regions from the old operation to the new one. 
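// (Aside: inlineRegionBefore moves blocks rather than cloning them, so block
// arguments and value uses inside the region remain valid, and the
// ConversionPatternRewriter can still roll the edit back if a later pattern
// fails. The two statements below splice the body of `op` into the empty
// region of the freshly created `linalgOp`.)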
+ auto &region = linalgOp.region(); + rewriter.inlineRegionBefore(op.region(), region, region.end()); + + // TODO: verify the internal memref-based linalg functionality. + auto &entryBlock = region.front(); + for (auto result : results) { + auto type = result.getType().cast(); + entryBlock.addArgument(type.getElementType()); + } + + rewriter.eraseOp(op); + return success(); + } + }; + + void populateTensorLinalgToBufferLinalgConversionPattern( + MLIRContext *context, BufferAssignmentPlacer *placer, + TypeConverter *converter, OwningRewritePatternList *patterns) { + // clang-format off + patterns->insert< + FunctionAndBlockSignatureConverter, + GenericOpConverter, + NonVoidToVoidReturnOpConverter< + ReturnOp, ReturnOp, linalg::CopyOp> + >(context, placer, converter); + // clang-format on + } + + void runOnOperation() override { + auto &context = getContext(); + ConversionTarget target(context); + BufferAssignmentTypeConverter converter; + // Make all linalg operations illegal as long as they work on tensors. + target.addLegalDialect(); + target.addDynamicallyLegalDialect( + Optional( + [&](Operation *op) { + auto isIllegalType = [&](Type type) { + return !converter.isLegal(type); + }; + return llvm::none_of(op->getOperandTypes(), isIllegalType) && + llvm::none_of(op->getResultTypes(), isIllegalType); + })); + + // Mark return operations illegal as long as they return values. + target.addDynamicallyLegalOp( + [](mlir::ReturnOp returnOp) { return returnOp.getNumOperands() == 0; }); + + // Mark the function whose arguments are in tensor-type illegal. + target.addDynamicallyLegalOp([&](FuncOp funcOp) { + return converter.isSignatureLegal(funcOp.getType()); + }); + + // Walk over all the functions to apply buffer assignment. + getOperation().walk([&](FuncOp function) { + OwningRewritePatternList patterns; + BufferAssignmentPlacer placer(function); + populateTensorLinalgToBufferLinalgConversionPattern( + &context, &placer, &converter, &patterns); + + // Applying full conversion + return failed(applyFullConversion(function, target, patterns, &converter)) + ? WalkResult::interrupt() + : WalkResult::advance(); + }); + }; +}; +} // end anonymous namespace + +namespace mlir { +void registerTestBufferPlacementPreparationPass() { + PassRegistration( + "test-buffer-placement-preparation", + "Tests buffer placement helper methods including its " + "operation-conversion patterns"); +} +} // end namespace mlir \ No newline at end of file diff --git a/mlir/test/lib/Transforms/TestDominance.cpp b/mlir/test/lib/Transforms/TestDominance.cpp index 97674c400f814..fd83e603ca2b6 100644 --- a/mlir/test/lib/Transforms/TestDominance.cpp +++ b/mlir/test/lib/Transforms/TestDominance.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Analysis/Dominance.h" +#include "mlir/IR/Dominance.h" #include "mlir/Pass/Pass.h" using namespace mlir; diff --git a/mlir/test/lib/Transforms/TestLinalgMatmulToVector.cpp b/mlir/test/lib/Transforms/TestLinalgMatmulToVector.cpp deleted file mode 100644 index e32f4d3dd6c50..0000000000000 --- a/mlir/test/lib/Transforms/TestLinalgMatmulToVector.cpp +++ /dev/null @@ -1,51 +0,0 @@ -//===- TestLinalgMatmulToVector.cpp - Test VectorTransfers lowering -------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Transforms/LinalgTransforms.h" -#include "mlir/Dialect/Vector/VectorOps.h" -#include "mlir/Dialect/Vector/VectorTransforms.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; -using namespace mlir::linalg; -using namespace mlir::vector; - -namespace { -#include "TestLinalgMatmulToVectorPatterns.h.inc" - -struct DeclarativeTransforms - : public PassWrapper { - void runOnFunction() override { - OwningRewritePatternList patterns; - auto *context = &getContext(); - AffineApplyOp::getCanonicalizationPatterns(patterns, context); - AffineMinOp::getCanonicalizationPatterns(patterns, context); - AffineMaxOp::getCanonicalizationPatterns(patterns, context); - AllocOp::getCanonicalizationPatterns(patterns, context); - SubViewOp::getCanonicalizationPatterns(patterns, context); - ViewOp::getCanonicalizationPatterns(patterns, context); - populateWithGenerated(context, &patterns); - applyPatternsAndFoldGreedily(getFunction(), patterns); - } -}; -} // end anonymous namespace - -namespace mlir { -void registerTestLinalgMatmulToVectorPass() { - PassRegistration pass( - "linalg-matmul-to-vector", - "Test declarative transform patterns for matmul 3-D tiling + promotion" - " + vectorization"); -} -} // namespace mlir diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index 7fc1138ff8d4d..f3861c38fa601 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -10,36 +10,127 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Transforms/LinalgTransforms.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" +#include "llvm/ADT/SetVector.h" + using namespace mlir; using namespace mlir::linalg; -namespace mlir { -namespace linalg { -namespace { -#include "TestLinalgTransformPatterns.h.inc" -} // end namespace -} // end namespace linalg -} // end namespace mlir - namespace { struct TestLinalgTransforms : public PassWrapper { + TestLinalgTransforms() = default; + TestLinalgTransforms(const TestLinalgTransforms &pass) {} + void runOnFunction() override; + + Option testPatterns{*this, "test-patterns", + llvm::cl::desc("Test a mixed set of patterns"), + llvm::cl::init(false)}; }; } // end anonymous namespace -/// Apply transformations specified as patterns. -void TestLinalgTransforms::runOnFunction() { +static void applyPatterns(FuncOp funcOp) { + MLIRContext *ctx = funcOp.getContext(); OwningRewritePatternList patterns; - auto funcOp = getFunction(); - // Add the generated patterns to the list. - linalg::populateWithGenerated(&getContext(), &patterns); + //===--------------------------------------------------------------------===// + // Linalg tiling patterns. 
+ //===--------------------------------------------------------------------===// + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({2000, 3000, 4000}), + LinalgMarker({"MEM", {}}, "L3")); + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({200, 300, 400}), + LinalgMarker({"L3"}, "L2")); + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({20, 30, 40}), + LinalgMarker({"L2"}, "L1")); + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({2, 3, 4}), + LinalgMarker({"L1"}, "REG")); + + patterns.insert>( + ctx, + LinalgTilingOptions().setTileSizes({5, 6}).setLoopType( + LinalgTilingLoopType::ParallelLoops), + LinalgMarker({}, "L1")); + + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes(8000), + LinalgMarker({"MEM", "L3", "L2", {}}, "REG")); + + //===--------------------------------------------------------------------===// + // Linalg tiling and permutation patterns. + //===--------------------------------------------------------------------===// + patterns.insert>( + ctx, + LinalgTilingOptions() + .setTileSizes({2000, 3000, 4000}) + .setInterchange({1, 2, 0}), + LinalgMarker({"__with_perm__"}, "L2__with_perm__")); + patterns.insert>( + ctx, + LinalgTilingOptions() + .setTileSizes({200, 300, 400}) + .setInterchange({1, 0, 2}), + LinalgMarker({"L2__with_perm__"}, "L1__with_perm__")); + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({20, 30, 40}), + LinalgMarker({"L1__with_perm__"}, "REG__with_perm__")); + + patterns.insert>( + ctx, LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}), + LinalgMarker({"__with_perm__"}, "L1__with_perm__")); + + //===--------------------------------------------------------------------===// + // Linalg to loops patterns. + //===--------------------------------------------------------------------===// + patterns.insert>( + ctx, + /*loweringType=*/LinalgLoweringType::Loops, LinalgMarker({"REG"})); + + //===--------------------------------------------------------------------===// + // Linalg to vector contraction patterns. + //===--------------------------------------------------------------------===// + patterns.insert, + LinalgVectorizationPattern, + LinalgVectorizationPattern>( + ctx, LinalgMarker({"VECTORIZE"})); + + //===--------------------------------------------------------------------===// + // Linalg generic permutation patterns. + //===--------------------------------------------------------------------===// + patterns.insert>( + ctx, + /*interchangeVector=*/ArrayRef{1, 2, 0}, + LinalgMarker({}, "PERMUTED")); + patterns.insert>( + ctx, + /*interchangeVector=*/ArrayRef{1, 2, 0}, + LinalgMarker({}, "PERMUTED")); + + //===--------------------------------------------------------------------===// + // Linalg subview operands promotion. + //===--------------------------------------------------------------------===// + patterns.insert>( + ctx, LinalgMarker({"_promote_views_"}, "_views_promoted_")); + patterns.insert>( + ctx, + /*operandsToPromote=*/ArrayRef{0}, + LinalgMarker({"_promote_first_view_"}, "_first_view_promoted_")); + patterns.insert>( + ctx, + /*operandsToPromote=*/ArrayRef{0}, + /*alignment=*/32, + LinalgMarker({"_promote_views_aligned_"}, "_views_aligned_promoted_")); + applyPatternsAndFoldGreedily(funcOp, patterns); // Drop the marker. @@ -48,9 +139,15 @@ void TestLinalgTransforms::runOnFunction() { }); } +/// Apply transformations specified as patterns. 
+void TestLinalgTransforms::runOnFunction() { + if (testPatterns) + return applyPatterns(getFunction()); +} + namespace mlir { void registerTestLinalgTransforms() { - PassRegistration( + PassRegistration testTransformPatternsPass( "test-linalg-transform-patterns", "Test Linalg transformation patterns by applying them greedily."); } diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp new file mode 100644 index 0000000000000..7cd221f37f8c0 --- /dev/null +++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp @@ -0,0 +1,68 @@ +//===-------- TestLoopUnrolling.cpp --- loop unrolling test pass ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to unroll loops by a specified unroll factor. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/LoopOps/LoopOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/LoopUtils.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; + +namespace { + +static unsigned getNestingDepth(Operation *op) { + Operation *currOp = op; + unsigned depth = 0; + while ((currOp = currOp->getParentOp())) { + if (isa(currOp)) + depth++; + } + return depth; +} + +class TestLoopUnrollingPass + : public PassWrapper { +public: + TestLoopUnrollingPass() = default; + TestLoopUnrollingPass(const TestLoopUnrollingPass &) {} + explicit TestLoopUnrollingPass(uint64_t unrollFactorParam, + unsigned loopDepthParam) { + unrollFactor = unrollFactorParam; + loopDepth = loopDepthParam; + } + + void runOnFunction() override { + FuncOp func = getFunction(); + SmallVector loops; + func.walk([&](loop::ForOp forOp) { + if (getNestingDepth(forOp) == loopDepth) + loops.push_back(forOp); + }); + for (auto loop : loops) { + loopUnrollByFactor(loop, unrollFactor); + } + } + Option unrollFactor{*this, "unroll-factor", + llvm::cl::desc("Loop unroll factor."), + llvm::cl::init(1)}; + Option loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."), + llvm::cl::init(0)}; +}; +} // end namespace + +namespace mlir { +void registerTestLoopUnrollingPass() { + PassRegistration( + "test-loop-unrolling", "Tests loop unrolling transformation"); +} +} // namespace mlir diff --git a/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp b/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp index dc9b5c8d66cd7..907e200848694 100644 --- a/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp +++ b/mlir/test/lib/Transforms/TestVectorToLoopsConversion.cpp @@ -22,7 +22,7 @@ struct TestVectorToLoopsPass void runOnFunction() override { OwningRewritePatternList patterns; auto *context = &getContext(); - populateVectorToAffineLoopsConversionPatterns(context, patterns); + populateVectorToLoopsConversionPatterns(patterns, context); applyPatternsAndFoldGreedily(getFunction(), patterns); } }; diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 65f80315d57aa..e78c82815b15a 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -31,6 +31,7 @@ config.substitutions.append(('%PATH%', config.environment['PATH'])) config.substitutions.append(('%shlibext', config.llvm_shlib_ext)) +config.substitutions.append(("%mlir_src_root", config.mlir_src_root)) 
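Stepping back to the two passes added above: TestLinalgTransforms and TestLoopUnrollingPass both define a copy constructor with an empty body on purpose. Pass options register themselves with their owning pass through *this, so a copied pass must construct fresh options rather than copy the old ones. A minimal sketch of the idiom (the pass name is hypothetical; assumes the usual mlir/Pass/Pass.h setup):

struct MyUnrollTestPass : public PassWrapper<MyUnrollTestPass, FunctionPass> {
  MyUnrollTestPass() = default;
  // Deliberately empty: Option members re-register themselves via *this.
  MyUnrollTestPass(const MyUnrollTestPass &) {}

  Option<unsigned> factor{*this, "factor", llvm::cl::desc("Unroll factor."),
                          llvm::cl::init(1)};

  void runOnFunction() override { /* read `factor` here */ }
};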
llvm_config.with_system_environment( ['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index dafb1c9a3eb86..dc6286a827bb7 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.llvm_bindings = "@LLVM_BINDINGS@".split(' ') config.host_os = "@HOST_OS@" config.host_cc = "@HOST_CC@" config.host_cxx = "@HOST_CXX@" +config.host_cmake = "@CMAKE_COMMAND@" # Note: ldflags can contain double-quoted paths, so must use single quotes here. config.host_ldflags = '@HOST_LDFLAGS@' config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc index 0b88f2aa11a20..d796d1917c035 100644 --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc @@ -7,15 +7,15 @@ // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test1Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test1Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test1Op::referenceIndexingMaps() { +// IMPL: SmallVector Test1Op::referenceIndexingMaps // IMPL: AffineMap::get(2, 0, {d0, d1}, context), // IMPL-NEXT: AffineMap::get(2, 0, {d1}, context), // IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) }; // -// IMPL: Test1Op::regionBuilder(Block &block) { +// IMPL: void Test1Op::regionBuilder(Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]); // IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]); @@ -32,10 +32,10 @@ def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) { // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test2Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test2Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test2Op::referenceIndexingMaps() { +// IMPL: SmallVector Test2Op::referenceIndexingMaps // IMPL: AffineMap::get(3, 0, {d0, d2}, context), // IMPL-NEXT: AffineMap::get(3, 0, {d2, d1}, context), // IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) }; @@ -57,10 +57,10 @@ def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) { // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test3Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test3Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test3Op::referenceIndexingMaps() { +// IMPL: SmallVector Test3Op::referenceIndexingMaps // IMPL: AffineMap::get(4, 0, {d0, d1, d3}, context), // IMPL-NEXT: AffineMap::get(4, 0, {d3, d2}, context), // IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) }; diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index 6e22912d1a2bb..522dc2459fcaf 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -74,7 +74,7 @@ def AOp : NS_Op<"a_op", []> { // DEF: void AOp::build( // DEF: some-return-type aAttr, some-return-type bAttr, 
/*optional*/some-attr-kind cAttr -// DEF: odsState.addAttribute("aAttr", some-const-builder-call((*odsBuilder), aAttr)); +// DEF: odsState.addAttribute("aAttr", some-const-builder-call(odsBuilder, aAttr)); // DEF: void AOp::build( // DEF: ArrayRef attributes @@ -198,8 +198,8 @@ def DOp : NS_Op<"d_op", []> { // DECL: static void build({{.*}}, APInt i32_attr, APFloat f64_attr, StringRef str_attr, bool bool_attr, ::SomeI32Enum enum_attr, APInt dv_i32_attr, APFloat dv_f64_attr, StringRef dv_str_attr = "abc", bool dv_bool_attr = true, ::SomeI32Enum dv_enum_attr = ::SomeI32Enum::case5) // DEF-LABEL: DOp definitions -// DEF: odsState.addAttribute("str_attr", (*odsBuilder).getStringAttr(str_attr)); -// DEF: odsState.addAttribute("dv_str_attr", (*odsBuilder).getStringAttr(dv_str_attr)); +// DEF: odsState.addAttribute("str_attr", odsBuilder.getStringAttr(str_attr)); +// DEF: odsState.addAttribute("dv_str_attr", odsBuilder.getStringAttr(dv_str_attr)); // Test derived type attr. // --- @@ -249,7 +249,7 @@ def MixOperandsAndAttrs : NS_Op<"mix_operands_and_attrs", []> { // DEF-LABEL: MixOperandsAndAttrs definitions // DEF-DAG: Value MixOperandsAndAttrs::operand() // DEF-DAG: Value MixOperandsAndAttrs::otherArg() -// DEF-DAG: void MixOperandsAndAttrs::build(Builder *odsBuilder, OperationState &odsState, FloatAttr attr, Value operand, FloatAttr otherAttr, Value otherArg) +// DEF-DAG: void MixOperandsAndAttrs::build(OpBuilder &odsBuilder, OperationState &odsState, FloatAttr attr, Value operand, FloatAttr otherAttr, Value otherArg) // DEF-DAG: APFloat MixOperandsAndAttrs::attr() // DEF-DAG: APFloat MixOperandsAndAttrs::otherAttr() @@ -264,7 +264,7 @@ def UnitAttrOp : NS_Op<"unit_attr_op", []> { // DEF: bool UnitAttrOp::attr() { // DEF: return {{.*}} != nullptr -// DEF: build(Builder *odsBuilder, OperationState &odsState, /*optional*/UnitAttr attr) +// DEF: build(OpBuilder &odsBuilder, OperationState &odsState, /*optional*/UnitAttr attr) // Test elementAttr field of TypedArrayAttr. 
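The odsBuilder spelling checked above comes from the generator's format context now naming the builder directly instead of dereferencing a pointer (see the fctx.withBuilder("odsBuilder") change in OpDefinitionsGen.cpp further down). A sketch of that substitution using MLIR's TableGen format utilities; the template string here is an illustrative constBuilderCall, not one taken from this patch:

#include "mlir/TableGen/Format.h"

std::string buildAttrCall() {
  mlir::tblgen::FmtContext fctx;
  fctx.withBuilder("odsBuilder"); // previously "(*odsBuilder)"
  // $_builder expands to the builder expression, $0 to the first argument.
  return std::string(
      mlir::tblgen::tgfmt("$_builder.getStringAttr($0)", &fctx, "str_attr"));
  // Yields: odsBuilder.getStringAttr(str_attr)
}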
diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index eb736b7e6f0b1..4fa2f3af29e1d 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -67,6 +67,8 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: Operation::operand_range getODSOperands(unsigned index); // CHECK: Value a(); // CHECK: Operation::operand_range b(); +// CHECK: ::mlir::MutableOperandRange aMutable(); +// CHECK: ::mlir::MutableOperandRange bMutable(); // CHECK: Operation::result_range getODSResults(unsigned index); // CHECK: Value r(); // CHECK: Region &someRegion(); @@ -76,9 +78,9 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: FloatAttr attr2Attr() // CHECK: Optional< APFloat > attr2(); // CHECK: static void build(Value val); -// CHECK: static void build(Builder *odsBuilder, OperationState &odsState, Type r, ArrayRef s, Value a, ValueRange b, IntegerAttr attr1, /*optional*/FloatAttr attr2, unsigned someRegionsCount) -// CHECK: static void build(Builder *odsBuilder, OperationState &odsState, Type r, ArrayRef s, Value a, ValueRange b, APInt attr1, /*optional*/FloatAttr attr2, unsigned someRegionsCount) -// CHECK: static void build(Builder *, OperationState &odsState, ArrayRef resultTypes, ValueRange operands, ArrayRef attributes, unsigned numRegions) +// CHECK: static void build(OpBuilder &odsBuilder, OperationState &odsState, Type r, ArrayRef s, Value a, ValueRange b, IntegerAttr attr1, /*optional*/FloatAttr attr2, unsigned someRegionsCount) +// CHECK: static void build(OpBuilder &odsBuilder, OperationState &odsState, Type r, ArrayRef s, Value a, ValueRange b, APInt attr1, /*optional*/FloatAttr attr2, unsigned someRegionsCount) +// CHECK: static void build(OpBuilder &, OperationState &odsState, ArrayRef resultTypes, ValueRange operands, ArrayRef attributes, unsigned numRegions) // CHECK: static ParseResult parse(OpAsmParser &parser, OperationState &result); // CHECK: void print(OpAsmPrinter &p); // CHECK: LogicalResult verify(); @@ -119,8 +121,9 @@ def NS_EOp : NS_Op<"op_with_optionals", []> { // CHECK-LABEL: NS::EOp declarations // CHECK: Value a(); +// CHECK: ::mlir::MutableOperandRange aMutable(); // CHECK: Value b(); -// CHECK: static void build(Builder *odsBuilder, OperationState &odsState, /*optional*/Type b, /*optional*/Value a) +// CHECK: static void build(OpBuilder &odsBuilder, OperationState &odsState, /*optional*/Type b, /*optional*/Value a) // Check that default builders can be suppressed. 
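The aMutable()/bMutable() declarations checked above return the new ::mlir::MutableOperandRange, which lets a rewrite resize a named operand group in place. A sketch of the intended use, assuming the range's assign(ValueRange) API and the generated AOp from this test file:

void replaceAOperands(AOp op, ValueRange newA) {
  ::mlir::MutableOperandRange a = op.aMutable();
  // Replaces the whole `a` group. For ops with AttrSizedOperandSegments, the
  // OperandSegment hook wired up by the generated accessor keeps the
  // operand_segment_sizes attribute in sync.
  a.assign(newA);
}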
// --- diff --git a/mlir/test/mlir-tblgen/op-interface.td b/mlir/test/mlir-tblgen/op-interface.td index 7cda61da08e04..cb53a77ac0cb3 100644 --- a/mlir/test/mlir-tblgen/op-interface.td +++ b/mlir/test/mlir-tblgen/op-interface.td @@ -1,4 +1,5 @@ // RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL --dump-input-on-failure include "mlir/IR/OpBase.td" @@ -12,6 +13,14 @@ def TestOpInterface : OpInterface<"TestOpInterface"> { /*methodName=*/"foo", /*args=*/(ins "int":$input) >, + InterfaceMethod< + /*desc=*/[{some function comment}], + /*retTy=*/"int", + /*methodName=*/"default_foo", + /*args=*/(ins "int":$input), + /*body=*/[{}], + /*defaultBody=*/[{ return 0; }] + >, ]; } @@ -27,8 +36,19 @@ def OpInterfaceOp : Op; def DeclareMethodsOp : Op]>; +def DeclareMethodsWithDefaultOp : Op]>; + // DECL-LABEL: TestOpInterfaceInterfaceTraits // DECL: class TestOpInterface : public OpInterface // DECL: int foo(int input); // DECL-NOT: TestOpInterface + +// OP_DECL-LABEL: class DeclareMethodsOp : public +// OP_DECL: int foo(int input); +// OP_DECL-NOT: int default_foo(int input); + +// OP_DECL-LABEL: class DeclareMethodsWithDefaultOp : public +// OP_DECL: int foo(int input); +// OP_DECL: int default_foo(int input); diff --git a/mlir/test/mlir-tblgen/op-result.td b/mlir/test/mlir-tblgen/op-result.td index c8bdd15cf2f12..7886c2772a212 100644 --- a/mlir/test/mlir-tblgen/op-result.td +++ b/mlir/test/mlir-tblgen/op-result.td @@ -23,9 +23,9 @@ def OpB : NS_Op<"same_input_output_type_op", [SameOperandsAndResultType]> { } // CHECK-LABEL: OpB definitions -// CHECK: void OpB::build(Builder *odsBuilder, OperationState &odsState, Type y, Value x) +// CHECK: void OpB::build(OpBuilder &odsBuilder, OperationState &odsState, Type y, Value x) // CHECK: odsState.addTypes(y); -// CHECK: void OpB::build(Builder *odsBuilder, OperationState &odsState, Value x) +// CHECK: void OpB::build(OpBuilder &odsBuilder, OperationState &odsState, Value x) // CHECK: odsState.addTypes({x.getType()}); def OpC : NS_Op<"three_normal_result_op", []> { @@ -33,12 +33,12 @@ def OpC : NS_Op<"three_normal_result_op", []> { } // CHECK-LABEL: OpC definitions -// CHECK: void OpC::build(Builder *odsBuilder, OperationState &odsState, Type x, Type resultType1, Type z) +// CHECK: void OpC::build(OpBuilder &odsBuilder, OperationState &odsState, Type x, Type resultType1, Type z) // CHECK-NEXT: odsState.addTypes(x) // CHECK-NEXT: odsState.addTypes(resultType1) // CHECK-NEXT: odsState.addTypes(z) -// CHECK: void OpC::build(Builder *odsBuilder, OperationState &odsState, ArrayRef resultTypes) { +// CHECK: void OpC::build(OpBuilder &odsBuilder, OperationState &odsState, ArrayRef resultTypes) { // CHECK-NEXT: assert(resultTypes.size() == 3u && "mismatched number of results"); // CHECK-NEXT: odsState.addTypes(resultTypes); @@ -49,7 +49,7 @@ def OpD : NS_Op<"type_attr_as_result_type", [FirstAttrDerivedResultType]> { } // CHECK-LABEL: OpD definitions -// CHECK: void OpD::build(Builder *odsBuilder, OperationState &odsState, ValueRange operands, ArrayRef attributes) +// CHECK: void OpD::build(OpBuilder &odsBuilder, OperationState &odsState, ValueRange operands, ArrayRef attributes) // CHECK: odsState.addTypes({attr.second.cast().getValue()}); def OpE : NS_Op<"value_attr_as_result_type", [FirstAttrDerivedResultType]> { @@ -58,7 +58,7 @@ def OpE : NS_Op<"value_attr_as_result_type", 
[FirstAttrDerivedResultType]> { } // CHECK-LABEL: OpE definitions -// CHECK: void OpE::build(Builder *odsBuilder, OperationState &odsState, ValueRange operands, ArrayRef attributes) +// CHECK: void OpE::build(OpBuilder &odsBuilder, OperationState &odsState, ValueRange operands, ArrayRef attributes) // CHECK: odsState.addTypes({attr.second.getType()}); def OpF : NS_Op<"one_variadic_result_op", []> { @@ -77,7 +77,7 @@ def OpG : NS_Op<"one_normal_and_one_variadic_result_op", []> { // CHECK-LABEL: OpG definitions -// CHECK: void OpG::build(Builder *odsBuilder, OperationState &odsState, Type x, ArrayRef y) +// CHECK: void OpG::build(OpBuilder &odsBuilder, OperationState &odsState, Type x, ArrayRef y) // CHECK-NEXT: odsState.addTypes(x); // CHECK-NEXT: odsState.addTypes(y); @@ -109,5 +109,5 @@ def OpK : NS_Op<"only_input_is_variadic_with_same_value_type_op", [SameOperandsA let results = (outs AnyTensor:$result); } -// CHECK-LABEL: OpK::build(Builder *odsBuilder, OperationState &odsState, ValueRange input) +// CHECK-LABEL: OpK::build(OpBuilder &odsBuilder, OperationState &odsState, ValueRange input) // CHECK: odsState.addTypes({input.front().getType()}); diff --git a/mlir/tools/mlir-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-cpu-runner/CMakeLists.txt index 9903e8d86e7ef..596012c882280 100644 --- a/mlir/tools/mlir-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cpu-runner/CMakeLists.txt @@ -1,6 +1,12 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + nativecodegen + ) + add_llvm_tool(mlir-cpu-runner mlir-cpu-runner.cpp -) + ) llvm_update_compile_flags(mlir-cpu-runner) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) target_link_libraries(mlir-cpu-runner PRIVATE @@ -14,6 +20,4 @@ target_link_libraries(mlir-cpu-runner PRIVATE MLIRParser MLIRTargetLLVMIR MLIRSupport - LLVMCore - LLVMSupport ) diff --git a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt index b4fa6e35fc9a6..3736a18b20091 100644 --- a/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt +++ b/mlir/tools/mlir-linalg-ods-gen/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) add_llvm_tool(mlir-linalg-ods-gen mlir-linalg-ods-gen.cpp ) @@ -5,6 +9,4 @@ llvm_update_compile_flags(mlir-linalg-ods-gen) target_link_libraries(mlir-linalg-ods-gen PRIVATE MLIRParser MLIRSupport - LLVMCore - LLVMSupport ) diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index c1395f7655edd..d2dd1f5d9738f 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -1467,13 +1467,13 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, let results = (outs Variadic:$output_tensors); let regions = (region SizedRegion<1>:$region); let builders = [OpBuilder< - "Builder *b, OperationState &result, TypeRange outputTypes, " + "OpBuilder &b, OperationState &result, TypeRange outputTypes, " # "ValueRange views", [{{ result.addOperands(views); result.addTypes(outputTypes); - buildNamedStructuredOpRegion<{0}>( - *b, result, TypeRange(views), outputTypes); + buildNamedStructuredOpRegionAndAttributes<{0}>( + b, result, TypeRange(views), outputTypes); }]> ]; let parser = [{ @@ -1481,7 +1481,13 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, }]; let extraClassDeclaration = [{{ llvm::Optional> referenceIterators(); + static SmallVector referenceIterators( + TypeRange inputTypes, TypeRange outputTypes); + llvm::Optional> 
referenceIndexingMaps(); + static SmallVector referenceIndexingMaps( + TypeRange inputTypes, TypeRange outputTypes); + static void regionBuilder(Block &block); }]; })FMT"; @@ -1503,7 +1509,13 @@ void TCParser::printReferenceIterators(llvm::raw_ostream &os, ComprehensionParsingState &state) { const char *referenceReferenceIteratorsFmt = R"FMT( - llvm::Optional> {0}::referenceIterators() { + // This is temporary until we transition out of manually specified ops + // that should be auto-generated with linalg-ods-gen. + llvm::Optional> {0}::referenceIterators() {{ + llvm_unreachable("Unexpected missing `iterator_types` attribute."); + } + SmallVector {0}::referenceIterators( + TypeRange inputTypes, TypeRange outputTypes) { return SmallVector{{ {1} }; })FMT"; @@ -1536,15 +1548,27 @@ void TCParser::printReferenceIterators(llvm::raw_ostream &os, void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os, StringRef cppOpName, ComprehensionParsingState &state) { + // 1. Generic string template for specifying reference indexing maps. const char *referenceIndexingMapsFmt = R"FMT( - llvm::Optional> {0}::referenceIndexingMaps() { - MLIRContext *context = getContext(); + // This is temporary until we transition out of manually specified ops that + // should be auto-generated with linalg-ods-gen. + llvm::Optional> {0}::referenceIndexingMaps() {{ + llvm_unreachable("Unexpected missing `indexing_maps` attribute."); + } + SmallVector {0}::referenceIndexingMaps( + TypeRange inputTypes, TypeRange outputTypes) { + assert(!inputTypes.empty() && "At least one input expected"); + MLIRContext *context = (*inputTypes.begin()).getContext(); AffineExpr {1}; bindDims(context, {1}); return SmallVector{{ {2} }; })FMT"; + // 2. Print a comma-separated list of identifiers for the AffineExpr in + // `state.dims`. These will replace the `{1}` placeholder in both + // `AffineExpr {1}` and `bindDims(context, {1})` ensuring the AffineExpr + // identifiers are bound in the right order to the proper AffineDimExpr. std::string dimsStr; llvm::raw_string_ostream ss(dimsStr); llvm::interleaveComma( @@ -1552,10 +1576,14 @@ void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os, [&](std::pair p) { ss << p.second; }); ss.flush(); + // 3. Print a comma-separated list of AffineMap constructors that use the + // identifiers from 1. The AffineExpr use the common arithmetic operators on + // AffineExpr. These AffineMap constructors will replace the `{2}` placeholder + // in return `SmallVector{{ {2} };`. std::string mapsStr; llvm::raw_string_ostream mapsStringStream(mapsStr); SmallVector orderedUses(state.orderedTensorArgs.size()); - for (auto it : state.orderedTensorArgs) + for (const auto &it : state.orderedTensorArgs) orderedUses[it.second] = it.first; llvm::interleaveComma(orderedUses, mapsStringStream, [&](TensorUse u) { assert(u.indexingMap); @@ -1576,6 +1604,7 @@ void TCParser::printReferenceIndexingMaps(llvm::raw_ostream &os, }); mapsStringStream.flush(); + // 4. Apply format to 1. using 2. and 3. 
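// (Aside: for the matmul-like `test2` spec in test-linalg-ods-gen.tc above,
// the method instantiated from this template looks roughly like:
//
//   SmallVector<AffineMap, 8> Test2Op::referenceIndexingMaps(
//       TypeRange inputTypes, TypeRange outputTypes) {
//     assert(!inputTypes.empty() && "At least one input expected");
//     MLIRContext *context = (*inputTypes.begin()).getContext();
//     AffineExpr d0, d1, d2;
//     bindDims(context, d0, d1, d2);
//     return SmallVector<AffineMap, 8>{
//         AffineMap::get(3, 0, {d0, d2}, context),
//         AffineMap::get(3, 0, {d2, d1}, context),
//         AffineMap::get(3, 0, {d0, d1}, context)};
//   }
// )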
os << llvm::formatv(referenceIndexingMapsFmt, cppOpName, dimsStr, mapsStr); } diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index 55b5762923c3d..ca39f37a8d8d0 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -4,6 +4,12 @@ set(LLVM_OPTIONAL_SOURCES get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) +set(LLVM_LINK_COMPONENTS + Core + Support + AsmParser + ) + set(LIBS ${dialect_libs} ${conversion_libs} @@ -24,23 +30,30 @@ set(LIBS MLIRTestTransforms MLIRSupport MLIRIR - MLIROptLib - LLVMSupport - LLVMCore - LLVMAsmParser ) -add_llvm_library(MLIRMlirOptMain +# Exclude from libMLIR.so because this has static options intended for +# opt-like tools only. +add_mlir_library(MLIRMlirOptMain mlir-opt.cpp -) -target_link_libraries(MLIRMlirOptMain - PUBLIC + + EXCLUDE_FROM_LIBMLIR + + LINK_LIBS ${LIBS} -) + + DEPENDS + intrinsics_gen + mlir-headers + ) add_llvm_tool(mlir-opt - mlir-opt.cpp -) + mlir-opt.cpp + DEPENDS + ${LIBS} + ) +target_link_libraries(mlir-opt PRIVATE ${LIBS}) llvm_update_compile_flags(mlir-opt) -target_link_libraries(mlir-opt PRIVATE ${LIBS} ${targets_to_link}) + +mlir_check_link_libraries(mlir-opt) diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index fe8ae83a8154c..9d583dc2a3198 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -41,7 +41,7 @@ void registerSymbolTestPasses(); void registerTestAffineDataCopyPass(); void registerTestAllReduceLoweringPass(); void registerTestAffineLoopUnswitchingPass(); -void registerTestLinalgMatmulToVectorPass(); +void registerTestBufferPlacementPreparationPass(); void registerTestLoopPermutationPass(); void registerTestCallGraphPass(); void registerTestConstantFold(); @@ -53,6 +53,7 @@ void registerTestLinalgTransforms(); void registerTestLivenessPass(); void registerTestLoopFusion(); void registerTestLoopMappingPass(); +void registerTestLoopUnrollingPass(); void registerTestMatchers(); void registerTestMemRefDependenceCheck(); void registerTestMemRefStrideCalculation(); @@ -105,13 +106,13 @@ void registerTestPasses() { registerTestAffineDataCopyPass(); registerTestAllReduceLoweringPass(); registerTestAffineLoopUnswitchingPass(); - registerTestLinalgMatmulToVectorPass(); registerTestLoopPermutationPass(); registerTestCallGraphPass(); registerTestConstantFold(); #if MLIR_CUDA_CONVERSIONS_ENABLED registerTestConvertGPUKernelToCubinPass(); #endif + registerTestBufferPlacementPreparationPass(); registerTestDominancePass(); registerTestFunc(); registerTestGpuMemoryPromotionPass(); @@ -119,6 +120,7 @@ void registerTestPasses() { registerTestLivenessPass(); registerTestLoopFusion(); registerTestLoopMappingPass(); + registerTestLoopUnrollingPass(); registerTestMatchers(); registerTestMemRefDependenceCheck(); registerTestMemRefStrideCalculation(); @@ -150,9 +152,9 @@ int main(int argc, char **argv) { // Parse pass names in main to ensure static initialization completed. 
cl::ParseCommandLineOptions(argc, argv, "MLIR modular optimizer driver\n"); - MLIRContext context; if(showDialects) { llvm::outs() << "Registered Dialects:\n"; + MLIRContext context; for(Dialect *dialect : context.getRegisteredDialects()) { llvm::outs() << dialect->getNamespace() << "\n"; } diff --git a/mlir/tools/mlir-shlib/CMakeLists.txt b/mlir/tools/mlir-shlib/CMakeLists.txt index d0e2e959652fd..32fe833cee4ea 100644 --- a/mlir/tools/mlir-shlib/CMakeLists.txt +++ b/mlir/tools/mlir-shlib/CMakeLists.txt @@ -8,8 +8,10 @@ if (MSVC) return() endif() -get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS) +get_property(mlir_libs GLOBAL PROPERTY MLIR_STATIC_LIBS) +get_property(mlir_llvm_link_components GLOBAL PROPERTY MLIR_LLVM_LINK_COMPONENTS) list(REMOVE_DUPLICATES mlir_libs) +list(REMOVE_DUPLICATES mlir_llvm_link_components) foreach (lib ${mlir_libs}) if(XCODE) @@ -19,23 +21,32 @@ foreach (lib ${mlir_libs}) else() list(APPEND _OBJECTS $) endif() - list(APPEND _DEPS $) + # libClang needs this, but it causes problems for MLIR (probably + # because we use public library dependencies within MLIR.) + # list(APPEND _DEPS $) endforeach () if(MLIR_LINK_MLIR_DYLIB) set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN) endif() -# libMLIR.so depends on LLVM components. To avoid multiple -# copies of those LLVM components, libMLIR.so depends on libLLVM.so. -# This probably won't work if some LLVM components are not included -# in libLLVM.so. if(LLVM_BUILD_LLVM_DYLIB) - add_llvm_library(MLIR + add_mlir_library( + MLIR SHARED ${INSTALL_WITH_TOOLCHAIN} - mlir-shlib.cpp - ) - target_link_libraries(MLIR PRIVATE LLVM ${LLVM_PTHREAD_LIB}) + ${_OBJECTS} + LINK_LIBS + ${_DEPS} + + LINK_COMPONENTS + ${mlir_llvm_link_components} + ) + target_link_libraries(MLIR PRIVATE ${LLVM_PTHREAD_LIB}) endif() + +#message("Libraries included in libMLIR.so: ${mlir_libs}") +#message("LLVM Components included in libMLIR.so: ${mlir_llvm_link_components}") + +mlir_check_all_link_libraries(MLIR) diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index 19e6230e54ac8..4c54e250f2c58 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS - MLIRTableGen + Demangle Support + TableGen ) add_tablegen(mlir-tblgen MLIR @@ -19,4 +20,10 @@ add_tablegen(mlir-tblgen MLIR SPIRVUtilsGen.cpp StructsGen.cpp ) + set_target_properties(mlir-tblgen PROPERTIES FOLDER "Tablegenning") +target_link_libraries(mlir-tblgen + PRIVATE + LLVMMLIRTableGen) + +mlir_check_all_link_libraries(mlir-tblgen) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index ad9c070c4530f..29df09b551b89 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -45,25 +45,23 @@ static const char *const builderOpState = "odsState"; // {1}: The total number of non-variadic operands/results. // {2}: The total number of variadic operands/results. // {3}: The total number of actual values. -// {4}: The begin iterator of the actual values. -// {5}: "operand" or "result". +// {4}: "operand" or "result". const char *sameVariadicSizeValueRangeCalcCode = R"( bool isVariadic[] = {{{0}}; int prevVariadicCount = 0; for (unsigned i = 0; i < index; ++i) if (isVariadic[i]) ++prevVariadicCount; - // Calculate how many dynamic values a static variadic {5} corresponds to. - // This assumes all static variadic {5}s have the same dynamic value count. 
+ // Calculate how many dynamic values a static variadic {4} corresponds to. + // This assumes all static variadic {4}s have the same dynamic value count. int variadicSize = ({3} - {1}) / {2}; // `index` passed in as the parameter is the static index which counts each - // {5} (variadic or not) as size 1. So here for each previous static variadic - // {5}, we need to offset by (variadicSize - 1) to get where the dynamic - // value pack for this static {5} starts. - int offset = index + (variadicSize - 1) * prevVariadicCount; + // {4} (variadic or not) as size 1. So here for each previous static variadic + // {4}, we need to offset by (variadicSize - 1) to get where the dynamic + // value pack for this static {4} starts. + int start = index + (variadicSize - 1) * prevVariadicCount; int size = isVariadic[index] ? variadicSize : 1; - - return {{std::next({4}, offset), std::next({4}, offset + size)}; + return {{start, size}; )"; // The logic to calculate the actual value range for a declared operand/result @@ -72,14 +70,23 @@ const char *sameVariadicSizeValueRangeCalcCode = R"( // (variadic or not). // // {0}: The name of the attribute specifying the segment sizes. -// {1}: The begin iterator of the actual values. const char *attrSizedSegmentValueRangeCalcCode = R"( auto sizeAttr = getAttrOfType("{0}"); unsigned start = 0; for (unsigned i = 0; i < index; ++i) start += (*(sizeAttr.begin() + i)).getZExtValue(); - unsigned end = start + (*(sizeAttr.begin() + index)).getZExtValue(); - return {{std::next({1}, start), std::next({1}, end)}; + unsigned size = (*(sizeAttr.begin() + index)).getZExtValue(); + return {{start, size}; +)"; + +// The logic to build a range of either operand or result values. +// +// {0}: The begin iterator of the actual values. +// {1}: The call to generate the start and length of the value range. +const char *valueRangeReturnCode = R"( + auto valueRange = {1}; + return {{std::next({0}, valueRange.first), + std::next({0}, valueRange.first + valueRange.second)}; )"; static const char *const opCommentHeader = R"( @@ -177,6 +184,9 @@ class OpEmitter { // Generates getters for named operands. void genNamedOperandGetters(); + // Generates setters for named operands. + void genNamedOperandSetters(); + // Generates getters for named results. void genNamedResultGetters(); @@ -310,6 +320,7 @@ OpEmitter::OpEmitter(const Operator &op) genOpAsmInterface(); genOpNameGetter(); genNamedOperandGetters(); + genNamedOperandSetters(); genNamedResultGetters(); genNamedRegionGetters(); genNamedSuccessorGetters(); @@ -478,6 +489,37 @@ void OpEmitter::genAttrSetters() { } } +// Generates the code to compute the start and end index of an operand or result +// range. +template +static void +generateValueRangeStartAndEnd(Class &opClass, StringRef methodName, + int numVariadic, int numNonVariadic, + StringRef rangeSizeCall, bool hasAttrSegmentSize, + StringRef segmentSizeAttr, RangeT &&odsValues) { + auto &method = opClass.newMethod("std::pair", methodName, + "unsigned index"); + + if (numVariadic == 0) { + method.body() << " return {index, 1};\n"; + } else if (hasAttrSegmentSize) { + method.body() << formatv(attrSizedSegmentValueRangeCalcCode, + segmentSizeAttr); + } else { + // Because the op can have arbitrarily interleaved variadic and non-variadic + // operands, we need to embed a list in the "sink" getter method for + // calculation at run-time. + llvm::SmallVector isVariadic; + isVariadic.reserve(llvm::size(odsValues)); + for (auto &it : odsValues) + isVariadic.push_back(it.isVariableLength() ? 
"true" : "false"); + std::string isVariadicList = llvm::join(isVariadic, ", "); + method.body() << formatv(sameVariadicSizeValueRangeCalcCode, isVariadicList, + numNonVariadic, numVariadic, rangeSizeCall, + "operand"); + } +} + // Generates the named operand getter methods for the given Operator `op` and // puts them in `opClass`. Uses `rangeType` as the return type of getters that // return a range of operands (individual operands are `Value ` and each @@ -519,32 +561,16 @@ static void generateNamedOperandGetters(const Operator &op, Class &opClass, "'SameVariadicOperandSize' traits"); } - // First emit a "sink" getter method upon which we layer all nicer named + // First emit a few "sink" getter methods upon which we layer all nicer named // getter methods. - auto &m = opClass.newMethod(rangeType, "getODSOperands", "unsigned index"); + generateValueRangeStartAndEnd( + opClass, "getODSOperandIndexAndLength", numVariadicOperands, + numNormalOperands, rangeSizeCall, attrSizedOperands, + "operand_segment_sizes", const_cast(op).getOperands()); - if (numVariadicOperands == 0) { - // We still need to match the return type, which is a range. - m.body() << " return {std::next(" << rangeBeginCall - << ", index), std::next(" << rangeBeginCall << ", index + 1)};"; - } else if (attrSizedOperands) { - m.body() << formatv(attrSizedSegmentValueRangeCalcCode, - "operand_segment_sizes", rangeBeginCall); - } else { - // Because the op can have arbitrarily interleaved variadic and non-variadic - // operands, we need to embed a list in the "sink" getter method for - // calculation at run-time. - llvm::SmallVector isVariadic; - isVariadic.reserve(numOperands); - for (int i = 0; i < numOperands; ++i) - isVariadic.push_back(op.getOperand(i).isVariableLength() ? "true" - : "false"); - std::string isVariadicList = llvm::join(isVariadic, ", "); - - m.body() << formatv(sameVariadicSizeValueRangeCalcCode, isVariadicList, - numNormalOperands, numVariadicOperands, rangeSizeCall, - rangeBeginCall, "operand"); - } + auto &m = opClass.newMethod(rangeType, "getODSOperands", "unsigned index"); + m.body() << formatv(valueRangeReturnCode, rangeBeginCall, + "getODSOperandIndexAndLength(index)"); // Then we emit nicer named getter methods by redirecting to the "sink" getter // method. 
@@ -579,6 +605,26 @@ void OpEmitter::genNamedOperandGetters() { /*getOperandCallPattern=*/"getOperation()->getOperand({0})"); } +void OpEmitter::genNamedOperandSetters() { + auto *attrSizedOperands = op.getTrait("OpTrait::AttrSizedOperandSegments"); + for (int i = 0, e = op.getNumOperands(); i != e; ++i) { + const auto &operand = op.getOperand(i); + if (operand.name.empty()) + continue; + auto &m = opClass.newMethod("::mlir::MutableOperandRange", + (operand.name + "Mutable").str()); + auto &body = m.body(); + body << " auto range = getODSOperandIndexAndLength(" << i << ");\n" + << " return ::mlir::MutableOperandRange(getOperation(), " + "range.first, range.second"; + if (attrSizedOperands) + body << ", ::mlir::MutableOperandRange::OperandSegment(" << i + << "u, *getOperation()->getMutableAttrDict().getNamed(" + "\"operand_segment_sizes\"))"; + body << ");\n"; + } +} + void OpEmitter::genNamedResultGetters() { const int numResults = op.getNumResults(); const int numVariadicResults = op.getNumVariableLengthResults(); @@ -607,29 +653,14 @@ void OpEmitter::genNamedResultGetters() { "'SameVariadicResultSize' traits"); } + generateValueRangeStartAndEnd( + opClass, "getODSResultIndexAndLength", numVariadicResults, + numNormalResults, "getOperation()->getNumResults()", attrSizedResults, + "result_segment_sizes", op.getResults()); auto &m = opClass.newMethod("Operation::result_range", "getODSResults", "unsigned index"); - - if (numVariadicResults == 0) { - m.body() << " return {std::next(getOperation()->result_begin(), index), " - "std::next(getOperation()->result_begin(), index + 1)};"; - } else if (attrSizedResults) { - m.body() << formatv(attrSizedSegmentValueRangeCalcCode, - "result_segment_sizes", - "getOperation()->result_begin()"); - } else { - llvm::SmallVector isVariadic; - isVariadic.reserve(numResults); - for (int i = 0; i < numResults; ++i) - isVariadic.push_back(op.getResult(i).isVariableLength() ? "true" - : "false"); - std::string isVariadicList = llvm::join(isVariadic, ", "); - - m.body() << formatv(sameVariadicSizeValueRangeCalcCode, isVariadicList, - numNormalResults, numVariadicResults, - "getOperation()->getNumResults()", - "getOperation()->result_begin()", "result"); - } + m.body() << formatv(valueRangeReturnCode, "getOperation()->result_begin()", + "getODSResultIndexAndLength(index)"); for (int i = 0; i != numResults; ++i) { const auto &result = op.getResult(i); @@ -743,7 +774,7 @@ void OpEmitter::genSeparateArgParamBuilder() { // TODO(jpienaar): Expand to handle regions. body << formatv(R"( SmallVector inferredReturnTypes; - if (succeeded({0}::inferReturnTypes(odsBuilder->getContext(), + if (succeeded({0}::inferReturnTypes(odsBuilder.getContext(), {1}.location, {1}.operands, {1}.attributes, /*regions=*/{{}, inferredReturnTypes))) {1}.addTypes(inferredReturnTypes); @@ -800,7 +831,7 @@ void OpEmitter::genUseOperandAsResultTypeCollectiveParamBuilder() { // Signature std::string params = - std::string("Builder *odsBuilder, OperationState &") + builderOpState + + std::string("OpBuilder &odsBuilder, OperationState &") + builderOpState + ", ValueRange operands, ArrayRef attributes"; if (op.getNumVariadicRegions()) params += ", unsigned numRegions"; @@ -830,7 +861,7 @@ void OpEmitter::genUseOperandAsResultTypeCollectiveParamBuilder() { void OpEmitter::genInferredTypeCollectiveParamBuilder() { // TODO(jpienaar): Expand to support regions. 
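// (Aside: after this refactor, the emitted accessors for an op with no
// variable-length operands look roughly like the following (hypothetical
// op name; the iterator expression mirrors the result-side call above):
//
//   std::pair<unsigned, unsigned>
//   MyOp::getODSOperandIndexAndLength(unsigned index) {
//     return {index, 1};
//   }
//
//   Operation::operand_range MyOp::getODSOperands(unsigned index) {
//     auto valueRange = getODSOperandIndexAndLength(index);
//     return {std::next(getOperation()->operand_begin(), valueRange.first),
//             std::next(getOperation()->operand_begin(),
//                       valueRange.first + valueRange.second)};
//   }
// )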
const char *params = - "Builder *odsBuilder, OperationState &{0}, " + "OpBuilder &odsBuilder, OperationState &{0}, " "ValueRange operands, ArrayRef attributes"; auto &m = opClass.newMethod("void", "build", formatv(params, builderOpState).str(), @@ -838,7 +869,7 @@ void OpEmitter::genInferredTypeCollectiveParamBuilder() { auto &body = m.body(); body << formatv(R"( SmallVector inferredReturnTypes; - if (succeeded({0}::inferReturnTypes(odsBuilder->getContext(), + if (succeeded({0}::inferReturnTypes(odsBuilder.getContext(), {1}.location, operands, attributes, /*regions=*/{{}, inferredReturnTypes))) build(odsBuilder, odsState, inferredReturnTypes, operands, attributes); @@ -871,7 +902,7 @@ void OpEmitter::genUseOperandAsResultTypeSeparateParamBuilder() { void OpEmitter::genUseAttrAsResultTypeBuilder() { std::string params = - std::string("Builder *odsBuilder, OperationState &") + builderOpState + + std::string("OpBuilder &odsBuilder, OperationState &") + builderOpState + ", ValueRange operands, ArrayRef attributes"; auto &m = opClass.newMethod("void", "build", params, OpMethod::MP_Static); auto &body = m.body(); @@ -961,7 +992,7 @@ void OpEmitter::genCollectiveParamBuilder() { int numVariadicOperands = op.getNumVariableLengthOperands(); int numNonVariadicOperands = numOperands - numVariadicOperands; // Signature - std::string params = std::string("Builder *, OperationState &") + + std::string params = std::string("OpBuilder &, OperationState &") + builderOpState + ", ArrayRef resultTypes, ValueRange operands, " "ArrayRef attributes"; @@ -1013,7 +1044,7 @@ void OpEmitter::buildParamList(std::string ¶mList, auto numResults = op.getNumResults(); resultTypeNames.reserve(numResults); - paramList = "Builder *odsBuilder, OperationState &"; + paramList = "OpBuilder &odsBuilder, OperationState &"; paramList.append(builderOpState); switch (typeParamKind) { @@ -1151,7 +1182,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder(OpMethodBody &body, if (op.getTrait("OpTrait::AttrSizedOperandSegments")) { body << " " << builderOpState << ".addAttribute(\"operand_segment_sizes\", " - "odsBuilder->getI32VectorAttr({"; + "odsBuilder.getI32VectorAttr({"; interleaveComma(llvm::seq(0, op.getNumOperands()), body, [&](int i) { if (op.getOperand(i).isOptional()) body << "(" << getArgumentName(op, i) << " ? 1 : 0)"; @@ -1175,7 +1206,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder(OpMethodBody &body, // If this is a raw value, then we need to wrap it in an Attribute // instance. FmtContext fctx; - fctx.withBuilder("(*odsBuilder)"); + fctx.withBuilder("odsBuilder"); std::string builderTemplate = std::string(attr.getConstBuilderTemplate()); @@ -1251,10 +1282,23 @@ void OpEmitter::genOpInterfaceMethods() { if (!opTrait || !opTrait->shouldDeclareMethods()) continue; auto interface = opTrait->getOpInterface(); - for (auto method : interface.getMethods()) { - // Don't declare if the method has a body or a default implementation. - if (method.getBody() || method.getDefaultImplementation()) + + // Get the set of methods that should always be declared. + auto alwaysDeclaredMethodsVec = opTrait->getAlwaysDeclaredMethods(); + llvm::StringSet<> alwaysDeclaredMethods; + alwaysDeclaredMethods.insert(alwaysDeclaredMethodsVec.begin(), + alwaysDeclaredMethodsVec.end()); + + for (const OpInterfaceMethod &method : interface.getMethods()) { + // Don't declare if the method has a body. 
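// (Aside: together with the alwaysDeclaredMethods set built above, these two
// skips give ops opt-in control over defaulted interface methods. In the
// op-interface.td test earlier in this patch, a plain
// DeclareOpInterfaceMethods<TestOpInterface> still omits default_foo, while
// DeclareOpInterfaceMethods<TestOpInterface, ["default_foo"]> forces its
// declaration, matching the OP_DECL checks there.)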
+ if (method.getBody()) continue; + // Don't declare if the method has a default implementation and the op + // didn't request that it always be declared. + if (method.getDefaultImplementation() && + !alwaysDeclaredMethods.count(method.getName())) + continue; + std::string args; llvm::raw_string_ostream os(args); interleaveComma(method.getArguments(), os, diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index a8116e4290b48..47c145df29e0c 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -741,10 +741,8 @@ void OperationFormat::genParserTypeResolution(Operator &op, // Initialize the set of buildable types. if (!buildableTypes.empty()) { - body << " Builder &builder = parser.getBuilder();\n"; - FmtContext typeBuilderCtx; - typeBuilderCtx.withBuilder("builder"); + typeBuilderCtx.withBuilder("parser.getBuilder()"); for (auto &it : buildableTypes) body << " Type odsBuildableType" << it.second << " = " << tgfmt(it.first, &typeBuilderCtx) << ";\n"; @@ -867,7 +865,7 @@ void OperationFormat::genParserVariadicSegmentResolution(Operator &op, OpMethodBody &body) { if (!allOperands && op.getTrait("OpTrait::AttrSizedOperandSegments")) { body << " result.addAttribute(\"operand_segment_sizes\", " - << "builder.getI32VectorAttr({"; + << "parser.getBuilder().getI32VectorAttr({"; auto interleaveFn = [&](const NamedTypeConstraint &operand) { // If the operand is variadic emit the parsed size. if (operand.isVariableLength()) diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index b988333c52542..a693b02112474 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -146,7 +146,7 @@ static void emitTraitDecl(OpInterface &interface, raw_ostream &os, StringRef interfaceName, StringRef interfaceTraitsName) { os << " template \n " - << llvm::formatv("struct Trait : public OpInterface<{0}," + << llvm::formatv("struct {0}Trait : public OpInterface<{0}," " detail::{1}>::Trait {{\n", interfaceName, interfaceTraitsName); @@ -171,11 +171,18 @@ static void emitTraitDecl(OpInterface &interface, raw_ostream &os, tblgen::FmtContext traitCtx; traitCtx.withOp("op"); if (auto verify = interface.getVerify()) { - os << " static LogicalResult verifyTrait(Operation* op) {\n" + os << " static LogicalResult verifyTrait(Operation* op) {\n" << std::string(tblgen::tgfmt(*verify, &traitCtx)) << "\n }\n"; } + if (auto extraTraitDecls = interface.getExtraTraitClassDeclaration()) + os << extraTraitDecls << "\n"; os << " };\n"; + + // Emit a utility wrapper trait class. + os << " template \n " + << llvm::formatv("struct Trait : public {0}Trait {{};\n", + interfaceName); } static void emitInterfaceDecl(OpInterface &interface, raw_ostream &os) { diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index 750b85481878b..b4ff1797f8432 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -660,6 +660,8 @@ static void emitSerializationFunction(const Record *attrClass, opVar, record->getValueAsString("extendedInstSetName"), record->getValueAsInt("extendedInstOpcode"), operands); } else { + // Emit debug info. 
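// (Aside: emitDebugLine presumably encodes an OpLine instruction ahead of
// the serialized op. Per the SPIR-V rule quoted in the deserializer change
// below, a line annotation applies to the physically following instructions
// until the end of the block, which is why deserialization clears its
// tracked location after terminator-like ops.)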
+ os << formatv(" emitDebugLine(functionBody, {0}.getLoc());\n", opVar); os << formatv(" encodeInstructionInto(" "functionBody, spirv::getOpcode<{0}>(), {1});\n", op.getQualCppClassName(), operands); @@ -900,14 +902,22 @@ static void emitDeserializationFunction(const Record *attrClass, emitOperandDeserialization(op, record->getLoc(), " ", words, wordIndex, operands, attributes, os); - os << formatv( - " auto {1} = opBuilder.create<{0}>(unknownLoc, {2}, {3}, {4}); " - "(void){1};\n", - op.getQualCppClassName(), opVar, resultTypes, operands, attributes); + os << formatv(" Location loc = createFileLineColLoc(opBuilder);\n"); + os << formatv(" auto {1} = opBuilder.create<{0}>(loc, {2}, {3}, {4}); " + "(void){1};\n", + op.getQualCppClassName(), opVar, resultTypes, operands, + attributes); if (op.getNumResults() == 1) { os << formatv(" valueMap[{0}] = {1}.getResult();\n\n", valueID, opVar); } + // According to SPIR-V spec: + // This location information applies to the instructions physically following + // this instruction, up to the first occurrence of any of the following: the + // next end of block. + os << formatv(" if ({0}.hasTrait())\n", opVar); + os << formatv(" clearDebugLine();\n"); + // Decorations emitDecorationDeserialization(op, " ", valueID, attributes, os); os << " return success();\n"; diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt index 9dc5971703ab1..897e7adc03bd1 100644 --- a/mlir/tools/mlir-translate/CMakeLists.txt +++ b/mlir/tools/mlir-translate/CMakeLists.txt @@ -1,16 +1,24 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) -set(LIBS + +add_llvm_tool(mlir-translate + mlir-translate.cpp + ) +llvm_update_compile_flags(mlir-translate) +target_link_libraries(mlir-translate + PRIVATE ${dialect_libs} ${translation_libs} + MLIRIR MLIRParser MLIRPass MLIRSPIRV MLIRTranslation MLIRSupport -) -add_llvm_tool(mlir-translate - mlir-translate.cpp -) -llvm_update_compile_flags(mlir-translate) -target_link_libraries(mlir-translate PRIVATE MLIRIR MLIRTranslation ${LIBS} LLVMSupport) + ) + +mlir_check_link_libraries(mlir-translate) diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp index 5de561a859565..914bd340b3f56 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -50,9 +50,13 @@ static llvm::cl::opt verifyDiagnostics( namespace mlir { // Defined in the test directory, no public header. 
 void registerTestRoundtripSPIRV();
+void registerTestRoundtripDebugSPIRV();
 } // namespace mlir

-static void registerTestTranslations() { registerTestRoundtripSPIRV(); }
+static void registerTestTranslations() {
+  registerTestRoundtripSPIRV();
+  registerTestRoundtripDebugSPIRV();
+}

 int main(int argc, char **argv) {
   registerAllDialects();
diff --git a/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp b/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp
index c9f1e1570fa64..06c417ca23f74 100644
--- a/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp
+++ b/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp
@@ -38,7 +38,7 @@ class SerializationTest : public ::testing::Test {
   SerializationTest() { createModuleOp(); }

   void createModuleOp() {
-    Builder builder(&context);
+    OpBuilder builder(&context);
     OperationState state(UnknownLoc::get(&context),
                          spirv::ModuleOp::getOperationName());
     state.addAttribute("addressing_model",
@@ -51,7 +51,7 @@ class SerializationTest : public ::testing::Test {
                        spirv::VerCapExtAttr::get(
                            spirv::Version::V_1_0, ArrayRef<spirv::Capability>(),
                            ArrayRef<spirv::Extension>(), &context));
-    spirv::ModuleOp::build(&builder, state);
+    spirv::ModuleOp::build(builder, state);
     module = cast<spirv::ModuleOp>(Operation::create(state));
   }
diff --git a/mlir/unittests/IR/AttributeTest.cpp b/mlir/unittests/IR/AttributeTest.cpp
index ad4b422eae91b..7d22b5e5a07fc 100644
--- a/mlir/unittests/IR/AttributeTest.cpp
+++ b/mlir/unittests/IR/AttributeTest.cpp
@@ -154,4 +154,12 @@ TEST(DenseSplatTest, StringSplat) {
   testSplat(stringType, value);
 }

+TEST(DenseSplatTest, StringAttrSplat) {
+  MLIRContext context;
+  Type stringType =
+      OpaqueType::get(Identifier::get("test", &context), "string", &context);
+  Attribute stringAttr = StringAttr::get("test-string", stringType);
+  testSplat(stringType, stringAttr);
+}
+
 } // end namespace
diff --git a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp
index 3cfb62553bfbd..95ddcccc565e3 100644
--- a/mlir/unittests/IR/OperationSupportTest.cpp
+++ b/mlir/unittests/IR/OperationSupportTest.cpp
@@ -33,7 +33,7 @@ TEST(OperandStorageTest, NonResizable) {
   Value operand = useOp->getResult(0);

   // Create a non-resizable operation with one operand.
-  Operation *user = createOp(&context, operand, builder.getIntegerType(16));
+  Operation *user = createOp(&context, operand);

   // The same number of operands is okay.
   user->setOperands(operand);
@@ -57,7 +57,7 @@ TEST(OperandStorageTest, Resizable) {
   Value operand = useOp->getResult(0);

   // Create a resizable operation with one operand.
-  Operation *user = createOp(&context, operand, builder.getIntegerType(16));
+  Operation *user = createOp(&context, operand);

   // The same number of operands is okay.
   user->setOperands(operand);
@@ -76,4 +76,77 @@ TEST(OperandStorageTest, Resizable) {
   useOp->destroy();
 }

+TEST(OperandStorageTest, RangeReplace) {
+  MLIRContext context;
+  Builder builder(&context);
+
+  Operation *useOp =
+      createOp(&context, /*operands=*/llvm::None, builder.getIntegerType(16));
+  Value operand = useOp->getResult(0);
+
+  // Create a resizable operation with one operand.
+  Operation *user = createOp(&context, operand);
+
+  // Check setting with the same number of operands.
+  user->setOperands(/*start=*/0, /*length=*/1, operand);
+  EXPECT_EQ(user->getNumOperands(), 1u);
+
+  // Check setting with more operands.
+  user->setOperands(/*start=*/0, /*length=*/1, {operand, operand, operand});
+  EXPECT_EQ(user->getNumOperands(), 3u);
+
+  // Check setting with fewer operands.
+  user->setOperands(/*start=*/1, /*length=*/2, {operand});
+  EXPECT_EQ(user->getNumOperands(), 2u);
+
+  // Check inserting without replacing operands.
+  user->setOperands(/*start=*/2, /*length=*/0, {operand});
+  EXPECT_EQ(user->getNumOperands(), 3u);
+
+  // Check erasing operands.
+  user->setOperands(/*start=*/0, /*length=*/3, {});
+  EXPECT_EQ(user->getNumOperands(), 0u);
+
+  // Destroy the operations.
+  user->destroy();
+  useOp->destroy();
+}
+
+TEST(OperandStorageTest, MutableRange) {
+  MLIRContext context;
+  Builder builder(&context);
+
+  Operation *useOp =
+      createOp(&context, /*operands=*/llvm::None, builder.getIntegerType(16));
+  Value operand = useOp->getResult(0);
+
+  // Create a resizable operation with one operand.
+  Operation *user = createOp(&context, operand);
+
+  // Check setting with the same number of operands.
+  MutableOperandRange mutableOperands(user);
+  mutableOperands.assign(operand);
+  EXPECT_EQ(mutableOperands.size(), 1u);
+  EXPECT_EQ(user->getNumOperands(), 1u);
+
+  // Check setting with more operands.
+  mutableOperands.assign({operand, operand, operand});
+  EXPECT_EQ(mutableOperands.size(), 3u);
+  EXPECT_EQ(user->getNumOperands(), 3u);
+
+  // Check appending new operands.
+  mutableOperands.append({operand, operand});
+  EXPECT_EQ(mutableOperands.size(), 5u);
+  EXPECT_EQ(user->getNumOperands(), 5u);
+
+  // Check erasing operands.
+  mutableOperands.clear();
+  EXPECT_EQ(mutableOperands.size(), 0u);
+  EXPECT_EQ(user->getNumOperands(), 0u);
+
+  // Destroy the operations.
+  user->destroy();
+  useOp->destroy();
+}
+
 } // end namespace
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 4ad58e290252d..7e089a17370e5 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -380,7 +380,7 @@ class DeviceRTLTy {
     return DeviceId >= 0 && DeviceId < NumberOfDevices;
   }

-  bool getNumOfDevices() const { return NumberOfDevices; }
+  int getNumOfDevices() const { return NumberOfDevices; }

   void setRequiresFlag(const int64_t Flags) { this->RequiresFlags = Flags; }
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
index e754a3103a12c..2b9a77b93926b 100644
--- a/polly/lib/CMakeLists.txt
+++ b/polly/lib/CMakeLists.txt
@@ -21,6 +21,34 @@ if (MSVC_IDE OR XCODE)
   file(GLOB_RECURSE POLLY_HEADER_FILES "${POLLY_SOURCE_DIR}/include/polly/*.h")
 endif ()

+set(POLLY_COMPONENTS
+  Support
+  Core
+  ScalarOpts
+  InstCombine
+  TransformUtils
+  Analysis
+  ipo
+  MC
+  Passes
+  Linker
+  IRReader
+  Analysis
+  # The libraries below are required for darwin: http://PR26392
+  BitReader
+  MCParser
+  Object
+  ProfileData
+  Target
+  Vectorize
+)
+
+# Polly-ACC requires the NVPTX backend to work. Ask LLVM about its libraries.
+if (GPU_CODEGEN)
+  # This call emits an error if the NVPTX backend is not enabled.
+  list(APPEND POLLY_COMPONENTS NVPTX)
+endif ()
+
 # Use an object-library to add the same files to multiple libs without requiring
 # the sources to be recompiled for each of them.
 add_llvm_pass_plugin(Polly
@@ -71,6 +99,9 @@ add_llvm_pass_plugin(Polly
   Transform/RewriteByReferenceParameters.cpp
   Transform/ScopInliner.cpp
   ${POLLY_HEADER_FILES}
+
+  LINK_COMPONENTS
+  ${POLLY_COMPONENTS}
   )
 set_target_properties(obj.Polly PROPERTIES FOLDER "Polly")
 set_target_properties(Polly PROPERTIES FOLDER "Polly")
@@ -99,45 +130,7 @@ if (GPU_CODEGEN)
   target_link_libraries(Polly PUBLIC PollyPPCG)
 endif ()
-
-# Polly-ACC requires the NVPTX backend to work. Ask LLVM about its libraries.
-set(nvptx_libs)
-if (GPU_CODEGEN)
-  # This call emits an error if they NVPTX backend is not enable.
-  llvm_map_components_to_libnames(nvptx_libs NVPTX)
-endif ()
-
-if (LLVM_LINK_LLVM_DYLIB AND NOT LLVM_POLLY_LINK_INTO_TOOLS)
-  # The shlib/dylib contains all the LLVM components
-  # (including NVPTX is enabled) already. Adding them to target_link_libraries
-  # would cause them being twice in the address space
-  # (their LLVM*.a/so and their copies in libLLVM.so)
-  # which results in errors when the two instances try to register the same
-  # command-line switches.
-  target_link_libraries(Polly PUBLIC LLVM)
-else ()
-  target_link_libraries(Polly PUBLIC
-    LLVMSupport
-    LLVMCore
-    LLVMScalarOpts
-    LLVMInstCombine
-    LLVMTransformUtils
-    LLVMAnalysis
-    LLVMipo
-    LLVMMC
-    LLVMPasses
-    LLVMLinker
-    LLVMIRReader
-    ${nvptx_libs}
-    # The libraries below are required for darwin: http://PR26392
-    LLVMBitReader
-    LLVMMCParser
-    LLVMObject
-    LLVMProfileData
-    LLVMTarget
-    LLVMVectorize
-  )
-
+if (NOT LLVM_LINK_LLVM_DYLIB AND NOT LLVM_POLLY_LINK_INTO_TOOLS)
   # Polly-ACC requires the NVPTX target to be present in the executable it is linked to
   set_property(TARGET bugpoint APPEND PROPERTY LINK_LIBRARIES LLVMTarget)
 endif ()
diff --git a/polly/lib/Support/ISLTools.cpp b/polly/lib/Support/ISLTools.cpp
index c18ccdce4ed37..a7c10314c084c 100644
--- a/polly/lib/Support/ISLTools.cpp
+++ b/polly/lib/Support/ISLTools.cpp
@@ -159,8 +159,13 @@ isl::set polly::singleton(isl::union_set USet, isl::space ExpectedSpace) {
 unsigned polly::getNumScatterDims(const isl::union_map &Schedule) {
   unsigned Dims = 0;
-  for (isl::map Map : Schedule.get_map_list())
+  for (isl::map Map : Schedule.get_map_list()) {
+    // Map.dim would return UINT_MAX for a null map (e.g. on out-of-quota).
+    if (!Map)
+      continue;
+
     Dims = std::max(Dims, Map.dim(isl::dim::out));
+  }
   return Dims;
 }
@@ -435,11 +440,17 @@ isl::map polly::distributeDomain(isl::map Map) {
   isl::space Space = Map.get_space();
   isl::space DomainSpace = Space.domain();
+  if (!DomainSpace)
+    return {};
   unsigned DomainDims = DomainSpace.dim(isl::dim::set);
   isl::space RangeSpace = Space.range().unwrap();
   isl::space Range1Space = RangeSpace.domain();
+  if (!Range1Space)
+    return {};
   unsigned Range1Dims = Range1Space.dim(isl::dim::set);
   isl::space Range2Space = RangeSpace.range();
+  if (!Range2Space)
+    return {};
   unsigned Range2Dims = Range2Space.dim(isl::dim::set);
   isl::space OutputSpace =
@@ -578,6 +589,10 @@ static void foreachPoint(isl::basic_set BSet,
 /// Ordering is based on the lower bounds of the set's dimensions. First
 /// dimensions are considered first.
 static int flatCompare(const isl::basic_set &A, const isl::basic_set &B) {
+  // Quick bail-out on out-of-quota.
+  if (!A || !B)
+    return 0;
+
   unsigned ALen = A.dim(isl::dim::set);
   unsigned BLen = B.dim(isl::dim::set);
   unsigned Len = std::min(ALen, BLen);
diff --git a/polly/test/ForwardOpTree/out-of-quota1.ll b/polly/test/ForwardOpTree/out-of-quota1.ll
new file mode 100644
index 0000000000000..909d31850cd22
--- /dev/null
+++ b/polly/test/ForwardOpTree/out-of-quota1.ll
@@ -0,0 +1,59 @@
+; RUN: opt %loadPolly -polly-optree -analyze %s | FileCheck %s
+
+; This used to loop infinitely because of UINT_MAX returned by ISL on out-of-quota.
+ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187 = type { i32, i32, i32, i32, i32, [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16**, i16***, i8*, i16**, i8***, i64***, i64***, i16****, i8**, i8**, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, %struct.DecRefPicMarking_s.4.220.388.508.628.796.916.1132.1204.1444.1468.1516.1540.1564.1588.1660.1684.1756.1780.1828.1876.2164.2284.2404.2428.2452.2476.2500.2524.2836.2860.2884.2908.4416.0.6.12.16.22.28.54.56.58.60.186*, i32 } +%struct.DecRefPicMarking_s.4.220.388.508.628.796.916.1132.1204.1444.1468.1516.1540.1564.1588.1660.1684.1756.1780.1828.1876.2164.2284.2404.2428.2452.2476.2500.2524.2836.2860.2884.2908.4416.0.6.12.16.22.28.54.56.58.60.186 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.4.220.388.508.628.796.916.1132.1204.1444.1468.1516.1540.1564.1588.1660.1684.1756.1780.1828.1876.2164.2284.2404.2428.2452.2476.2500.2524.2836.2860.2884.2908.4416.0.6.12.16.22.28.54.56.58.60.186* } + +define void @func() { +entry: + %0 = load %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187** undef, align 8 + %1 = load %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187** undef, align 8 + %2 = load %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187*, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187** undef, align 8 + %3 = load i16, i16* undef, align 4 
+ %conv2081956 = zext i16 %3 to i64 + br label %for.cond212.preheader + +for.cond212.preheader: + %indvars.iv1926 = phi i64 [ %indvars.iv.next1927, %for.inc354 ], [ 0, %entry ] + br label %for.body215 + +for.body215: + %indvars.iv1921 = phi i64 [ 0, %for.cond212.preheader ], [ %indvars.iv.next1922, %for.body215 ] + %4 = shl nuw nsw i64 %indvars.iv1921, 1 + %arrayidx230 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %1, i64 0, i32 5, i64 %indvars.iv1926, i64 1, i64 %4 + store i64 undef, i64* %arrayidx230, align 8 + %5 = or i64 %4, 1 + %arrayidx248 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %1, i64 0, i32 5, i64 %indvars.iv1926, i64 1, i64 %5 + store i64 undef, i64* %arrayidx248, align 8 + %arrayidx264 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %2, i64 0, i32 5, i64 %indvars.iv1926, i64 1, i64 %4 + store i64 undef, i64* %arrayidx264, align 8 + %arrayidx282 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %2, i64 0, i32 5, i64 %indvars.iv1926, i64 1, i64 %5 + store i64 undef, i64* %arrayidx282, align 8 + %arrayidx298 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %1, i64 0, i32 5, i64 %indvars.iv1926, i64 0, i64 %4 + store i64 undef, i64* %arrayidx298, align 8 + %arrayidx307 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, 
%struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %0, i64 0, i32 5, i64 %indvars.iv1926, i64 2, i64 %5 + %6 = load i64, i64* %arrayidx307, align 8 + %arrayidx316 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %1, i64 0, i32 5, i64 %indvars.iv1926, i64 0, i64 %5 + store i64 %6, i64* %arrayidx316, align 8 + %arrayidx332 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %2, i64 0, i32 5, i64 %indvars.iv1926, i64 0, i64 %4 + store i64 undef, i64* %arrayidx332, align 8 + %arrayidx350 = getelementptr inbounds %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187, %struct.storable_picture.5.221.389.509.629.797.917.1133.1205.1445.1469.1517.1541.1565.1589.1661.1685.1757.1781.1829.1877.2165.2285.2405.2429.2453.2477.2501.2525.2837.2861.2885.2909.4417.1.7.13.17.23.29.55.57.59.61.187* %2, i64 0, i32 5, i64 %indvars.iv1926, i64 0, i64 %5 + store i64 undef, i64* %arrayidx350, align 8 + %indvars.iv.next1922 = add nuw nsw i64 %indvars.iv1921, 1 + %exitcond1925 = icmp eq i64 %indvars.iv.next1922, 16 + br i1 %exitcond1925, label %for.inc354, label %for.body215 + +for.inc354: + %indvars.iv.next1927 = add nuw nsw i64 %indvars.iv1926, 1 + %exitcond1930 = icmp eq i64 %indvars.iv1926, %conv2081956 + br i1 %exitcond1930, label %for.body930, label %for.cond212.preheader + +for.body930: + br label %for.body930 +} + + +; CHECK: ForwardOpTree executed, but did not modify anything diff --git a/polly/unittests/ScopPassManager/CMakeLists.txt b/polly/unittests/ScopPassManager/CMakeLists.txt index 59c5a4a717a51..ed3bbd6dcef42 100644 --- a/polly/unittests/ScopPassManager/CMakeLists.txt +++ b/polly/unittests/ScopPassManager/CMakeLists.txt @@ -1,3 +1,5 @@ +llvm_map_components_to_libnames(llvm_libs Passes Core Analysis) add_polly_unittest(ScopPassManagerTests PassManagerTest.cpp ) +target_link_libraries(ScopPassManagerTests PRIVATE ${llvm_libs})
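The ScopPassManager hunk, like the polly/lib change earlier in this patch, moves from hard-coded LLVM* library names to component-based linking. A minimal sketch of the same pattern for a hypothetical additional unittest (the FooTests/FooTest.cpp names are illustrative, not part of this patch):

  # Resolve LLVM component names (e.g. Passes -> LLVMPasses) into the
  # concrete library names used by this build.
  llvm_map_components_to_libnames(llvm_libs Passes Core Analysis)

  # Hypothetical test target; add_polly_unittest is Polly's own helper
  # for registering gtest binaries.
  add_polly_unittest(FooTests
    FooTest.cpp
  )

  # Link privately so the component choice does not leak to dependents.
  target_link_libraries(FooTests PRIVATE ${llvm_libs})

Linking PRIVATE keeps the resolved component libraries out of the target's link interface, so a later switch to a different component set, or to the monolithic libLLVM dylib, only has to change the mapping call.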